summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@redhat.com>2014-01-14 17:00:14 +0000
committerJeff Darcy <jdarcy@redhat.com>2014-01-14 17:00:14 +0000
commit455791f265e6e581fa4ebddd5dc4642b2201f8ce (patch)
treeebd5cad9534291822f8c47dbbc8162525f8fe92e
parent92eaa72ea4cd0d06c2161842c548008db0eee01c (diff)
parent7d89ec77763dc5076379753c736f7fce2bedd9ec (diff)
Merge branch 'upstream' into merge
-rw-r--r--MAINTAINERS200
-rw-r--r--api/src/glfs-fops.c12
-rw-r--r--api/src/glfs.c4
-rw-r--r--cli/src/cli-cmd-parser.c43
-rw-r--r--cli/src/cli-cmd-volume.c35
-rw-r--r--cli/src/cli-rpc-ops.c258
-rw-r--r--cli/src/cli-xml-output.c13
-rw-r--r--cli/src/cli.h14
-rw-r--r--configure.ac2
-rw-r--r--doc/admin-guide/en-US/markdown/.gitignore2
-rw-r--r--doc/admin-guide/en-US/markdown/Administration_Guide.md1
-rw-r--r--doc/admin-guide/en-US/markdown/Author_Group.md5
-rw-r--r--doc/admin-guide/en-US/markdown/Book_Info.md1
-rw-r--r--doc/admin-guide/en-US/markdown/Chapter.md18
-rw-r--r--doc/admin-guide/en-US/markdown/Preface.md22
-rw-r--r--doc/admin-guide/en-US/markdown/Revision_History.md4
-rw-r--r--doc/admin-guide/en-US/markdown/admin_ACLs.md46
-rw-r--r--doc/admin-guide/en-US/markdown/admin_Hadoop.md60
-rw-r--r--doc/admin-guide/en-US/markdown/admin_UFO.md286
-rw-r--r--doc/admin-guide/en-US/markdown/admin_commandref.md180
-rw-r--r--doc/admin-guide/en-US/markdown/admin_console.md5
-rw-r--r--doc/admin-guide/en-US/markdown/admin_directory_Quota.md26
-rw-r--r--doc/admin-guide/en-US/markdown/admin_geo-replication.md134
-rw-r--r--doc/admin-guide/en-US/markdown/admin_managing_volumes.md274
-rw-r--r--doc/admin-guide/en-US/markdown/admin_monitoring_workload.md118
-rw-r--r--doc/admin-guide/en-US/markdown/admin_puppet.md499
-rw-r--r--doc/admin-guide/en-US/markdown/admin_setting_volumes.md161
-rw-r--r--doc/admin-guide/en-US/markdown/admin_settingup_clients.md120
-rw-r--r--doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md34
-rw-r--r--doc/admin-guide/en-US/markdown/admin_storage_pools.md21
-rw-r--r--doc/admin-guide/en-US/markdown/admin_troubleshooting.md214
-rw-r--r--doc/admin-guide/en-US/markdown/gfs_introduction.md29
-rw-r--r--doc/admin-guide/en-US/markdown/glossary.md104
-rwxr-xr-xdoc/admin-guide/en-US/markdown/pdfgen.sh16
-rw-r--r--doc/mount.glusterfs.885
-rw-r--r--extras/Ubuntu/README.Ubuntu14
-rw-r--r--extras/Ubuntu/glusterfs-server.conf (renamed from extras/Ubuntu/glusterd.conf)0
-rw-r--r--extras/Ubuntu/mounting-glusterfs.conf5
-rw-r--r--extras/glusterfs-georep-logrotate18
-rwxr-xr-xgeo-replication/src/peer_gsec_create.in10
-rw-r--r--geo-replication/syncdaemon/configinterface.py41
-rw-r--r--geo-replication/syncdaemon/gsyncd.py10
-rw-r--r--geo-replication/syncdaemon/master.py799
-rw-r--r--geo-replication/syncdaemon/resource.py176
-rw-r--r--geo-replication/syncdaemon/syncdutils.py2
-rw-r--r--glusterfs.spec.in231
-rw-r--r--libglusterfs/src/client_t.h1
-rw-r--r--libglusterfs/src/dict.c4
-rw-r--r--libglusterfs/src/event-history.c1
-rw-r--r--libglusterfs/src/glusterfs.h10
-rw-r--r--libglusterfs/src/logging.c35
-rw-r--r--libglusterfs/src/logging.h2
-rw-r--r--libglusterfs/src/mem-types.h3
-rw-r--r--libglusterfs/src/store.c16
-rw-r--r--libglusterfs/src/syscall.c11
-rw-r--r--rpc/rpc-lib/src/auth-glusterfs.c12
-rw-r--r--rpc/rpc-lib/src/auth-null.c9
-rw-r--r--rpc/rpc-lib/src/auth-unix.c6
-rw-r--r--rpc/rpc-lib/src/protocol-common.h17
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c9
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.h3
-rw-r--r--rpc/rpc-lib/src/rpc-drc.c42
-rw-r--r--rpc/rpc-lib/src/rpcsvc-auth.c50
-rw-r--r--rpc/rpc-lib/src/rpcsvc-common.h2
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c10
-rw-r--r--rpc/rpc-lib/src/rpcsvc.h8
-rw-r--r--rpc/rpc-transport/socket/src/name.c4
-rw-r--r--rpc/rpc-transport/socket/src/socket.c2
-rwxr-xr-xtests/bugs/brick-uid-reset-on-volume-restart.t47
-rwxr-xr-xtests/bugs/bug-1037501.t253
-rw-r--r--tests/bugs/bug-1040408.t31
-rwxr-xr-xtests/bugs/bug-1043886.t55
-rw-r--r--tests/bugs/bug-1046308.t19
-rwxr-xr-xtests/bugs/bug-1046624.t49
-rw-r--r--tests/bugs/bug-1047955.t23
-rw-r--r--tests/bugs/bug-1051896.c94
-rw-r--r--tests/bugs/bug-1051896.t24
-rw-r--r--tests/bugs/bug-765564.t2
-rwxr-xr-xtests/bugs/bug-921072.t9
-rw-r--r--xlators/cluster/afr/src/afr-common.c44
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c5
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c66
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c42
-rw-r--r--xlators/cluster/afr/src/afr.c13
-rw-r--r--xlators/cluster/afr/src/afr.h5
-rw-r--r--xlators/cluster/dht/src/dht-common.c12
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c2
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c7
-rw-r--r--xlators/cluster/dht/src/dht-shared.c3
-rw-r--r--xlators/cluster/stripe/src/stripe.c7
-rw-r--r--xlators/encryption/crypt/src/crypt.c31
-rw-r--r--xlators/features/index/src/index.c13
-rw-r--r--xlators/features/locks/src/clear.c11
-rw-r--r--xlators/features/locks/src/common.c120
-rw-r--r--xlators/features/locks/src/common.h35
-rw-r--r--xlators/features/locks/src/entrylk.c490
-rw-r--r--xlators/features/locks/src/inodelk.c208
-rw-r--r--xlators/features/locks/src/locks.h36
-rw-r--r--xlators/features/locks/src/posix.c109
-rw-r--r--xlators/features/quota/src/quota.c13
-rw-r--r--xlators/lib/src/libxlator.c55
-rw-r--r--xlators/lib/src/libxlator.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c127
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c563
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c38
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c53
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c114
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c463
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c30
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h15
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c2
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in528
-rw-r--r--xlators/nfs/server/src/acl3.c44
-rw-r--r--xlators/nfs/server/src/acl3.h6
-rw-r--r--xlators/nfs/server/src/mount3.c50
-rw-r--r--xlators/nfs/server/src/nfs.c18
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h3
-rw-r--r--xlators/nfs/server/src/nfs3.h4
-rw-r--r--xlators/nfs/server/src/nlm4.c2
-rw-r--r--xlators/performance/io-cache/src/page.c2
-rw-r--r--xlators/protocol/client/src/client.c10
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c4
-rw-r--r--xlators/protocol/server/src/server.c34
-rw-r--r--xlators/storage/bd/src/bd.c7
-rw-r--r--xlators/storage/posix/src/posix-handle.c23
-rw-r--r--xlators/storage/posix/src/posix-helpers.c14
-rw-r--r--xlators/storage/posix/src/posix.c70
135 files changed, 5290 insertions, 3529 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
new file mode 100644
index 000000000..39c042b9c
--- /dev/null
+++ b/MAINTAINERS
@@ -0,0 +1,200 @@
+GlusterFS Maintainers
+=====================
+
+The intention of this file is not to establish who owns what portions of the
+code base, but to provide a set of names that developers can consult when they
+have a question about a particular subset and also to provide a set of names
+to be CC'd when submitting a patch to obtain appropriate review.
+
+In general, if you have a question about inclusion of a patch, you should
+consult gluster-devel@nongnu.org and not any specific individual privately.
+
+Descriptions of section entries:
+
+ M: Mail patches to: FullName <address@domain>
+ L: Mailing list that is relevant to this area
+ W: Web-page with status/info
+ Q: Patchwork web-based patch tracking system site
+ T: SCM tree type and location. Type is one of: git, hg, quilt, stgit.
+ S: Status, one of the following:
+ Supported: Someone is actually paid to look after this.
+ Maintained: Someone actually looks after it.
+ Odd Fixes: It has a maintainer but they don't have time to do
+ much other than throw the odd patch in. See below.
+ Orphan: No current maintainer [but maybe you could take the
+ role as you write your new code].
+ Obsolete: Old code. Something tagged obsolete generally means
+ it has been replaced by a better system and you
+ should be using that.
+ F: Files and directories with wildcard patterns.
+ A trailing slash includes all files and subdirectory files.
+ F: drivers/net/ all files in and below drivers/net
+ F: drivers/net/* all files in drivers/net, but not below
+ F: */net/* all files in "any top level directory"/net
+ One pattern per line. Multiple F: lines acceptable.
+ X: Files and directories that are NOT maintained, same rules as F:
+ File exclusions are tested before file matches.
+ Can be useful for excluding a specific subdirectory, for instance:
+ F: net/
+ X: net/ipv6/
+ matches all files in and below net excluding net/ipv6/
+ K: Keyword perl extended regex pattern to match content in a
+ patch or file. For instance:
+ K: of_get_profile
+ matches patches or files that contain "of_get_profile"
+ K: \b(printk|pr_(info|err))\b
+ matches patches or files that contain one or more of the words
+ printk, pr_info or pr_err
+ One regex pattern per line. Multiple K: lines acceptable.
+
+
+General Project Architects
+--------------------------
+M: Amar Tumballi <amarts@redhat.com>
+M: Anand Avati <avati@redhat.com>
+M: Jeff Darcy <jdarcy@redhat.com>
+M: Kaleb S. Keithley <kkeithle@redhat.com>
+M: Vijay Bellur <vbellur@redhat.com>
+
+xlators:
+--------
+Automatic File Replication (AFR)
+M: Pranith Karampuri <pkarampu@redhat.com>
+S: Maintained
+F: xlators/cluster/afr/
+
+Distributed Hash Table (DHT)
+M: Shishir Gowda <gowda.shishir@gmail.com>
+S: Maintained
+F: xlators/cluster/dht/
+
+Performance
+M: Raghavendra Gowdappa <rgowdapp@redhat.com>
+S: Maintained
+F: xlators/performance/
+
+Index
+M: Pranith Karampuri <pkarampu@redhat.com>
+S: Maintained
+F: xlators/features/index/
+
+Quota
+M: Krishnan Parthasarathi <kparthas@redhat.com>
+M: Raghavendra Gowdappa <rgowdapp@redhat.com>
+S: Maintained
+F: xlators/features/quota/
+
+Marker
+M: Raghavendra Gowdappa <rgowdapp@redhat.com>
+S: Maintained
+F: xlators/features/marker/
+
+NFS Server
+M: Santosh Pradhan <spradhan@redhat.com>
+S: Maintained
+F: xlators/nfs/
+
+Changelog
+M: Venky Shankar <vshankar@redhat.com>
+S: Maintained
+F: xlators/features/changelog/
+
+Block Device
+M: M. Mohan Kumar <mohan@in.ibm.com>
+S: Maintained
+F: xlators/storage/bd/
+
+FUSE Bridge
+M: Anand Avati <avati@redhat.com>
+M: Brian Foster <bfoster@redhat.com>
+M: Csaba Henk <chenk@redhat.com>
+S: Maintained
+F: xlators/mount/
+
+
+Other bits of code:
+-------------------
+Geo Replication
+M: Venky Shankar <vshankar@redhat.com>
+S: Maintained
+F: geo-replication/
+
+libgfapi
+M: Anand Avati <avati@redhat.com>
+S: Maintained
+F: api/
+
+Management Daemon
+M: Krishnan Parthasarathi <kparthas@redhat.com>
+S: Maintained
+F: cli/
+F: xlators/mgmt/
+
+Remote Procedure Call subsystem
+M: Amar Tumballi <amarts@redhat.com>
+M: Anand Avati <avati@redhat.com>
+S: Maintained
+F: rpc/
+
+
+Distribution Specific:
+----------------------
+Debian Packaging
+M: Patrick Matthäi <pmatthaei@debian.org>
+M: Louis Zuckerman <me@louiszuckerman.com>
+W: http://packages.qa.debian.org/g/glusterfs.html
+
+Fedora Packaging
+M: glusterfs-owner@fedoraproject.org
+M: Kaleb Keithley <kkeithle@redhat.com>
+M: Niels de Vos <ndevos@redhat.com>
+W: https://apps.fedoraproject.org/packages/glusterfs
+T: http://pkgs.fedoraproject.org/git/glusterfs.git
+
+NetBSD port
+M: Emmanuel Dreyfus <manu@netbsd.org>
+W: http://pkgsrc.se/filesystems/glusterfs
+
+Ubuntu Packaging
+M: Louis Zuckerman <me@louiszuckerman.com>
+W: http://download.gluster.org/pub/gluster/glusterfs/LATEST/Ubuntu/Ubuntu.README
+
+
+Related projects
+----------------
+Gluster Openstack Swift
+M: Luis Pabon <lpabon@redhat.com>
+S: Maintained
+T: https://github.com/gluster/gluster-swift.git
+
+GlusterFS Hadoop HCFS plugin
+M: Jay Vyas <jvyas@redhat.com>
+T: https://github.com/gluster/glusterfs-hadoop.git
+
+NFS-Ganesha FSAL plugin
+M: Anand Subramanian <ansubram@redhat.com>
+S: Maintained
+T: git://github.com/nfs-ganesha/nfs-ganesha.git
+F: src/nfs-ganesha~/src/FSAL/FSAL_GLUSTER/
+
+QEMU integration
+M: Bharata B Rao <bharata@linux.vnet.ibm.com>
+S: Maintained
+T: git://git.qemu.org/qemu.git
+F: block/gluster.c
+
+Samba VFS plugin
+M: Raghavendra Talur <rtalur@redhat.com>
+M: Chris Hertel <chertel@redhat.com>
+M: Jose Rivera <jrivera@redhat.com>
+M: Ira Cooper <icooper@redhat.com>
+S: Maintained
+T: git://git.samba.org/samba.git
+F: source3/modules/vfs_glusterfs.c
+
+Wireshark dissectors
+M: Niels de Vos <ndevos@redhat.com>
+S: Maintained
+T: http://code.wireshark.org/git/wireshark
+F: epan/dissectors/packet-gluster*
+
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
index 8d905193a..e1fa697f9 100644
--- a/api/src/glfs-fops.c
+++ b/api/src/glfs-fops.c
@@ -752,10 +752,14 @@ glfs_preadv_async (struct glfs_fd *glfd, const struct iovec *iovec, int count,
offset, flags, NULL);
out:
- if (ret) {
- GF_FREE (gio->iov);
- GF_FREE (gio);
- STACK_DESTROY (frame->root);
+ if (ret) {
+ if (gio) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+ if (frame) {
+ STACK_DESTROY (frame->root);
+ }
glfs_subvol_done (fs, subvol);
}
diff --git a/api/src/glfs.c b/api/src/glfs.c
index 29ed47c0c..1bae78d23 100644
--- a/api/src/glfs.c
+++ b/api/src/glfs.c
@@ -666,8 +666,8 @@ glfs_fini (struct glfs *fs)
glfs_subvol_done (fs, subvol);
- if (ctx->log.logfile)
- fclose (ctx->log.logfile);
+ if (gf_log_fini(ctx) != 0)
+ ret = -1;
return ret;
}
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 8a37d8e78..dd9398184 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -38,13 +38,6 @@ str_getunamb (const char *tok, char **opwords)
return (char *)cli_getunamb (tok, (void **)opwords, id_sel);
}
-static inline gf_boolean_t
-cli_is_quota_cmd (char *key)
-{
- return ((strcmp (key, "quota") == 0) ||
- (strcmp (key, "features.quota") == 0));
-}
-
int32_t
cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
char **bricks, int *brick_count)
@@ -865,11 +858,6 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options,
char *value = NULL;
int i = 0;
char str[50] = {0,};
- char *true_keys[] = {"on", "enable", "yes",
- "true", "1", NULL};
- char *false_keys[] = {"off", "disable", "no",
- "false", "0", NULL};
- char *w1 = NULL, *w2 = NULL;
GF_ASSERT (words);
GF_ASSERT (options);
@@ -922,25 +910,6 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options,
if (ret == 0)
*options = dict;
goto out;
- } else if (wordcount == 5 && cli_is_quota_cmd ((char *)words[3])) {
- value = (char *)words[4];
- w1 = str_getunamb (value, true_keys);
- w2 = str_getunamb (value, false_keys);
- if (w1 != NULL) {
- gf_asprintf (op_errstr,"'gluster volume set <VOLNAME> "
- "%s %s' is deprecated. Use 'gluster "
- "volume quota <VOLNAME> enable' instead.",
- (char *)words[3], w1);
- ret = -1;
- goto out;
- } else if (w2 != NULL) {
- gf_asprintf (op_errstr,"'gluster volume set <VOLNAME> "
- "%s %s' is deprecated. Use 'gluster "
- "volume quota <VOLNAME> disable' instead.",
- (char *)words[3], w2);
- ret = -1;
- goto out;
- }
}
for (i = 3; i < wordcount; i+=2) {
@@ -1737,13 +1706,13 @@ config_parse (const char **words, int wordcount, dict_t *dict,
}
append_str[append_len - 2] = '\0';
/* "checkpoint now" is special: we resolve that "now" */
- if (strcmp (words[cmdi + 1], "checkpoint") == 0 &&
- strcmp (append_str, "now") == 0) {
+ if ((strcmp (words[cmdi + 1], "checkpoint") == 0) &&
+ (strcmp (append_str, "now") == 0)) {
struct timeval tv = {0,};
ret = gettimeofday (&tv, NULL);
if (ret == -1)
- goto out; /* FIXME: free append_str? */
+ goto out;
GF_FREE (append_str);
append_str = GF_CALLOC (1, 300, cli_mt_append_str);
@@ -1751,10 +1720,8 @@ config_parse (const char **words, int wordcount, dict_t *dict,
ret = -1;
goto out;
}
- strcpy (append_str, "as of ");
- gf_time_fmt (append_str + strlen ("as of "),
- 300 - strlen ("as of "),
- tv.tv_sec, gf_timefmt_FT);
+ snprintf (append_str, 300, "now:%ld.%06ld",
+ tv.tv_sec, tv.tv_usec);
}
ret = dict_set_dynstr (dict, "op_value", append_str);
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index f3b1828a0..a7e72dabd 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -349,6 +349,11 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
int32_t sub_count = 0;
int32_t type = GF_CLUSTER_TYPE_NONE;
cli_local_t *local = NULL;
+ char *trans_type = NULL;
+ char *question = "RDMA transport is"
+ " recommended only for testing purposes"
+ " in this release. Do you want to continue?";
+ gf_answer_t answer = GF_ANSWER_NO;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
@@ -395,6 +400,22 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
}
}
+
+ ret = dict_get_str (options, "transport", &trans_type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get transport type");
+ goto out;
+ }
+
+ if (strcasestr (trans_type, "rdma")) {
+ answer =
+ cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
+
if (state->mode & GLUSTER_MODE_WIGNORE) {
ret = dict_set_int32 (options, "force", _gf_true);
if (ret) {
@@ -1533,6 +1554,11 @@ cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
int sent = 0;
int parse_error = 0;
cli_local_t *local = NULL;
+ int replace_op = 0;
+ char *q = "All replace-brick commands except "
+ "commit force are deprecated. "
+ "Do you want to continue?";
+ gf_answer_t answer = GF_ANSWER_NO;
#ifdef GF_SOLARIS_HOST_OS
cli_out ("Command not supported on Solaris");
@@ -1552,6 +1578,15 @@ cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
goto out;
}
+ ret = dict_get_int32 (options, "operation", &replace_op);
+ if (replace_op != GF_REPLACE_OP_COMMIT_FORCE) {
+ answer = cli_cmd_get_confirmation (state, q);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
+
if (state->mode & GLUSTER_MODE_WIGNORE) {
ret = dict_set_int32 (options, "force", _gf_true);
if (ret) {
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 2cb0ba3d4..d1b39014d 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -3874,85 +3874,22 @@ gf_cli_gsync_config_command (dict_t *dict)
return runner_run (&runner);
}
-int
-gf_cli_fetch_gsyncd_status_values (char *status,
- gf_cli_gsync_status_t *sts_val)
-{
- int32_t ret = -1;
- char *tmp = NULL;
- char *save_ptr = NULL;
- char *key = NULL;
- char *value = NULL;
-
- if (!status || !sts_val) {
- gf_log ("", GF_LOG_ERROR, "status or sts_val is null");
- goto out;
- }
-
- tmp = strtok_r (status, "\n", &save_ptr);
-
- if (tmp)
- sts_val->health = gf_strdup (tmp);
-
- while (tmp) {
- key = strtok_r (tmp, "=", &value);
-
- if ((key) && (!strcmp(key, "Uptime")))
- sts_val->uptime = gf_strdup (value);
-
- if ((key) && (!strcmp(key, "FilesSyncd")))
- sts_val->files_syncd = gf_strdup (value);
-
- if ((key) && (!strcmp(key, "FilesPending")))
- sts_val->files_pending = gf_strdup (value);
-
- if ((key) && (!strcmp(key, "BytesPending"))) {
- value = gf_uint64_2human_readable(atol(value));
- sts_val->bytes_pending = gf_strdup (value);
- }
-
- if ((key) && (!strcmp(key, "DeletesPending")))
- sts_val->deletes_pending = gf_strdup (value);
-
- tmp = strtok_r (NULL, ";", &save_ptr);
- }
-
- if (sts_val->health)
- ret = 0;
-
- if (!sts_val->uptime)
- sts_val->uptime = gf_strdup ("N/A");
-
- if (!sts_val->files_syncd)
- sts_val->files_syncd = gf_strdup ("N/A");
-
- if (!sts_val->files_pending)
- sts_val->files_pending = gf_strdup ("N/A");
-
- if (!sts_val->bytes_pending)
- sts_val->bytes_pending = gf_strdup ("N/A");
-
- if (!sts_val->deletes_pending)
- sts_val->deletes_pending = gf_strdup ("N/A");
-
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret);
- return ret;
-}
-
char*
-get_struct_variable (int mem_num, gf_cli_gsync_status_t *sts_val)
+get_struct_variable (int mem_num, gf_gsync_status_t *sts_val)
{
switch (mem_num) {
- case 0: return (sts_val->node);
- case 1: return (sts_val->master);
- case 2: return (sts_val->slave);
- case 3: return (sts_val->health);
- case 4: return (sts_val->uptime);
- case 5: return (sts_val->files_syncd);
- case 6: return (sts_val->files_pending);
- case 7: return (sts_val->bytes_pending);
- case 8: return (sts_val->deletes_pending);
+ case 0: return (sts_val->node);
+ case 1: return (sts_val->master);
+ case 2: return (sts_val->brick);
+ case 3: return (sts_val->slave_node);
+ case 4: return (sts_val->worker_status);
+ case 5: return (sts_val->checkpoint_status);
+ case 6: return (sts_val->crawl_status);
+ case 7: return (sts_val->files_syncd);
+ case 8: return (sts_val->files_remaining);
+ case 9: return (sts_val->bytes_remaining);
+ case 10: return (sts_val->purges_remaining);
+ case 11: return (sts_val->total_files_skipped);
default:
goto out;
}
@@ -3963,28 +3900,23 @@ out:
int
gf_cli_print_status (char **title_values,
- gf_cli_gsync_status_t **sts_vals,
+ gf_gsync_status_t **sts_vals,
int *spacing, int gsync_count,
int number_of_fields, int is_detail)
{
- int indents = 0;
int i = 0;
int j = 0;
int ret = 0;
+ int status_fields = 6; /* Indexed at 0 */
int total_spacing = 0;
char **output_values = NULL;
char *tmp = NULL;
char *hyphens = NULL;
- char heading[PATH_MAX] = {0, };
- char indent_spaces[PATH_MAX] = {0, };
/* calculating spacing for hyphens */
for (i = 0; i < number_of_fields; i++) {
- /* Suppressing master and slave output for status detail */
- if ((is_detail) && ((i == 1) || (i == 2))) {
- total_spacing++;
- continue;
- } else if ((!is_detail) && (i > 4)) {
+ /* Suppressing detail output for status */
+ if ((!is_detail) && (i > status_fields)) {
/* Suppressing detailed output for
* status */
continue;
@@ -4018,64 +3950,29 @@ gf_cli_print_status (char **title_values,
goto out;
}
- ret = snprintf(heading, sizeof(heading), "MASTER: %s SLAVE: %s",
- sts_vals[0]->master, sts_vals[0]->slave);
- if (ret) {
- if (ret < sizeof(heading))
- heading[ret] = '\0';
- else
- heading[sizeof(heading) - 1] = '\0';
- ret = 0;
- } else {
- ret = -1;
- goto out;
- }
-
- if (is_detail) {
- cli_out (" ");
- if (strlen(heading) > total_spacing)
- cli_out ("%s", heading);
- else {
- /* Printing the heading with centre justification */
- indents = (total_spacing - strlen(heading)) / 2;
- memset (indent_spaces, ' ', indents);
- indent_spaces[indents] = '\0';
- ret = snprintf (hyphens, total_spacing, "%s%s",
- indent_spaces, heading);
- if (ret) {
- hyphens[ret] = '\0';
- cli_out ("%s", hyphens);
- ret = 0;
- } else {
- ret = -1;
- goto out;
- }
- }
- cli_out (" ");
- }
+ cli_out (" ");
/* setting the title "NODE", "MASTER", etc. from title_values[]
and printing the same */
for (j = 0; j < number_of_fields; j++) {
- /* Suppressing master and slave output for status detail */
- if ((is_detail) && ((j == 1) || (j == 2))) {
- output_values[j][0] = '\0';
- continue;
- } else if ((!is_detail) && (j > 4)) {
+ if ((!is_detail) && (j > status_fields)) {
/* Suppressing detailed output for
* status */
output_values[j][0] = '\0';
- continue;
+ continue;
}
memset (output_values[j], ' ', spacing[j]);
memcpy (output_values[j], title_values[j],
strlen(title_values[j]));
output_values[j][spacing[j]] = '\0';
}
- cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0],
- output_values[1], output_values[2], output_values[3],
- output_values[4], output_values[5], output_values[6],
- output_values[7], output_values[8]);
+ cli_out ("%s %s %s %s %s %s %s %s %s %s %s %s",
+ output_values[0], output_values[1],
+ output_values[2], output_values[3],
+ output_values[4], output_values[5],
+ output_values[6], output_values[7],
+ output_values[8], output_values[9],
+ output_values[10], output_values[11]);
/* setting and printing the hyphens */
memset (hyphens, '-', total_spacing);
@@ -4084,12 +3981,7 @@ gf_cli_print_status (char **title_values,
for (i = 0; i < gsync_count; i++) {
for (j = 0; j < number_of_fields; j++) {
- /* Suppressing master and slave output for
- * status detail */
- if ((is_detail) && ((j == 1) || (j == 2))) {
- output_values[j][0] = '\0';
- continue;
- } else if ((!is_detail) && (j > 4)) {
+ if ((!is_detail) && (j > status_fields)) {
/* Suppressing detailed output for
* status */
output_values[j][0] = '\0';
@@ -4107,10 +3999,13 @@ gf_cli_print_status (char **title_values,
output_values[j][spacing[j]] = '\0';
}
- cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0],
- output_values[1], output_values[2], output_values[3],
- output_values[4], output_values[5], output_values[6],
- output_values[7], output_values[8]);
+ cli_out ("%s %s %s %s %s %s %s %s %s %s %s %s",
+ output_values[0], output_values[1],
+ output_values[2], output_values[3],
+ output_values[4], output_values[5],
+ output_values[6], output_values[7],
+ output_values[8], output_values[9],
+ output_values[10], output_values[11]);
}
out:
@@ -4130,47 +4025,23 @@ out:
int
gf_cli_read_status_data (dict_t *dict,
- gf_cli_gsync_status_t **sts_vals,
+ gf_gsync_status_t **sts_vals,
int *spacing, int gsync_count,
int number_of_fields)
{
- int ret = 0;
- int i = 0;
- int j = 0;
- char mst[PATH_MAX] = {0, };
- char slv[PATH_MAX] = {0, };
- char sts[PATH_MAX] = {0, };
- char nds[PATH_MAX] = {0, };
- char *status = NULL;
- char *tmp = NULL;
+ char *tmp = NULL;
+ char sts_val_name[PATH_MAX] = "";
+ int ret = 0;
+ int i = 0;
+ int j = 0;
/* Storing per node status info in each object */
for (i = 0; i < gsync_count; i++) {
- snprintf (nds, sizeof(nds), "node%d", i + 1);
- snprintf (mst, sizeof(mst), "master%d", i + 1);
- snprintf (slv, sizeof(slv), "slave%d", i + 1);
- snprintf (sts, sizeof(sts), "status%d", i + 1);
+ snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i);
/* Fetching the values from dict, and calculating
the max length for each field */
- ret = dict_get_str (dict, nds, &(sts_vals[i]->node));
- if (ret)
- goto out;
-
- ret = dict_get_str (dict, mst, &(sts_vals[i]->master));
- if (ret)
- goto out;
-
- ret = dict_get_str (dict, slv, &(sts_vals[i]->slave));
- if (ret)
- goto out;
-
- ret = dict_get_str (dict, sts, &status);
- if (ret)
- goto out;
-
- /* Fetching health and uptime from sts_val */
- ret = gf_cli_fetch_gsyncd_status_values (status, sts_vals[i]);
+ ret = dict_get_bin (dict, sts_val_name, (void **)&(sts_vals[i]));
if (ret)
goto out;
@@ -4192,25 +4063,23 @@ out:
}
int
-gf_cli_gsync_status_output (dict_t *dict, int status_detail)
+gf_cli_gsync_status_output (dict_t *dict, gf_boolean_t is_detail)
{
int gsync_count = 0;
int i = 0;
- int j = 0;
int ret = 0;
- int spacing[10] = {0};
- int num_of_fields = 9;
+ int spacing[13] = {0};
+ int num_of_fields = 12;
char errmsg[1024] = "";
char *master = NULL;
char *slave = NULL;
- char *tmp = NULL;
- char *title_values[] = {"NODE", "MASTER", "SLAVE",
- "HEALTH", "UPTIME",
- "FILES SYNCD",
- "FILES PENDING",
- "BYTES PENDING",
- "DELETES PENDING"};
- gf_cli_gsync_status_t **sts_vals = NULL;
+ char *title_values[] = {"MASTER NODE", "MASTER VOL",
+ "MASTER BRICK", "SLAVE",
+ "STATUS", "CHECKPOINT STATUS",
+ "CRAWL STATUS", "FILES SYNCD",
+ "FILES PENDING", "BYTES PENDING",
+ "DELETES PENDING", "FILES SKIPPED"};
+ gf_gsync_status_t **sts_vals = NULL;
/* Checks if any session is active or not */
ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
@@ -4244,14 +4113,14 @@ gf_cli_gsync_status_output (dict_t *dict, int status_detail)
/* gsync_count = number of nodes reporting output.
each sts_val object will store output of each
node */
- sts_vals = GF_CALLOC (gsync_count, sizeof (gf_cli_gsync_status_t *),
+ sts_vals = GF_CALLOC (gsync_count, sizeof (gf_gsync_status_t *),
gf_common_mt_char);
if (!sts_vals) {
ret = -1;
goto out;
}
for (i = 0; i < gsync_count; i++) {
- sts_vals[i] = GF_CALLOC (1, sizeof (gf_cli_gsync_status_t),
+ sts_vals[i] = GF_CALLOC (1, sizeof (gf_gsync_status_t),
gf_common_mt_char);
if (!sts_vals[i]) {
ret = -1;
@@ -4267,23 +4136,15 @@ gf_cli_gsync_status_output (dict_t *dict, int status_detail)
}
ret = gf_cli_print_status (title_values, sts_vals, spacing, gsync_count,
- num_of_fields, status_detail);
+ num_of_fields, is_detail);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to print status output");
goto out;
}
out:
- if (sts_vals) {
- for (i = 0; i < gsync_count; i++) {
- for (j = 3; j < num_of_fields; j++) {
- tmp = get_struct_variable(j, sts_vals[i]);
- if (tmp)
- GF_FREE (tmp);
- }
- }
+ if (sts_vals)
GF_FREE (sts_vals);
- }
return ret;
}
@@ -4602,7 +4463,10 @@ gf_cli_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
status_detail = dict_get_str_boolean (dict,
"status-detail",
_gf_false);
- ret = gf_cli_gsync_status_output (dict, status_detail);
+ if (status_detail)
+ ret = gf_cli_gsync_status_output (dict, status_detail);
+ else
+ ret = gf_cli_gsync_status_output (dict, status_detail);
break;
case GF_GSYNC_OPTION_TYPE_DELETE:
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
index fe0969a30..69fed1bc9 100644
--- a/cli/src/cli-xml-output.c
+++ b/cli/src/cli-xml-output.c
@@ -251,6 +251,11 @@ cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict,
}
*node_present = _gf_true;
+ /* <node>
+ * will be closed in the calling function cli_xml_output_vol_status()*/
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hostname",
"%s", hostname);
XML_RET_CHECK_AND_GOTO (ret, out);
@@ -1662,11 +1667,6 @@ cli_xml_output_vol_status (cli_local_t *local, dict_t *dict)
index_max = brick_index_max + other_count;
for (i = 0; i <= index_max; i++) {
- /* <node> */
- ret = xmlTextWriterStartElement (local->writer,
- (xmlChar *)"node");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
ret = cli_xml_output_vol_status_common (local->writer, dict, i,
&online, &node_present);
if (ret) {
@@ -1732,7 +1732,8 @@ cli_xml_output_vol_status (cli_local_t *local, dict_t *dict)
break;
}
- /* </node> */
+
+ /* </node> was opened in cli_xml_output_vol_status_common()*/
ret = xmlTextWriterEndElement (local->writer);
XML_RET_CHECK_AND_GOTO (ret, out);
}
diff --git a/cli/src/cli.h b/cli/src/cli.h
index f5db29383..53537c642 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -148,18 +148,6 @@ struct cli_local {
#endif
};
-struct gf_cli_gsync_detailed_status_ {
- char *node;
- char *master;
- char *slave;
- char *health;
- char *uptime;
- char *files_syncd;
- char *files_pending;
- char *bytes_pending;
- char *deletes_pending;
-};
-
struct cli_volume_status {
int port;
int online;
@@ -178,8 +166,6 @@ struct cli_volume_status {
#endif
};
-typedef struct gf_cli_gsync_detailed_status_ gf_cli_gsync_status_t;
-
typedef struct cli_volume_status cli_volume_status_t;
typedef struct cli_local cli_local_t;
diff --git a/configure.ac b/configure.ac
index 89aba7781..5ad66f86c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -604,7 +604,7 @@ dnl FreeBSD, NetBSD
AC_CHECK_MEMBERS([struct stat.st_atimespec.tv_nsec])
case $host_os in
*netbsd*)
- CFLAGS+=" -D_INCOMPLETE_XOPEN_C063"
+ CFLAGS="${CFLAGS} -D_INCOMPLETE_XOPEN_C063"
;;
esac
AC_CHECK_FUNC([linkat], [have_linkat=yes])
diff --git a/doc/admin-guide/en-US/markdown/.gitignore b/doc/admin-guide/en-US/markdown/.gitignore
new file mode 100644
index 000000000..9eed46004
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/.gitignore
@@ -0,0 +1,2 @@
+output/*.pdf
+
diff --git a/doc/admin-guide/en-US/markdown/Administration_Guide.md b/doc/admin-guide/en-US/markdown/Administration_Guide.md
deleted file mode 100644
index 8b1378917..000000000
--- a/doc/admin-guide/en-US/markdown/Administration_Guide.md
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/doc/admin-guide/en-US/markdown/Author_Group.md b/doc/admin-guide/en-US/markdown/Author_Group.md
deleted file mode 100644
index ef2a5e677..000000000
--- a/doc/admin-guide/en-US/markdown/Author_Group.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Divya
-Muntimadugu
-Red Hat
-Engineering Content Services
-divya@redhat.com
diff --git a/doc/admin-guide/en-US/markdown/Book_Info.md b/doc/admin-guide/en-US/markdown/Book_Info.md
deleted file mode 100644
index 8b1378917..000000000
--- a/doc/admin-guide/en-US/markdown/Book_Info.md
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/doc/admin-guide/en-US/markdown/Chapter.md b/doc/admin-guide/en-US/markdown/Chapter.md
deleted file mode 100644
index 8420259c4..000000000
--- a/doc/admin-guide/en-US/markdown/Chapter.md
+++ /dev/null
@@ -1,18 +0,0 @@
-Test Chapter
-============
-
-This is a test paragraph
-
-Test Section 1
-==============
-
-This is a test paragraph in a section
-
-Test Section 2
-==============
-
-This is a test paragraph in Section 2
-
-1. listitem text
-
-
diff --git a/doc/admin-guide/en-US/markdown/Preface.md b/doc/admin-guide/en-US/markdown/Preface.md
deleted file mode 100644
index f7e934ae8..000000000
--- a/doc/admin-guide/en-US/markdown/Preface.md
+++ /dev/null
@@ -1,22 +0,0 @@
-Preface
-=======
-
-This guide describes how to configure, operate, and manage Gluster File
-System (GlusterFS).
-
-Audience
-========
-
-This guide is intended for Systems Administrators interested in
-configuring and managing GlusterFS.
-
-This guide assumes that you are familiar with the Linux operating
-system, concepts of File System, GlusterFS concepts, and GlusterFS
-Installation
-
-License
-=======
-
-The License information is available at [][].
-
- []: http://www.redhat.com/licenses/rhel_rha_eula.html
diff --git a/doc/admin-guide/en-US/markdown/Revision_History.md b/doc/admin-guide/en-US/markdown/Revision_History.md
deleted file mode 100644
index 2084309d1..000000000
--- a/doc/admin-guide/en-US/markdown/Revision_History.md
+++ /dev/null
@@ -1,4 +0,0 @@
-Revision History
-================
-
-1-0 Thu Apr 5 2012 Divya Muntimadugu <divya@redhat.com> Draft
diff --git a/doc/admin-guide/en-US/markdown/admin_ACLs.md b/doc/admin-guide/en-US/markdown/admin_ACLs.md
index 308e069ca..8fc4e1dae 100644
--- a/doc/admin-guide/en-US/markdown/admin_ACLs.md
+++ b/doc/admin-guide/en-US/markdown/admin_ACLs.md
@@ -1,5 +1,4 @@
-POSIX Access Control Lists
-==========================
+#POSIX Access Control Lists
POSIX Access Control Lists (ACLs) allows you to assign different
permissions for different users or groups even though they do not
@@ -13,14 +12,12 @@ This means, in addition to the file owner, the file group, and others,
additional users and groups can be granted or denied access by using
POSIX ACLs.
-Activating POSIX ACLs Support
-=============================
+##Activating POSIX ACLs Support
To use POSIX ACLs for a file or directory, the partition of the file or
directory must be mounted with POSIX ACLs support.
-Activating POSIX ACLs Support on Sever
---------------------------------------
+###Activating POSIX ACLs Support on Server
To mount the backend export directories for POSIX ACLs support, use the
following command:
@@ -36,8 +33,7 @@ the following entry for the partition to include the POSIX ACLs option:
`LABEL=/work /export1 ext3 rw, acl 14 `
-Activating POSIX ACLs Support on Client
----------------------------------------
+###Activating POSIX ACLs Support on Client
To mount the glusterfs volumes for POSIX ACLs support, use the following
command:
@@ -48,8 +44,7 @@ For example:
`# mount -t glusterfs -o acl 198.192.198.234:glustervolume /mnt/gluster`
-Setting POSIX ACLs
-==================
+##Setting POSIX ACLs
You can set two types of POSIX ACLs, that is, access ACLs and default
ACLs. You can use access ACLs to grant permission for a specific file or
@@ -60,8 +55,7 @@ of the default ACLs of the directory.
You can set ACLs for per user, per group, for users not in the user
group for the file, and via the effective right mask.
-Setting Access ACLs
--------------------
+##Setting Access ACLs
You can apply access ACLs to grant permission for both files and
directories.
@@ -80,12 +74,12 @@ Permissions must be a combination of the characters `r` (read), `w`
following format and can specify multiple entry types separated by
commas.
- ACL Entry Description
- ---------------------- --------------------------------------------------------------------------------------------------------------------------------------------------
- u:uid:\<permission\> Sets the access ACLs for a user. You can specify user name or UID
- g:gid:\<permission\> Sets the access ACLs for a group. You can specify group name or GID.
- m:\<permission\> Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries.
- o:\<permission\> Sets the access ACLs for users other than the ones in the group for the file.
+ ACL Entry | Description
+ --- | ---
+ u:uid:\<permission\> | Sets the access ACLs for a user. You can specify user name or UID
+ g:gid:\<permission\> | Sets the access ACLs for a group. You can specify group name or GID.
+ m:\<permission\> | Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries.
+ o:\<permission\> | Sets the access ACLs for users other than the ones in the group for the file.
If a file or directory already has an POSIX ACLs, and the setfacl
command is used, the additional permissions are added to the existing
@@ -95,8 +89,7 @@ For example, to give read and write permissions to user antony:
`# setfacl -m u:antony:rw /mnt/gluster/data/testfile `
-Setting Default ACLs
---------------------
+##Setting Default ACLs
You can apply default ACLs only to directories. They determine the
permissions of a file system objects that inherits from its parent
@@ -126,11 +119,9 @@ default ACLs are passed to the files and subdirectories in it:
- A subdirectory inherits the default ACLs of the parent directory
both as its default ACLs and as an access ACLs.
-
- A file inherits the default ACLs as its access ACLs.
-Retrieving POSIX ACLs
-=====================
+##Retrieving POSIX ACLs
You can view the existing POSIX ACLs for a file or directory.
@@ -169,8 +160,7 @@ You can view the existing POSIX ACLs for a file or directory.
default:mask::rwx
default:other::r-x
-Removing POSIX ACLs
-===================
+##Removing POSIX ACLs
To remove all the permissions for a user, groups, or others, use the
following command:
@@ -181,16 +171,14 @@ For example, to remove all permissions from the user antony:
`# setfacl -x u:antony /mnt/gluster/data/test-file`
-Samba and ACLs
-==============
+##Samba and ACLs
If you are using Samba to access GlusterFS FUSE mount, then POSIX ACLs
are enabled by default. Samba has been compiled with the
`--with-acl-support` option, so no special flags are required when
accessing or mounting a Samba share.
-NFS and ACLs
-============
+##NFS and ACLs
Currently we do not support ACLs configuration through NFS, i.e. setfacl
and getfacl commands do not work. However, ACLs permissions set using
diff --git a/doc/admin-guide/en-US/markdown/admin_Hadoop.md b/doc/admin-guide/en-US/markdown/admin_Hadoop.md
index 2894fa713..742e8ad62 100644
--- a/doc/admin-guide/en-US/markdown/admin_Hadoop.md
+++ b/doc/admin-guide/en-US/markdown/admin_Hadoop.md
@@ -1,5 +1,4 @@
-Managing Hadoop Compatible Storage
-==================================
+#Managing Hadoop Compatible Storage
GlusterFS provides compatibility for Apache Hadoop and it uses the
standard file system APIs available in Hadoop to provide a new storage
@@ -7,54 +6,44 @@ option for Hadoop deployments. Existing MapReduce based applications can
use GlusterFS seamlessly. This new functionality opens up data within
Hadoop deployments to any file-based or object-based application.
-Architecture Overview
-=====================
+##Architecture Overview
The following diagram illustrates Hadoop integration with GlusterFS:
-Advantages
-==========
+![ Hadoop Architecture ](../images/Hadoop_Architecture.png)
+
+##Advantages
The following are the advantages of Hadoop Compatible Storage with
GlusterFS:
- Provides simultaneous file-based and object-based access within
Hadoop.
-
- Eliminates the centralized metadata server.
-
- Provides compatibility with MapReduce applications and rewrite is
not required.
-
- Provides a fault tolerant file system.
-Preparing to Install Hadoop Compatible Storage
-==============================================
+##Preparing to Install Hadoop Compatible Storage
This section provides information on pre-requisites and list of
dependencies that will be installed during installation of Hadoop
compatible storage.
-Pre-requisites
---------------
+###Pre-requisites
The following are the pre-requisites to install Hadoop Compatible
Storage :
- Hadoop 0.20.2 is installed, configured, and is running on all the
machines in the cluster.
-
- Java Runtime Environment
-
- Maven (mandatory only if you are building the plugin from the
source)
-
- JDK (mandatory only if you are building the plugin from the source)
-
- getfattr - command line utility
-Installing, and Configuring Hadoop Compatible Storage
-=====================================================
+##Installing, and Configuring Hadoop Compatible Storage
This section describes how to install and configure Hadoop Compatible
Storage in your storage environment and verify that it is functioning
@@ -70,9 +59,8 @@ correctly.
The following files will be extracted:
- - /usr/local/lib/glusterfs-Hadoop-version-gluster\_plugin\_version.jar
-
- - /usr/local/lib/conf/core-site.xml
+ - /usr/local/lib/glusterfs-Hadoop-version-gluster\_plugin\_version.jar
+ - /usr/local/lib/conf/core-site.xml
3. (Optional) To install Hadoop Compatible Storage in a different
location, run the following command:
@@ -116,22 +104,13 @@ correctly.
The following are the configurable fields:
- -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Property Name Default Value Description
- ---------------------- -------------------------- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- fs.default.name glusterfs://fedora1:9000 Any hostname in the cluster as the server and any port number.
-
- fs.glusterfs.volname hadoopvol GlusterFS volume to mount.
-
- fs.glusterfs.mount /mnt/glusterfs The directory used to fuse mount the volume.
-
- fs.glusterfs.server fedora2 Any hostname or IP address on the cluster except the client/master.
-
- quick.slave.io Off Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster.
- > **Note**
- >
- > This option is not tested widely
- -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Property Name | Default Value | Description
+ --- | --- | ---
+ fs.default.name | glusterfs://fedora1:9000 | Any hostname in the cluster as the server and any port number.
+ fs.glusterfs.volname | hadoopvol | GlusterFS volume to mount.
+ fs.glusterfs.mount | /mnt/glusterfs | The directory used to fuse mount the volume.
+ fs.glusterfs.server | fedora2 | Any hostname or IP address on the cluster except the client/master.
+ quick.slave.io | Off | Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster. **Note**: This option is not tested widely.
5. Create a soft link in Hadoop’s library and configuration directory
for the downloaded files (in Step 3) using the following commands:
@@ -141,7 +120,6 @@ correctly.
For example,
`# ln –s /usr/local/lib/glusterfs-0.20.2-0.1.jar /lib/glusterfs-0.20.2-0.1.jar`
-
`# ln –s /usr/local/lib/conf/core-site.xml /conf/core-site.xml `
6. (Optional) You can run the following command on Hadoop master to
@@ -150,8 +128,7 @@ correctly.
`# build-deploy-jar.py -d -c `
-Starting and Stopping the Hadoop MapReduce Daemon
-=================================================
+##Starting and Stopping the Hadoop MapReduce Daemon
To start and stop MapReduce daemon
@@ -164,7 +141,6 @@ To start and stop MapReduce daemon
`# /bin/stop-mapred.sh `
> **Note**
->
> You must start Hadoop MapReduce daemon on all servers.
[]: http://download.gluster.com/pub/gluster/glusterfs/qa-releases/3.3-beta-2/glusterfs-hadoop-0.20.2-0.1.x86_64.rpm
diff --git a/doc/admin-guide/en-US/markdown/admin_UFO.md b/doc/admin-guide/en-US/markdown/admin_UFO.md
index 3311eff01..882710410 100644
--- a/doc/admin-guide/en-US/markdown/admin_UFO.md
+++ b/doc/admin-guide/en-US/markdown/admin_UFO.md
@@ -1,5 +1,4 @@
-Managing Unified File and Object Storage
-========================================
+#Managing Unified File and Object Storage
Unified File and Object Storage (UFO) unifies NAS and object storage
technology. It provides a system for data storage that enables users to
@@ -35,8 +34,7 @@ a traditional file system. You will not be able to mount this system
like traditional SAN or NAS volumes and perform POSIX compliant
operations.
-Components of Object Storage
-============================
+##Components of Object Storage
The major components of Object Storage are:
@@ -88,35 +86,26 @@ objects within that account. If a user wants to access the content from
another account, they must have API access key or a session token
provided by their authentication system.
-Advantages of using GlusterFS Unified File and Object Storage
-=============================================================
+##Advantages of using GlusterFS Unified File and Object Storage
The following are the advantages of using GlusterFS UFO:
- No limit on upload and download files sizes as compared to Open
Stack Swift which limits the object size to 5GB.
-
- A unified view of data across NAS and Object Storage technologies.
-
- Using GlusterFS's UFO has other advantages like the following:
-
- High availability
-
- Scalability
-
- Replication
-
- Elastic Volume management
-Preparing to Deploy Unified File and Object Storage
-===================================================
+##Preparing to Deploy Unified File and Object Storage
This section provides information on pre-requisites and list of
dependencies that will be installed during the installation of Unified
File and Object Storage.
-Pre-requisites
---------------
+###Pre-requisites
GlusterFS's Unified File and Object Storage needs `user_xattr` support
from the underlying disk file system. Use the following command to
@@ -128,50 +117,33 @@ For example,
`# mount –o remount,user_xattr /dev/hda1 `
-Dependencies
+####Dependencies
------------
The following packages are installed on GlusterFS when you install
Unified File and Object Storage:
- curl
-
- memcached
-
- openssl
-
- xfsprogs
-
- python2.6
-
- pyxattr
-
- python-configobj
-
- python-setuptools
-
- python-simplejson
-
- python-webob
-
- python-eventlet
-
- python-greenlet
-
- python-pastedeploy
-
- python-netifaces
-Installing and Configuring Unified File and Object Storage
-==========================================================
+##Installing and Configuring Unified File and Object Storage
This section provides instructions on how to install and configure
Unified File and Object Storage in your storage environment.
-Installing Unified File and Object Storage
-------------------------------------------
-
-To install Unified File and Object Storage:
+##Installing Unified File and Object Storage
1. Download `rhel_install.sh` install script from [][] .
@@ -197,15 +169,13 @@ To install Unified File and Object Storage:
> use a load balancer like pound, nginx, and so on to distribute the
> request across the machines.
-Adding Users
-------------
+###Adding Users
The authentication system allows the administrator to grant different
levels of access to different users based on the requirement. The
following are the types of user permissions:
- admin user
-
- normal user
Admin user has read and write permissions on the account. By default, a
@@ -228,10 +198,7 @@ For example,
> the `proxy-server.conf` file. It is highly recommended that you remove
> all the default sample user entries from the configuration file.
-For more information on setting ACLs, see ?.
-
-Configuring Proxy Server
-------------------------
+##Configuring Proxy Server
The Proxy Server is responsible for connecting to the rest of the
OpenStack Object Storage architecture. For each request, it looks up the
@@ -251,7 +218,8 @@ The configurable options pertaining to proxy server are stored in
account_autocreate=true
[filter:tempauth]
- use = egg:swift#tempauth user_admin_admin=admin.admin.reseller_admin
+ use = egg:swift#tempauth
+ user_admin_admin=admin.admin.reseller_admin
user_test_tester=testing.admin
user_test2_tester2=testing2.admin
user_test_tester3=testing3
@@ -266,15 +234,12 @@ By default, GlusterFS's Unified File and Object Storage is configured to
support HTTP protocol and uses temporary authentication to authenticate
the HTTP requests.
-Configuring Authentication System
----------------------------------
+###Configuring Authentication System
-Proxy server must be configured to authenticate using `
-
- `.
+There are several different authentication systems, such as tempauth, keystone,
+and swauth. Their respective documentation describes their usage in detail.
-Configuring Proxy Server for HTTPS
-----------------------------------
+###Configuring Proxy Server for HTTPS
By default, proxy server only handles HTTP request. To configure the
proxy server to process HTTPS requests, perform the following steps:
@@ -288,8 +253,8 @@ proxy server to process HTTPS requests, perform the following steps:
[DEFAULT]
bind_port = 443
- cert_file = /etc/swift/cert.crt
- key_file = /etc/swift/cert.key
+ cert_file = /etc/swift/cert.crt
+ key_file = /etc/swift/cert.key
3. Restart the servers using the following commands:
@@ -298,41 +263,40 @@ proxy server to process HTTPS requests, perform the following steps:
The following are the configurable options:
- Option Default Description
- ------------ ------------ -------------------------------
- bind\_ip 0.0.0.0 IP Address for server to bind
- bind\_port 80 Port for server to bind
- swift\_dir /etc/swift Swift configuration directory
- workers 1 Number of workers to fork
- user swift swift user
- cert\_file Path to the ssl .crt
- key\_file Path to the ssl .key
+ Option | Default | Description
+ ------------ | ------------ | -------------------------------
+ bind\_ip | 0.0.0.0 | IP Address for server to bind
+ bind\_port | 80 | Port for server to bind
+ swift\_dir | /etc/swift | Swift configuration directory
+ workers | 1 | Number of workers to fork
+ user | swift | swift user
+ cert\_file | | Path to the ssl .crt
+ key\_file | | Path to the ssl .key
: proxy-server.conf Default Options in the [DEFAULT] section
- Option Default Description
- ------------------------------- ----------------- -----------------------------------------------------------------------------------------------------------
- use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
- log\_name proxy-server Label used when logging
- log\_facility LOG\_LOCAL0 Syslog log facility
- log\_level INFO Log level
- log\_headers True If True, log headers in each request
- recheck\_account\_existence 60 Cache timeout in seconds to send memcached for account existence
- recheck\_container\_existence 60 Cache timeout in seconds to send memcached for container existence
- object\_chunk\_size 65536 Chunk size to read from object servers
- client\_chunk\_size 65536 Chunk size to read from clients
- memcache\_servers 127.0.0.1:11211 Comma separated list of memcached servers ip:port
- node\_timeout 10 Request timeout to external services
- client\_timeout 60 Timeout to read one chunk from a client
- conn\_timeout 0.5 Connection timeout to external services
- error\_suppression\_interval 60 Time in seconds that must elapse since the last error for a node to be considered no longer error limited
- error\_suppression\_limit 10 Error count to consider a node error limited
- allow\_account\_management false Whether account `PUT`s and `DELETE`s are even callable
+ Option | Default | Description
+ ------------------------------- | ----------------- | -----------------------------------------------------------------------
+ use | | paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
+ log\_name | proxy-server | Label used when logging
+ log\_facility | LOG\_LOCAL0 | Syslog log facility
+ log\_level | INFO | Log level
+ log\_headers | True | If True, log headers in each request
+ recheck\_account\_existence | 60 | Cache timeout in seconds to send memcached for account existence
+ recheck\_container\_existence | 60 | Cache timeout in seconds to send memcached for container existence
+ object\_chunk\_size | 65536 | Chunk size to read from object servers
+ client\_chunk\_size | 65536 | Chunk size to read from clients
+ memcache\_servers | 127.0.0.1:11211 | Comma separated list of memcached servers ip:port
+ node\_timeout | 10 | Request timeout to external services
+ client\_timeout | 60 | Timeout to read one chunk from a client
+ conn\_timeout | 0.5 | Connection timeout to external services
+ error\_suppression\_interval | 60 | Time in seconds that must elapse since the last error for a node to be considered no longer error limited
+ error\_suppression\_limit | 10 | Error count to consider a node error limited
+ allow\_account\_management | false | Whether account `PUT`s and `DELETE`s are even callable
: proxy-server.conf Server Options in the [proxy-server] section
-Configuring Object Server
--------------------------
+##Configuring Object Server
The Object Server is a very simple blob storage server that can store,
retrieve, and delete objects stored on local devices. Objects are stored
@@ -368,36 +332,35 @@ The configurable options pertaining Object Server are stored in the file
The following are the configurable options:
- Option Default Description
- -------------- ------------ ----------------------------------------------------------------------------------------------------
- swift\_dir /etc/swift Swift configuration directory
- devices /srv/node Mount parent directory where devices are mounted
- mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
- bind\_ip 0.0.0.0 IP Address for server to bind
- bind\_port 6000 Port for server to bind
- workers 1 Number of workers to fork
+ Option | Default | Description
+ -------------- | ------------ | ----------------------------------------------------------------------------------------------
+ swift\_dir | /etc/swift | Swift configuration directory
+ devices | /srv/node | Mount parent directory where devices are mounted
+ mount\_check | true | Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip | 0.0.0.0 | IP Address for server to bind
+ bind\_port | 6000 | Port for server to bind
+ workers | 1 | Number of workers to fork
: object-server.conf Default Options in the [DEFAULT] section
- Option Default Description
- ---------------------- --------------- ----------------------------------------------------------------------------------------------------
- use paste.deploy entry point for the object server. For most cases, this should be `egg:swift#object`.
- log\_name object-server log name used when logging
- log\_facility LOG\_LOCAL0 Syslog log facility
- log\_level INFO Logging level
- log\_requests True Whether or not to log each request
- user swift swift user
- node\_timeout 3 Request timeout to external services
- conn\_timeout 0.5 Connection timeout to external services
- network\_chunk\_size 65536 Size of chunks to read or write over the network
- disk\_chunk\_size 65536 Size of chunks to read or write to disk
- max\_upload\_time 65536 Maximum time allowed to upload an object
- slow 0 If \> 0, Minimum time in seconds for a `PUT` or `DELETE` request to complete
+ Option | Default | Description
+ ---------------------- | --------------- | ------------
+ use | | paste.deploy entry point for the object server. For most cases, this should be `egg:swift#object`.
+ log\_name | object-server | log name used when logging
+ log\_facility | LOG\_LOCAL0 | Syslog log facility
+ log\_level | INFO | Logging level
+ log\_requests | True | Whether or not to log each request
+ user | swift | swift user
+ node\_timeout | 3 | Request timeout to external services
+ conn\_timeout | 0.5 | Connection timeout to external services
+ network\_chunk\_size | 65536 | Size of chunks to read or write over the network
+ disk\_chunk\_size | 65536 | Size of chunks to read or write to disk
+ max\_upload\_time | 65536 | Maximum time allowed to upload an object
+ slow | 0 | If \> 0, Minimum time in seconds for a `PUT` or `DELETE` request to complete
: object-server.conf Server Options in the [object-server] section
-Configuring Container Server
-----------------------------
+##Configuring Container Server
The Container Server’s primary job is to handle listings of objects. The
listing is done by querying the GlusterFS mount point with path. This
@@ -430,32 +393,31 @@ The configurable options pertaining to container server are stored in
The following are the configurable options:
- Option Default Description
- -------------- ------------ ----------------------------------------------------------------------------------------------------
- swift\_dir /etc/swift Swift configuration directory
- devices /srv/node Mount parent directory where devices are mounted
- mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
- bind\_ip 0.0.0.0 IP Address for server to bind
- bind\_port 6001 Port for server to bind
- workers 1 Number of workers to fork
- user swift Swift user
+ Option | Default | Description
+ -------------- | ------------ | ------------
+ swift\_dir | /etc/swift | Swift configuration directory
+ devices | /srv/node | Mount parent directory where devices are mounted
+ mount\_check | true | Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip | 0.0.0.0 | IP Address for server to bind
+ bind\_port | 6001 | Port for server to bind
+ workers | 1 | Number of workers to fork
+ user | swift | Swift user
: container-server.conf Default Options in the [DEFAULT] section
- Option Default Description
- --------------- ------------------ ----------------------------------------------------------------------------------------------------------
- use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
- log\_name container-server Label used when logging
- log\_facility LOG\_LOCAL0 Syslog log facility
- log\_level INFO Logging level
- node\_timeout 3 Request timeout to external services
- conn\_timeout 0.5 Connection timeout to external services
+ Option | Default | Description
+ --------------- | ------------------ | ------------
+ use | | paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
+ log\_name | container-server | Label used when logging
+ log\_facility | LOG\_LOCAL0 | Syslog log facility
+ log\_level | INFO | Logging level
+ node\_timeout | 3 | Request timeout to external services
+ conn\_timeout | 0.5 | Connection timeout to external services
: container-server.conf Server Options in the [container-server]
section
-Configuring Account Server
---------------------------
+##Configuring Account Server
The Account Server is very similar to the Container Server, except that
it is responsible for listing of containers rather than objects. In UFO,
@@ -489,29 +451,28 @@ The configurable options pertaining to account server are stored in
The following are the configurable options:
- Option Default Description
- -------------- ------------ ----------------------------------------------------------------------------------------------------
- swift\_dir /etc/swift Swift configuration directory
- devices /srv/node mount parent directory where devices are mounted
- mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
- bind\_ip 0.0.0.0 IP Address for server to bind
- bind\_port 6002 Port for server to bind
- workers 1 Number of workers to fork
- user swift Swift user
+ Option | Default | Description
+ -------------- | ------------ | ---------------------------
+ swift\_dir | /etc/swift | Swift configuration directory
+ devices | /srv/node | mount parent directory where devices are mounted
+ mount\_check | true | Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip | 0.0.0.0 | IP Address for server to bind
+ bind\_port | 6002 | Port for server to bind
+ workers | 1 | Number of workers to fork
+ user | swift | Swift user
: account-server.conf Default Options in the [DEFAULT] section
- Option Default Description
- --------------- ---------------- ----------------------------------------------------------------------------------------------------------
- use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
- log\_name account-server Label used when logging
- log\_facility LOG\_LOCAL0 Syslog log facility
- log\_level INFO Logging level
+ Option | Default | Description
+ --------------- | ---------------- | ---------------------------
+ use | | paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
+ log\_name | account-server | Label used when logging
+ log\_facility | LOG\_LOCAL0 | Syslog log facility
+ log\_level | INFO | Logging level
: account-server.conf Server Options in the [account-server] section
-Starting and Stopping Server
-----------------------------
+##Starting and Stopping Server
You must start the server manually when system reboots and whenever you
update/modify the configuration files.
@@ -524,16 +485,14 @@ update/modify the configuration files.
`# swift_init main stop`
-Working with Unified File and Object Storage
-============================================
+##Working with Unified File and Object Storage
This section describes the REST API for administering and managing
Object Storage. All requests will be directed to the host and URL
described in the `X-Storage-URL HTTP` header obtained during successful
authentication.
-Configuring Authenticated Access
---------------------------------
+###Configuring Authenticated Access
Authentication is the process of proving identity to the system. To use
the REST interface, you must obtain an authorization token using GET
@@ -581,8 +540,7 @@ the headers of the response.
>
> The authentication tokens are valid for a 24 hour period.
-Working with Accounts
----------------------
+##Working with Accounts
This section describes the list of operations you can perform at the
account level of the URL.
@@ -593,11 +551,11 @@ You can list the objects of a specific container, or all containers, as
needed using GET command. You can use the following optional parameters
with GET request to refine the results:
- Parameter Description
- ----------- --------------------------------------------------------------------------
- limit Limits the number of results to at most *n* value.
- marker Returns object names greater in value than the specified marker.
- format Specify either json or xml to return the respective serialized response.
+ Parameter | Description
+ ----------- | --------------------------------------------------------------------------
+ limit | Limits the number of results to at most *n* values.
+ marker | Returns object names greater in value than the specified marker.
+ format | Specify either json or xml to return the respective serialized response.
**To display container information**
@@ -660,8 +618,7 @@ containers and the total bytes stored in the account.
AUTH_tkde3ad38b087b49bbbac0494f7600a554'
https://example.storage.com:443/v1/AUTH_test -k
-Working with Containers
------------------------
+##Working with Containers
This section describes the list of operations you can perform at the
container level of the URL.
@@ -706,14 +663,14 @@ You can list the objects of a container using GET command. You can use
the following optional parameters with GET request to refine the
results:
- Parameter Description
- ----------- --------------------------------------------------------------------------------------------------------------
- limit Limits the number of results to at most *n* value.
- marker Returns object names greater in value than the specified marker.
- prefix Displays the results limited to object names beginning with the substring x. beginning with the substring x.
- path Returns the object names nested in the pseudo path.
- format Specify either json or xml to return the respective serialized response.
- delimiter Returns all the object names nested in the container.
+ Parameter | Description
+ ----------- | --------------------------------------------------------------------------------------------------------------
+ limit | Limits the number of results to at most *n* values.
+ marker | Returns object names greater in value than the specified marker.
+ prefix | Displays the results limited to object names beginning with the substring x.
+ path | Returns the object names nested in the pseudo path.
+ format | Specify either json or xml to return the respective serialized response.
+ delimiter | Returns all the object names nested in the container.
To display objects of a container
@@ -896,8 +853,7 @@ container using cURL (for the above example), run the following command:
https://example.storage.com:443/v1/AUTH_test/images
-H 'X-Container-Read: .r:*' -k
-Working with Objects
---------------------
+##Working with Objects
An object represents the data and any metadata for the files stored in
the system. Through the REST interface, metadata for an object can be
diff --git a/doc/admin-guide/en-US/markdown/admin_commandref.md b/doc/admin-guide/en-US/markdown/admin_commandref.md
deleted file mode 100644
index 4ff05f4ef..000000000
--- a/doc/admin-guide/en-US/markdown/admin_commandref.md
+++ /dev/null
@@ -1,180 +0,0 @@
-Command Reference
-=================
-
-This section describes the available commands and includes the following
-section:
-
-- gluster Command
-
- Gluster Console Manager (command line interpreter)
-
-- glusterd Daemon
-
- Gluster elastic volume management daemon
-
-gluster Command
-===============
-
-**NAME**
-
-gluster - Gluster Console Manager (command line interpreter)
-
-**SYNOPSIS**
-
-To run the program and display the gluster prompt:
-
-**gluster**
-
-To specify a command directly: gluster [COMMANDS] [OPTIONS]
-
-**DESCRIPTION**
-
-The Gluster Console Manager is a command line utility for elastic volume
-management. You can run the gluster command on any export server. The
-command enables administrators to perform cloud operations such as
-creating, expanding, shrinking, rebalancing, and migrating volumes
-without needing to schedule server downtime.
-
-**COMMANDS**
-
- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Command Description
- ---------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- **Volume**
-
- volume info [all | VOLNAME] Displays information about all volumes, or the specified volume.
-
- volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ... Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
-
- volume delete VOLNAME Deletes the specified volume.
-
- volume start VOLNAME Starts the specified volume.
-
- volume stop VOLNAME [force] Stops the specified volume.
-
- volume rename VOLNAME NEW-VOLNAME Renames the specified volume.
-
- volume help Displays help for the volume command.
-
- **Brick**
-
- volume add-brick VOLNAME NEW-BRICK ... Adds the specified brick to the specified volume.
-
- volume replace-brick VOLNAME (BRICK NEW-BRICK) start | pause | abort | status Replaces the specified brick.
-
- volume remove-brick VOLNAME [(replica COUNT)|(stripe COUNT)] BRICK ... Removes the specified brick from the specified volume.
-
- **Rebalance**
-
- volume rebalance VOLNAME start Starts rebalancing the specified volume.
-
- volume rebalance VOLNAME stop Stops rebalancing the specified volume.
-
- volume rebalance VOLNAME status Displays the rebalance status of the specified volume.
-
- **Log**
-
- volume log filename VOLNAME [BRICK] DIRECTORY Sets the log directory for the corresponding volume/brick.
-
- volume log rotate VOLNAME [BRICK] Rotates the log file for corresponding volume/brick.
-
- volume log locate VOLNAME [BRICK] Locates the log file for corresponding volume/brick.
-
- **Peer**
-
- peer probe HOSTNAME Probes the specified peer.
-
- peer detach HOSTNAME Detaches the specified peer.
-
- peer status Displays the status of peers.
-
- peer help Displays help for the peer command.
-
- **Geo-replication**
-
- volume geo-replication MASTER SLAVE start Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.
-
- You can specify a local slave volume as :VOLUME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
-
- volume geo-replication MASTER SLAVE stop Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.
-
- You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
-
- volume geo-replication MASTER SLAVE config [options] Configure geo-replication options between the hosts specified by MASTER and SLAVE.
-
- gluster-command COMMAND The path where the gluster command is installed.
-
- gluster-log-level LOGFILELEVEL The log level for gluster processes.
-
- log-file LOGFILE The path to the geo-replication log file.
-
- log-level LOGFILELEVEL The log level for geo-replication.
-
- remote-gsyncd COMMAND The path where the gsyncd binary is installed on the remote machine.
-
- ssh-command COMMAND The ssh command to use to connect to the remote machine (the default is ssh).
-
- rsync-command COMMAND The rsync command to use for synchronizing the files (the default is rsync).
-
- volume\_id= UID The command to delete the existing master UID for the intermediate/slave node.
-
- timeout SECONDS The timeout period.
-
- sync-jobs N The number of simultaneous files/directories that can be synchronized.
-
- ignore-deletes If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.
-
- **Other**
-
- help Display the command options.
-
- quit Exit the gluster command line interface.
- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-**FILES**
-
-/var/lib/glusterd/\*
-
-**SEE ALSO**
-
-fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8),
-glusterd(8)
-
-glusterd Daemon
-===============
-
-**NAME**
-
-glusterd - Gluster elastic volume management daemon
-
-**SYNOPSIS**
-
-glusterd [OPTION...]
-
-**DESCRIPTION**
-
-The glusterd daemon is used for elastic volume management. The daemon
-must be run on all export servers.
-
-**OPTIONS**
-
- Option Description
- ----------------------------------- ----------------------------------------------------------------------------------------------------------------
- **Basic**
- -l=LOGFILE, --log-file=LOGFILE Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).
- -L=LOGLEVEL, --log-level=LOGLEVEL Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
- --debug Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.
- -N, --no-daemon Runs the program in the foreground.
- **Miscellaneous**
- -?, --help Displays this help.
- --usage Displays a short usage message.
- -V, --version Prints the program version.
-
-**FILES**
-
-/var/lib/glusterd/\*
-
-**SEE ALSO**
-
-fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8),
-gluster(8)
diff --git a/doc/admin-guide/en-US/markdown/admin_console.md b/doc/admin-guide/en-US/markdown/admin_console.md
index 9b69de02d..126b7e206 100644
--- a/doc/admin-guide/en-US/markdown/admin_console.md
+++ b/doc/admin-guide/en-US/markdown/admin_console.md
@@ -1,5 +1,4 @@
-Using the Gluster Console Manager – Command Line Utility
-========================================================
+##Using the Gluster Console Manager – Command Line Utility
The Gluster Console Manager is a single command line utility that
simplifies configuration and management of your storage environment. The
@@ -18,7 +17,7 @@ You can also use the commands to create scripts for automation, as well
as use the commands as an API to allow integration with third-party
applications.
-**Running the Gluster Console Manager**
+###Running the Gluster Console Manager
You can run the Gluster Console Manager on any GlusterFS server either
by invoking the commands or by running the Gluster CLI in interactive
diff --git a/doc/admin-guide/en-US/markdown/admin_directory_Quota.md b/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
index 09c757781..21e42c669 100644
--- a/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
+++ b/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
@@ -1,5 +1,4 @@
-Managing Directory Quota
-========================
+#Managing Directory Quota
Directory quotas in GlusterFS allow you to set limits on usage of disk
space by directories or volumes. The storage administrators can control
@@ -19,9 +18,8 @@ the storage for the users depending on their role in the organization.
You can set the quota at the following levels:
-- Directory level – limits the usage at the directory level
-
-- Volume level – limits the usage at the volume level
+- **Directory level** – limits the usage at the directory level
+- **Volume level** – limits the usage at the volume level
> **Note**
>
@@ -29,8 +27,7 @@ You can set the quota at the following levels:
> The disk limit is enforced immediately after creating that directory.
> For more information on setting the disk limit, see Setting or Replacing Disk Limit.
-Enabling Quota
-==============
+##Enabling Quota
You must enable Quota to set disk limits.
@@ -45,8 +42,7 @@ You must enable Quota to set disk limits.
# gluster volume quota test-volume enable
Quota is enabled on /test-volume
-Disabling Quota
-===============
+##Disabling Quota
You can disable Quota, if needed.
@@ -61,8 +57,7 @@ You can disable Quota, if needed.
# gluster volume quota test-volume disable
Quota translator is disabled on /test-volume
-Setting or Replacing Disk Limit
-===============================
+##Setting or Replacing Disk Limit
You can create new directories in your storage environment and set the
disk limit or set disk limit for the existing directories. The directory
@@ -86,8 +81,7 @@ being treated as "/".
> In a multi-level directory hierarchy, the strictest disk limit
> will be considered for enforcement.
-Displaying Disk Limit Information
-=================================
+##Displaying Disk Limit Information
You can display disk limit information on all the directories on which
the limit is set.
@@ -119,8 +113,7 @@ the limit is set.
/Test/data 10 GB 6 GB
-Updating Memory Cache Size
-==========================
+##Updating Memory Cache Size
For performance reasons, quota caches the directory sizes on client. You
can set timeout indicating the maximum valid duration of directory sizes
@@ -151,8 +144,7 @@ on client side.
# gluster volume set test-volume features.quota-timeout 5
Set volume successful
-Removing Disk Limit
-===================
+##Removing Disk Limit
You can remove set disk limit, if you do not want quota anymore.
diff --git a/doc/admin-guide/en-US/markdown/admin_geo-replication.md b/doc/admin-guide/en-US/markdown/admin_geo-replication.md
index 849957244..47a2f6628 100644
--- a/doc/admin-guide/en-US/markdown/admin_geo-replication.md
+++ b/doc/admin-guide/en-US/markdown/admin_geo-replication.md
@@ -1,5 +1,4 @@
-Managing Geo-replication
-========================
+#Managing Geo-replication
Geo-replication provides a continuous, asynchronous, and incremental
replication service from one site to another over Local Area Networks
@@ -8,9 +7,9 @@ replication service from one site to another over Local Area Networks
Geo-replication uses a master–slave model, whereby replication and
mirroring occurs between the following partners:
-- Master – a GlusterFS volume
+- **Master** – a GlusterFS volume
-- Slave – a slave which can be of the following types:
+- **Slave** – a slave which can be of the following types:
- A local directory which can be represented as file URL like
`file:///path/to/dir`. You can use shortened form, for example,
@@ -34,37 +33,24 @@ This section introduces Geo-replication, illustrates the various
deployment scenarios, and explains how to configure the system to
provide replication and mirroring in your environment.
-Replicated Volumes vs Geo-replication
-=====================================
+##Replicated Volumes vs Geo-replication
The following table lists the difference between replicated volumes and
geo-replication:
- Replicated Volumes Geo-replication
- --------------------------------------------------------------------------------------- -----------------------------------------------------------------------------------------------------------------
- Mirrors data across clusters Mirrors data across geographically distributed clusters
- Provides high-availability Ensures backing up of data for disaster recovery
- Synchronous replication (each and every file operation is sent across all the bricks) Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences)
+ Replicated Volumes | Geo-replication
+ --- | ---
+ Mirrors data across clusters | Mirrors data across geographically distributed clusters
+ Provides high-availability | Ensures backing up of data for disaster recovery
+ Synchronous replication (each and every file operation is sent across all the bricks) | Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences)
-Preparing to Deploy Geo-replication
-===================================
+##Preparing to Deploy Geo-replication
This section provides an overview of the Geo-replication deployment
scenarios, describes how you can check the minimum system requirements,
and explores common deployment scenarios.
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-Exploring Geo-replication Deployment Scenarios
-----------------------------------------------
+##Exploring Geo-replication Deployment Scenarios
Geo-replication provides an incremental replication service over Local
Area Networks (LANs), Wide Area Network (WANs), and across the Internet.
@@ -72,11 +58,8 @@ This section illustrates the most common deployment scenarios for
Geo-replication, including the following:
- Geo-replication over LAN
-
- Geo-replication over WAN
-
- Geo-replication over the Internet
-
- Multi-site cascading Geo-replication
**Geo-replication over LAN**
@@ -106,22 +89,15 @@ across multiple sites.
![ Multi-site cascading Geo-replication ][]
-Geo-replication Deployment Overview
------------------------------------
+##Geo-replication Deployment Overview
Deploying Geo-replication involves the following steps:
1. Verify that your environment matches the minimum system requirement.
- For more information, see ?.
-
-2. Determine the appropriate deployment scenario. For more information,
- see ?.
+2. Determine the appropriate deployment scenario.
+3. Start Geo-replication on master and slave systems, as required.
-3. Start Geo-replication on master and slave systems, as required. For
- more information, see ?.
-
-Checking Geo-replication Minimum Requirements
----------------------------------------------
+##Checking Geo-replication Minimum Requirements
Before deploying GlusterFS Geo-replication, verify that your systems
match the minimum requirements.
@@ -129,17 +105,16 @@ match the minimum requirements.
The following table outlines the minimum requirements for both master
and slave nodes within your environment:
- Component Master Slave
- ------------------------ --------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Operating System GNU/Linux GNU/Linux
- Filesystem GlusterFS 3.2 or higher GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively)
- Python Python 2.4 (with ctypes external module), or Python 2.5 (or higher) Python 2.4 (with ctypes external module), or Python 2.5 (or higher)
- Secure shell OpenSSH version 4.0 (or higher) SSH2-compliant daemon
- Remote synchronization rsync 3.0.7 or higher rsync 3.0.7 or higher
- FUSE GlusterFS supported versions GlusterFS supported versions
+ Component | Master | Slave
+ --- | --- | ---
+ Operating System | GNU/Linux | GNU/Linux
+ Filesystem | GlusterFS 3.2 or higher | GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively)
+ Python | Python 2.4 (with ctypes external module), or Python 2.5 (or higher) | Python 2.4 (with ctypes external module), or Python 2.5 (or higher)
+ Secure shell | OpenSSH version 4.0 (or higher) | SSH2-compliant daemon
+ Remote synchronization | rsync 3.0.7 or higher | rsync 3.0.7 or higher
+ FUSE | GlusterFS supported versions | GlusterFS supported versions
-Setting Up the Environment for Geo-replication
-----------------------------------------------
+##Setting Up the Environment for Geo-replication
**Time Synchronization**
@@ -172,8 +147,7 @@ geo-replication Start command will be issued) and the remote machine
`# ssh-copy-id -i /var/lib/glusterd/geo-replication/secret.pem.pub <user>@<hostname>`
-Setting Up the Environment for a Secure Geo-replication Slave
--------------------------------------------------------------
+##Setting Up the Environment for a Secure Geo-replication Slave
You can configure a secure slave using SSH so that master is granted a
restricted access. With GlusterFS, you need not specify configuration
@@ -366,25 +340,13 @@ following command:
`# gluster volume geo-replication '*' config allow-network ::1,127.0.0.1`
-Starting Geo-replication
-========================
+##Starting Geo-replication
This section describes how to configure and start Gluster
Geo-replication in your storage environment, and verify that it is
functioning correctly.
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-Starting Geo-replication
-------------------------
+###Starting Geo-replication
To start Gluster Geo-replication
@@ -401,10 +363,9 @@ To start Gluster Geo-replication
> **Note**
>
> You may need to configure the service before starting Gluster
- > Geo-replication. For more information, see ?.
+ > Geo-replication.
-Verifying Successful Deployment
--------------------------------
+###Verifying Successful Deployment
You can use the gluster command to verify the status of Gluster
Geo-replication in your environment.
@@ -425,8 +386,7 @@ Geo-replication in your environment.
______ ______________________________ ____________
Volume1 root@example.com:/data/remote_dir Starting....
-Displaying Geo-replication Status Information
----------------------------------------------
+###Displaying Geo-replication Status Information
You can display status information about a specific geo-replication
master session, or a particular master-slave session, or all
@@ -480,15 +440,13 @@ geo-replication sessions, as needed.
- **OK**: The geo-replication session is in a stable state.
- **Faulty**: The geo-replication session has witnessed some
- abnormality and the situation has to be investigated further. For
- further information, see ? section.
+ abnormality and the situation has to be investigated further.
- **Corrupt**: The monitor thread which is monitoring the
geo-replication session has died. This situation should not occur
- normally, if it persists contact Red Hat Support[][1].
+ normally.
-Configuring Geo-replication
----------------------------
+##Configuring Geo-replication
To configure Gluster Geo-replication
@@ -496,16 +454,13 @@ To configure Gluster Geo-replication
`# gluster volume geo-replication <MASTER> <SLAVE> config [options]`
- For more information about the options, see ?.
-
For example:
To view list of all option/value pair, use the following command:
`# gluster volume geo-replication Volume1 example.com:/data/remote_dir config`
-Stopping Geo-replication
-------------------------
+##Stopping Geo-replication
You can use the gluster command to stop Gluster Geo-replication (syncing
of data from Master to Slave) in your environment.
@@ -522,10 +477,7 @@ of data from Master to Slave) in your environment.
Stopping geo-replication session between Volume1 and
example.com:/data/remote_dir has been successful
- See ? for more information about the gluster command.
-
-Restoring Data from the Slave
-=============================
+##Restoring Data from the Slave
You can restore data from the slave to the master volume, whenever the
master volume becomes faulty for reasons like hardware failure.
@@ -687,15 +639,13 @@ Run the following command on slave (example.com):
Starting geo-replication session between Volume1 &
example.com:/data/remote_dir has been successful
-Best Practices
-==============
+##Best Practices
**Manually Setting Time**
If you have to change the time on your bricks manually, then you must
-set uniform time on all bricks. This avoids the out-of-time sync issue
-described in ?. Setting time backward corrupts the geo-replication
-index, so the recommended way to set the time manually is:
+set uniform time on all bricks. Setting time backward corrupts the
+geo-replication index, so the recommended way to set the time manually is:
1. Stop geo-replication between the master and slave using the
following command:
@@ -730,9 +680,9 @@ machine / chroot/container type solution) by the administrator to run
the geo-replication slave in it. Enhancement in this regard will be
available in follow-up minor release.
- [ Geo-replication over LAN ]: images/Geo-Rep_LAN.png
- [ Geo-replication over WAN ]: images/Geo-Rep_WAN.png
- [ Geo-replication over Internet ]: images/Geo-Rep03_Internet.png
- [ Multi-site cascading Geo-replication ]: images/Geo-Rep04_Cascading.png
+ [ Geo-replication over LAN ]: ../images/Geo-Rep_LAN.png
+ [ Geo-replication over WAN ]: ../images/Geo-Rep_WAN.png
+ [ Geo-replication over Internet ]: ../images/Geo-Rep03_Internet.png
+ [ Multi-site cascading Geo-replication ]: ../images/Geo-Rep04_Cascading.png
[]: http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Migration_Planning_Guide/ch04s07.html
[1]: www.redhat.com/support/
diff --git a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
index 6c06e27a0..f59134b80 100644
--- a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
+++ b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
@@ -1,167 +1,104 @@
-Managing GlusterFS Volumes
-==========================
+#Managing GlusterFS Volumes
This section describes how to perform common GlusterFS management
operations, including the following:
-- ?
+- [Tuning Volume Options](#tuning-options)
+- [Expanding Volumes](#expanding-volumes)
+- [Shrinking Volumes](#shrinking-volumes)
+- [Migrating Volumes](#migrating-volumes)
+- [Rebalancing Volumes](#rebalancing-volumes)
+- [Stopping Volumes](#stopping-volumes)
+- [Deleting Volumes](#deleting-volumes)
+- [Triggering Self-Heal on Replicate](#self-heal)
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-Tuning Volume Options
-=====================
+<a name="tuning-options" />
+##Tuning Volume Options
You can tune volume options, as needed, while the cluster is online and
available.
> **Note**
>
-> Red Hat recommends you to set server.allow-insecure option to ON if
+> It is recommended that you set the server.allow-insecure option to ON if
> there are too many bricks in each volume or if there are too many
> services which have already utilized all the privileged ports in the
> system. Turning this option ON allows ports to accept/reject messages
> from insecure ports. So, use this option only if your deployment
> requires it.
-To tune volume options
-
-- Tune volume options using the following command:
+Tune volume options using the following command:
`# gluster volume set <VOLNAME> <OPTION> <PARAMETER>`
- For example, to specify the performance cache size for test-volume:
-
- # gluster volume set test-volume performance.cache-size 256MB
- Set volume successful
-
- The following table lists the Volume options along with its
- description and default value:
-
- > **Note**
- >
- > The default options given here are subject to modification at any
- > given time and may not be the same for all versions.
-
- -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Option Description Default Value Available Options
- -------------------------------------- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------- ---------------------------------------------------------------------------------------
- auth.allow IP addresses of the clients which should be allowed to access the volume. \* (allow all) Valid IP address which includes wild card patterns including \*, such as 192.168.1.\*
-
- auth.reject IP addresses of the clients which should be denied to access the volume. NONE (reject none) Valid IP address which includes wild card patterns including \*, such as 192.168.2.\*
-
- client.grace-timeout Specifies the duration for the lock state to be maintained on the client after a network disconnection. 10 10 - 1800 secs
-
- cluster.self-heal-window-size Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. 16 0 - 1025 blocks
-
- cluster.data-self-heal-algorithm Specifies the type of self-heal. If you set the option as "full", the entire file is copied from source to destinations. If the option is set to "diff" the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the "diff" algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than "diff" which has to read checksums and then read and write. reset full | diff | reset
-
- cluster.min-free-disk Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks. 10% Percentage of required minimum free disk space
-
- cluster.stripe-block-size Specifies the size of the stripe unit that will be read from or written to. 128 KB (for all files) size in bytes
-
- cluster.self-heal-daemon Allows you to turn-off proactive self-heal on replicated volumes. on On | Off
-
- cluster.ensure-durability This option makes sure the data/metadata is durable across abrupt shutdown of the brick. on On | Off
-
- diagnostics.brick-log-level Changes the log-level of the bricks. INFO DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE
-
- diagnostics.client-log-level Changes the log-level of the clients. INFO DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE
-
- diagnostics.latency-measurement Statistics related to the latency of each operation would be tracked. off On | Off
-
- diagnostics.dump-fd-stats Statistics related to file-operations would be tracked. off On | Off
-
- feature.read-only Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it. off On | Off
-
- features.lock-heal Enables self-healing of locks when the network disconnects. on On | Off
-
- features.quota-timeout For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid. 0 0 - 3600 secs
-
- geo-replication.indexing Use this option to automatically sync the changes in the filesystem from Master to Slave. off On | Off
-
- network.frame-timeout The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. 1800 (30 mins) 1800 secs
-
- network.ping-timeout The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. 42 Secs 42 Secs
- This reconnect is a very expensive operation and should be avoided.
+For example, to specify the performance cache size for test-volume:
- nfs.enable-ino32 For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. Applications that will benefit are those that were either: off On | Off
- \* Built 32-bit and run on 32-bit machines.
-
- \* Built 32-bit on 64-bit systems.
-
- \* Built 64-bit but use a library built 32-bit, especially relevant for python and perl scripts.
-
- Either of the conditions above can lead to application on Linux NFS clients failing with "Invalid argument" or "Value too large for defined data type" errors.
+ # gluster volume set test-volume performance.cache-size 256MB
+ Set volume successful
- nfs.volume-access Set the access type for the specified sub-volume. read-write read-write|read-only
+The following table lists the volume options along with their
+descriptions and default values:
- nfs.trusted-write If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. off On | Off
- In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner.
-
- nfs.trusted-sync All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. off On | Off
-
- nfs.export-dir This option can be used to export specified comma separated subdirectories in the volume. The path must be an absolute path. Along with path allowed list of IPs/hostname can be associated with each subdirectory. If provided connection will allowed only from these IPs. Format: \<dir\>[(hostspec[|hostspec|...])][,...]. Where hostspec can be an IP address, hostname or an IP range in CIDR notation. **Note**: Care must be taken while configuring this option as invalid entries and/or unreachable DNS servers can introduce unwanted delay in all the mount calls. No sub directory exported. Absolute path with allowed list of IP/hostname.
-
- nfs.export-volumes Enable/Disable exporting entire volumes, instead if used in conjunction with nfs3.export-dir, can allow setting up only subdirectories as exports. on On | Off
-
- nfs.rpc-auth-unix Enable/Disable the AUTH\_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required. on On | Off
-
- nfs.rpc-auth-null Enable/Disable the AUTH\_NULL authentication type. It is not recommended to change the default value for this option. on On | Off
-
- nfs.rpc-auth-allow\<IP- Addresses\> Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes. Reject All IP address or Host name
-
- nfs.rpc-auth-reject IP- Addresses Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes. Reject All IP address or Host name
-
- nfs.ports-insecure Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. off On | Off
-
- nfs.addr-namelookup Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.\* filters. on On | Off
-
- nfs.register-with- portmap For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS. on On | Off
-
- nfs.port \<PORT- NUMBER\> Use this option on systems that need Gluster NFS to be associated with a non-default port number. 38465- 38467
-
- nfs.disable Turn-off volume being exported by NFS off On | Off
-
- performance.write-behind-window-size Size of the per-file write-behind buffer. 1 MB Write-behind cache size
-
- performance.io-thread-count The number of threads in IO threads translator. 16 0 - 65
-
- performance.flush-behind If this option is set ON, instructs write-behind translator to perform flush in background, by returning success (or any errors, if any of previous writes were failed) to application even before flush is sent to backend filesystem. On On | Off
-
- performance.cache-max-file-size Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. 2 \^ 64 -1 bytes size in bytes
-
- performance.cache-min-file-size Sets the minimum file size cached by the io-cache translator. Values same as "max" above. 0B size in bytes
-
- performance.cache-refresh-timeout The cached data for a file will be retained till 'cache-refresh-timeout' seconds, after which data re-validation is performed. 1 sec 0 - 61
-
- performance.cache-size Size of the read cache. 32 MB size in bytes
-
- server.allow-insecure Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. on On | Off
-
- server.grace-timeout Specifies the duration for the lock state to be maintained on the server after a network disconnection. 10 10 - 1800 secs
-
- server.statedump-path Location of the state dump file. /tmp directory of the brick New directory path
-
- storage.health-check-interval Number of seconds between health-checks done on the filesystem that is used for the brick(s). Defaults to 30 seconds, set to 0 to disable. /tmp directory of the brick New directory path
- -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
- You can view the changed volume options using
- the` # gluster volume info ` command. For more information, see ?.
-
-Expanding Volumes
-=================
+> **Note**
+>
+> The default options given here are subject to modification at any
+> given time and may not be the same for all versions.
+
+
+Option | Description | Default Value | Available Options
+--- | --- | --- | ---
+auth.allow | IP addresses of the clients which should be allowed to access the volume. | \* (allow all) | Valid IP address which includes wild card patterns including \*, such as 192.168.1.\*
+auth.reject | IP addresses of the clients which should be denied to access the volume. | NONE (reject none) | Valid IP address which includes wild card patterns including \*, such as 192.168.2.\*
+client.grace-timeout | Specifies the duration for the lock state to be maintained on the client after a network disconnection. | 10 | 10 - 1800 secs
+cluster.self-heal-window-size | Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. | 16 | 0 - 1025 blocks
+cluster.data-self-heal-algorithm | Specifies the type of self-heal. If you set the option as "full", the entire file is copied from source to destinations. If the option is set to "diff" the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the "diff" algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than "diff" which has to read checksums and then read and write. | reset | full/diff/reset
+cluster.min-free-disk | Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks | 10% | Percentage of required minimum free disk space
+cluster.stripe-block-size | Specifies the size of the stripe unit that will be read from or written to. | 128 KB (for all files) | size in bytes
+cluster.self-heal-daemon | Allows you to turn-off proactive self-heal on replicated volumes. | On | On/Off
+cluster.ensure-durability | This option makes sure the data/metadata is durable across abrupt shutdown of the brick. | On | On/Off
+diagnostics.brick-log-level | Changes the log-level of the bricks. | INFO | DEBUG/WARNING/ERROR/CRITICAL/NONE/TRACE
+diagnostics.client-log-level | Changes the log-level of the clients. | INFO | DEBUG/WARNING/ERROR/CRITICAL/NONE/TRACE
+diagnostics.latency-measurement | Statistics related to the latency of each operation would be tracked. | Off | On/Off
+diagnostics.dump-fd-stats | Statistics related to file-operations would be tracked. | Off | On/Off
+feature.read-only | Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it. | Off | On/Off
+features.lock-heal | Enables self-healing of locks when the network disconnects. | On | On/Off
+features.quota-timeout | For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid | 0 | 0 - 3600 secs
+geo-replication.indexing | Use this option to automatically sync the changes in the filesystem from Master to Slave. | Off | On/Off
+network.frame-timeout | The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. | 1800 (30 mins) | 1800 secs
+network.ping-timeout | The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. This reconnect is a very expensive operation and should be avoided. | 42 Secs | 42 Secs
+nfs.enable-ino32 | For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. | Off | On/Off
+nfs.volume-access | Set the access type for the specified sub-volume. | read-write | read-write/read-only
+nfs.trusted-write | If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner. | Off | On/Off
+nfs.trusted-sync | All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. | Off | On/Off
+nfs.export-dir | This option can be used to export specified comma separated subdirectories in the volume. The path must be an absolute path. Along with the path, an allowed list of IPs/hostnames can be associated with each subdirectory. If provided, connections will be allowed only from these IPs. Format: \<dir\>[(hostspec[\|hostspec\|...])][,...]. Where hostspec can be an IP address, hostname or an IP range in CIDR notation. **Note**: Care must be taken while configuring this option as invalid entries and/or unreachable DNS servers can introduce unwanted delay in all the mount calls. | No sub directory exported. | Absolute path with allowed list of IP/hostname
+nfs.export-volumes | Enable/Disable exporting entire volumes, instead if used in conjunction with nfs3.export-dir, can allow setting up only subdirectories as exports. | On | On/Off
+nfs.rpc-auth-unix | Enable/Disable the AUTH\_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required. | On | On/Off
+nfs.rpc-auth-null | Enable/Disable the AUTH\_NULL authentication type. It is not recommended to change the default value for this option. | On | On/Off
+nfs.rpc-auth-allow\<IP- Addresses\> | Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes. | Reject All | IP address or Host name
+nfs.rpc-auth-reject\<IP- Addresses\> | Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes. | Reject All | IP address or Host name
+nfs.ports-insecure | Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. | Off | On/Off
+nfs.addr-namelookup | Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.\* filters. | On | On/Off
+nfs.register-with-portmap | For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS. | On | On/Off
+nfs.port \<PORT- NUMBER\> | Use this option on systems that need Gluster NFS to be associated with a non-default port number. | NA | 38465- 38467
+nfs.disable | Turn-off volume being exported by NFS | Off | On/Off
+performance.write-behind-window-size | Size of the per-file write-behind buffer. | 1MB | Write-behind cache size
+performance.io-thread-count | The number of threads in IO threads translator. | 16 | 0-65
+performance.flush-behind | If this option is set ON, instructs write-behind translator to perform flush in background, by returning success (or any errors, if any of previous writes were failed) to application even before flush is sent to backend filesystem. | On | On/Off
+performance.cache-max-file-size | Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. | 2 \^ 64 -1 bytes | size in bytes
+performance.cache-min-file-size | Sets the minimum file size cached by the io-cache translator. Values same as "max" above | 0B | size in bytes
+performance.cache-refresh-timeout | The cached data for a file will be retained till 'cache-refresh-timeout' seconds, after which data re-validation is performed. | 1s | 0-61
+performance.cache-size | Size of the read cache. | 32 MB | size in bytes
+server.allow-insecure | Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. | On | On/Off
+server.grace-timeout | Specifies the duration for the lock state to be maintained on the server after a network disconnection. | 10 | 10 - 1800 secs
+server.statedump-path | Location of the state dump file. | /tmp directory of the brick | New directory path
+storage.health-check-interval | Number of seconds between health-checks done on the filesystem that is used for the brick(s). Defaults to 30 seconds, set to 0 to disable. | 30 secs | 0 (disabled) or an interval in seconds
+
+You can view the changed volume options using the following command:
+
+ ` # gluster volume info `
+
+<a name="expanding-volumes" />
+##Expanding Volumes
You can expand volumes, as needed, while the cluster is online and
available. For example, you might want to add a brick to a distributed
@@ -221,8 +158,8 @@ replicated volume, increasing the capacity of the GlusterFS volume.
You can use the rebalance command as described in [Rebalancing Volumes](#rebalancing-volumes).
-Shrinking Volumes
-=================
+<a name="shrinking-volumes" />
+##Shrinking Volumes
You can shrink volumes, as needed, while the cluster is online and
available. For example, you might need to remove a brick that has become
@@ -295,8 +232,8 @@ set).
You can use the rebalance command as described in [Rebalancing Volumes](#rebalancing-volumes).
-Migrating Volumes
-=================
+<a name="migrating-volumes" />
+##Migrating Volumes
You can migrate the data from one brick to another, as needed, while the
cluster is online and available.
@@ -306,8 +243,6 @@ cluster is online and available.
1. Make sure the new brick, server5 in this example, is successfully
added to the cluster.
- For more information, see ?.
-
2. Migrate the data from one brick to another using the following
command:
@@ -401,8 +336,8 @@ cluster is online and available.
In the above example, previously, there were bricks; 1,2,3, and 4
and now brick 3 is replaced by brick 5.
-Rebalancing Volumes
-===================
+<a name="rebalancing-volumes" />
+##Rebalancing Volumes
After expanding or shrinking a volume (using the add-brick and
remove-brick commands respectively), you need to rebalance the data
@@ -414,15 +349,13 @@ layout and/or data.
This section describes how to rebalance GlusterFS volumes in your
storage environment, using the following common scenarios:
-- Fix Layout - Fixes the layout changes so that the files can actually
- go to newly added nodes. For more information, see ?.
+- **Fix Layout** - Fixes the layout changes so that the files can actually
+ go to newly added nodes.
-- Fix Layout and Migrate Data - Rebalances volume by fixing the layout
- changes and migrating the existing data. For more information, see
- ?.
+- **Fix Layout and Migrate Data** - Rebalances volume by fixing the layout
+ changes and migrating the existing data.
-Rebalancing Volume to Fix Layout Changes
-----------------------------------------
+###Rebalancing Volume to Fix Layout Changes
Fixing the layout is necessary because the layout structure is static
for a given directory. In a scenario where new bricks have been added to
@@ -450,8 +383,7 @@ the servers.
# gluster volume rebalance test-volume fix-layout start
Starting rebalance on volume test-volume has been successful
-Rebalancing Volume to Fix Layout and Migrate Data
--------------------------------------------------
+###Rebalancing Volume to Fix Layout and Migrate Data
After expanding or shrinking a volume (using the add-brick and
remove-brick commands respectively), you need to rebalance the data
@@ -479,14 +411,11 @@ among the servers.
# gluster volume rebalance test-volume start force
Starting rebalancing on volume test-volume has been successful
-Displaying Status of Rebalance Operation
-----------------------------------------
+###Displaying Status of Rebalance Operation
You can display the status information about rebalance volume operation,
as needed.
-**To view status of rebalance volume**
-
- Check the status of the rebalance operation, using the following
command:
@@ -520,13 +449,10 @@ as needed.
--------- ---------------- ---- ------- -----------
617c923e-6450-4065-8e33-865e28d9428f 502 1873 334 completed
-Stopping Rebalance Operation
-----------------------------
+###Stopping Rebalance Operation
You can stop the rebalance operation, as needed.
-**To stop rebalance**
-
- Stop the rebalance operation using the following command:
`# gluster volume rebalance stop`
@@ -539,10 +465,8 @@ You can stop the rebalance operation, as needed.
617c923e-6450-4065-8e33-865e28d9428f 59 590 244 stopped
Stopped rebalance process on volume test-volume
-Stopping Volumes
-================
-
-To stop a volume
+<a name="stopping-volumes" />
+##Stopping Volumes
1. Stop the volume using the following command:
@@ -558,10 +482,8 @@ To stop a volume
Stopping volume test-volume has been successful
-Deleting Volumes
-================
-
-To delete a volume
+<a name="deleting-volumes" />
+##Deleting Volumes
1. Delete the volume using the following command:
@@ -577,8 +499,8 @@ To delete a volume
Deleting volume test-volume has been successful
-Triggering Self-Heal on Replicate
-=================================
+<a name="self-heal" />
+##Triggering Self-Heal on Replicate
In replicate module, previously you had to manually trigger a self-heal
when a brick goes offline and comes back online, to bring all the
diff --git a/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md b/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md
index 0312bd048..c3ac0609b 100644
--- a/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md
+++ b/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md
@@ -1,5 +1,4 @@
-Monitoring your GlusterFS Workload
-==================================
+#Monitoring your GlusterFS Workload
You can monitor the GlusterFS volumes on different parameters.
Monitoring volumes helps in capacity planning and performance tuning
@@ -14,8 +13,7 @@ performance needs to be probed.
You can also perform statedump of the brick processes and nfs server
process of a volume, and also view volume status and volume information.
-Running GlusterFS Volume Profile Command
-========================================
+##Running GlusterFS Volume Profile Command
GlusterFS Volume Profile command provides an interface to get the
per-brick I/O information for each File Operation (FOP) of a volume. The
@@ -25,21 +23,17 @@ system.
This section describes how to run GlusterFS Volume Profile command by
performing the following operations:
-- ?
+- [Start Profiling](#start-profiling)
+- [Displaying the I/O Information](#displaying-io)
+- [Stop Profiling](#stop-profiling)
-- ?
-
-- ?
-
-Start Profiling
----------------
+<a name="start-profiling" />
+###Start Profiling
You must start the Profiling to view the File Operation information for
each brick.
-**To start profiling:**
-
-- Start profiling using the following command:
+To start profiling, use the following command:
`# gluster volume profile start `
@@ -52,17 +46,12 @@ When profiling on the volume is started, the following additional
options are displayed in the Volume Info:
diagnostics.count-fop-hits: on
-
diagnostics.latency-measurement: on
-Displaying the I/0 Information
-------------------------------
-
-You can view the I/O information of each brick.
-
-To display I/O information:
+<a name="displaying-io" />
+###Displaying the I/O Information
-- Display the I/O information using the following command:
+You can view the I/O information of each brick by using the following command:
`# gluster volume profile info`
@@ -117,26 +106,23 @@ For example, to see the I/O information on test-volume:
BytesWritten : 195571980
-Stop Profiling
---------------
+<a name="stop-profiling" />
+###Stop Profiling
You can stop profiling the volume, if you do not need profiling
information anymore.
-**To stop profiling**
-
-- Stop profiling using the following command:
+Stop profiling using the following command:
`# gluster volume profile stop`
- For example, to stop profiling on test-volume:
+For example, to stop profiling on test-volume:
`# gluster volume profile test-volume stop`
`Profiling stopped on test-volume`
-Running GlusterFS Volume TOP Command
-====================================
+##Running GlusterFS Volume TOP Command
GlusterFS Volume Top command allows you to view the glusterfs bricks’
performance metrics like read, write, file open calls, file read calls,
@@ -146,22 +132,16 @@ top command displays up to 100 results.
This section describes how to run and view the results for the following
GlusterFS Top commands:
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
+- [Viewing Open fd Count and Maximum fd Count](#open-fd-count)
+- [Viewing Highest File Read Calls](#file-read)
+- [Viewing Highest File Write Calls](#file-write)
+- [Viewing Highest Open Calls on Directories](#open-dir)
+- [Viewing Highest Read Calls on Directory](#read-dir)
+- [Viewing List of Read Performance on each Brick](#read-perf)
+- [Viewing List of Write Performance on each Brick](#write-perf)
-- ?
-
-- ?
-
-Viewing Open fd Count and Maximum fd Count
-------------------------------------------
+<a name="open-fd-count" />
+###Viewing Open fd Count and Maximum fd Count
You can view both current open fd count (list of files that are
currently the most opened and the count) on the brick and the maximum
@@ -171,8 +151,6 @@ servers are up and running). If the brick name is not specified, then
open fd metrics of all the bricks belonging to the volume will be
displayed.
-**To view open fd count and maximum fd count:**
-
- View open fd count and maximum fd count using the following command:
`# gluster volume top open [brick ] [list-cnt ]`
@@ -221,14 +199,12 @@ displayed.
9 /clients/client8/~dmtmp/PARADOX/
STUDENTS.DB
-Viewing Highest File Read Calls
--------------------------------
+<a name="file-read" />
+###Viewing Highest File Read Calls
You can view highest read calls on each brick. If brick name is not
specified, then by default, list of 100 files will be displayed.
-**To view highest file Read calls:**
-
- View highest file Read calls using the following command:
`# gluster volume top read [brick ] [list-cnt ] `
@@ -265,15 +241,13 @@ specified, then by default, list of 100 files will be displayed.
54 /clients/client8/~dmtmp/SEED/LARGE.FIL
-Viewing Highest File Write Calls
---------------------------------
+<a name="file-write" />
+###Viewing Highest File Write Calls
You can view list of files which has highest file write calls on each
brick. If brick name is not specified, then by default, list of 100
files will be displayed.
-**To view highest file Write calls:**
-
- View highest file Write calls using the following command:
`# gluster volume top write [brick ] [list-cnt ] `
@@ -308,15 +282,13 @@ files will be displayed.
59 /clients/client3/~dmtmp/SEED/LARGE.FIL
-Viewing Highest Open Calls on Directories
------------------------------------------
+<a name="open-dir" />
+###Viewing Highest Open Calls on Directories
You can view list of files which has highest open calls on directories
of each brick. If brick name is not specified, then the metrics of all
the bricks belonging to that volume will be displayed.
-To view list of open calls on each directory
-
- View list of open calls on each directory using the following
command:
@@ -353,15 +325,13 @@ To view list of open calls on each directory
402 /clients/client4/~dmtmp
-Viewing Highest Read Calls on Directory
----------------------------------------
+<a name="read-dir" />
+###Viewing Highest Read Calls on Directory
You can view list of files which has highest directory read calls on
each brick. If brick name is not specified, then the metrics of all the
bricks belonging to that volume will be displayed.
-**To view list of highest directory read calls on each brick**
-
- View list of highest directory read calls on each brick using the
following command:
@@ -398,8 +368,8 @@ bricks belonging to that volume will be displayed.
800 /clients/client4/~dmtmp
-Viewing List of Read Performance on each Brick
-----------------------------------------------
+<a name="read-perf" />
+###Viewing List of Read Performance on each Brick
You can view the read throughput of files on each brick. If brick name
is not specified, then the metrics of all the bricks belonging to that
@@ -443,8 +413,6 @@ volume will be displayed. The output will be the read throughput.
This command will initiate a dd for the specified count and block size
and measures the corresponding throughput.
-**To view list of read performance on each brick**
-
- View list of read performance on each brick using the following
command:
@@ -494,9 +462,8 @@ and measures the corresponding throughput.
2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
BASEMACH.DOC 15:39:09.336572
-
-Viewing List of Write Performance on each Brick
------------------------------------------------
+<a name="write-perf" />
+###Viewing List of Write Performance on each Brick
You can view list of write throughput of files on each brick. If brick
name is not specified, then the metrics of all the bricks belonging to
@@ -552,14 +519,11 @@ performance on each brick:
516.00 /clients/client6/~dmtmp/ACCESS/ -2011-01-31
FASTENER.MDB 15:39:01.797317
-Displaying Volume Information
-=============================
+##Displaying Volume Information
You can display information about a specific volume, or all volumes, as
needed.
-**To display volume information**
-
- Display information about a specific volume using the following
command:
@@ -611,8 +575,7 @@ needed.
Bricks:
Brick: server:/brick6
-Performing Statedump on a Volume
-================================
+##Performing Statedump on a Volume
Statedump is a mechanism through which you can get details of all
internal variables and state of the glusterfs process at the time of
@@ -668,8 +631,7 @@ dumped:
`# gluster volume info `
-Displaying Volume Status
-========================
+##Displaying Volume Status
You can display the status information about a specific volume, brick or
all volumes, as needed. Status information can be used to understand the
diff --git a/doc/admin-guide/en-US/markdown/admin_puppet.md b/doc/admin-guide/en-US/markdown/admin_puppet.md
new file mode 100644
index 000000000..103449be0
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_puppet.md
@@ -0,0 +1,499 @@
+#Puppet-Gluster
+<!---
+GlusterFS module by James
+Copyright (C) 2010-2013+ James Shubin
+Written by James Shubin <james@shubin.ca>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+-->
+##A GlusterFS Puppet module by [James](https://ttboj.wordpress.com/)
+####Available from:
+####[https://github.com/purpleidea/puppet-gluster/](https://github.com/purpleidea/puppet-gluster/)
+
+####Also available from:
+####[https://forge.gluster.org/puppet-gluster/](https://forge.gluster.org/puppet-gluster/)
+
+####Table of Contents
+
+1. [Overview](#overview)
+2. [Module description - What the module does](#module-description)
+3. [Setup - Getting started with Puppet-Gluster](#setup)
+ * [What can Puppet-Gluster manage?](#what-can-puppet-gluster-manage)
+ * [Simple setup](#simple-setup)
+ * [Elastic setup](#elastic-setup)
+ * [Advanced setup](#advanced-setup)
+4. [Usage/FAQ - Notes on management and frequently asked questions](#usage-and-frequently-asked-questions)
+5. [Reference - Class and type reference](#reference)
+ * [gluster::simple](#glustersimple)
+ * [gluster::elastic](#glusterelastic)
+ * [gluster::server](#glusterserver)
+ * [gluster::host](#glusterhost)
+ * [gluster::brick](#glusterbrick)
+ * [gluster::volume](#glustervolume)
+ * [gluster::volume::property](#glustervolumeproperty)
+6. [Examples - Example configurations](#examples)
+7. [Limitations - Puppet versions, OS compatibility, etc...](#limitations)
+8. [Development - Background on module development](#development)
+9. [Author - Author and contact information](#author)
+
+##Overview
+
+The Puppet-Gluster module installs, configures, and manages a GlusterFS cluster.
+
+##Module Description
+
+This Puppet-Gluster module handles installation, configuration, and management
+of GlusterFS across all of the hosts in the cluster.
+
+##Setup
+
+###What can Puppet-Gluster manage?
+
+Puppet-Gluster is designed to be able to manage as much or as little of your
+GlusterFS cluster as you wish. All features are optional. If there is a feature
+that doesn't appear to be optional, and you believe it should be, please let me
+know. Having said that, it makes good sense to me to have Puppet-Gluster manage
+as much of your GlusterFS infrastructure as it can. At the moment, it cannot
+rack new servers, but I am accepting funding to explore this feature ;) At the
+moment it can manage:
+
+* GlusterFS packages (rpm)
+* GlusterFS configuration files (/var/lib/glusterd/)
+* GlusterFS host peering (gluster peer probe)
+* GlusterFS storage partitioning (fdisk)
+* GlusterFS storage formatting (mkfs)
+* GlusterFS brick creation (mkdir)
+* GlusterFS services (glusterd)
+* GlusterFS firewalling (whitelisting)
+* GlusterFS volume creation (gluster volume create)
+* GlusterFS volume state (started/stopped)
+* GlusterFS volume properties (gluster volume set)
+* And much more...
+
+###Simple setup
+
+include '::gluster::simple' is enough to get you up and running. When using the
+gluster::simple class, or with any other Puppet-Gluster configuration,
+identical definitions must be used on all hosts in the cluster. The simplest
+way to accomplish this is with a single shared puppet host definition like:
+
+```puppet
+node /^annex\d+$/ { # annex{1,2,..N}
+ class { '::gluster::simple':
+ }
+}
+```
+
+If you wish to pass in different parameters, you can specify them in the class
+before you provision your hosts:
+
+```puppet
+class { '::gluster::simple':
+ replica => 2,
+ volume => ['volume1', 'volume2', 'volumeN'],
+}
+```
+
+###Elastic setup
+
+The gluster::elastic class is not yet available. Stay tuned!
+
+###Advanced setup
+
+Some system administrators may wish to manually itemize each of the required
+components for the Puppet-Gluster deployment. This happens automatically with
+the higher level modules, but may still be a desirable feature, particularly
+for non-elastic storage pools where the configuration isn't expected to change
+very often (if ever).
+
+To put together your cluster piece by piece, you must manually include and
+define each class and type that you wish to use. If there are certain aspects
+that you wish to manage yourself, you can omit them from your configuration.
+See the [reference](#reference) section below for the specifics. Here is one
+possible example:
+
+```puppet
+class { '::gluster::server':
+ shorewall => true,
+}
+
+gluster::host { 'annex1.example.com':
+ # use uuidgen to make these
+ uuid => '1f660ca2-2c78-4aa0-8f4d-21608218c69c',
+}
+
+# note that this is using a folder on your existing file system...
+# this can be useful for prototyping gluster using virtual machines
+# if this isn't a separate partition, remember that your root fs will
+# run out of space when your gluster volume does!
+gluster::brick { 'annex1.example.com:/data/gluster-storage1':
+ areyousure => true,
+}
+
+gluster::host { 'annex2.example.com':
+ # NOTE: specifying a host uuid is now optional!
+ # if you don't choose one, one will be assigned
+ #uuid => '2fbe6e2f-f6bc-4c2d-a301-62fa90c459f8',
+}
+
+gluster::brick { 'annex2.example.com:/data/gluster-storage2':
+ areyousure => true,
+}
+
+$brick_list = [
+ 'annex1.example.com:/data/gluster-storage1',
+ 'annex2.example.com:/data/gluster-storage2',
+]
+
+gluster::volume { 'examplevol':
+ replica => 2,
+ bricks => $brick_list,
+ start => undef, # i'll start this myself
+}
+
+# namevar must be: <VOLNAME>#<KEY>
+gluster::volume::property { 'examplevol#auth.reject':
+ value => ['192.0.2.13', '198.51.100.42', '203.0.113.69'],
+}
+```
+
+##Usage and frequently asked questions
+
+All management should be done by manipulating the arguments on the appropriate
+Puppet-Gluster classes and types. Since certain manipulations are either not
+yet possible with Puppet-Gluster, or are not supported by GlusterFS, attempting
+to manipulate the Puppet configuration in an unsupported way will result in
+undefined behaviour, and possibly even data loss, however this is unlikely.
+
+###How do I change the replica count?
+
+You must set this before volume creation. This is a limitation of GlusterFS.
+There are certain situations where you can change the replica count by adding
+a multiple of the existing brick count to get this desired effect. These cases
+are not yet supported by Puppet-Gluster. If you want to use Puppet-Gluster
+before and / or after this transition, you can do so, but you'll have to do the
+changes manually.
+
+###Do I need to use a virtual IP?
+
+Using a virtual IP (VIP) is strongly recommended as a distributed lock manager
+(DLM) and also to provide a highly-available (HA) IP address for your clients
+to connect to. For a more detailed explanation of the reasoning please see:
+
+[https://ttboj.wordpress.com/2012/08/23/how-to-avoid-cluster-race-conditions-or-how-to-implement-a-distributed-lock-manager-in-puppet/](https://ttboj.wordpress.com/2012/08/23/how-to-avoid-cluster-race-conditions-or-how-to-implement-a-distributed-lock-manager-in-puppet/)
+
+Remember that even if you're using a hosted solution (such as AWS) that doesn't
+provide an additional IP address, or you want to avoid using an additional IP,
+and you're okay not having full HA client mounting, you can use an unused
+private RFC1918 IP address as the DLM VIP. Remember that a layer 3 IP can
+co-exist on the same layer 2 network with the layer 3 network that is used by
+your cluster.
+
+###Is it possible to have Puppet-Gluster complete in a single run?
+
+No. This is a limitation of Puppet, and is related to how GlusterFS operates.
+For example, it is not reliably possible to predict which ports a particular
+GlusterFS volume will run on until after the volume is started. As a result,
+this module will initially whitelist connections from GlusterFS host IP
+addresses, and then further restrict this to only allow individual ports once
+this information is known. This is possible in conjunction with the
+[puppet-shorewall](https://github.com/purpleidea/puppet-shorewall) module.
+You should notice that each run should complete without error. If you do see an
+error, it means that either something is wrong with your system and / or
+configuration, or that there is a bug in Puppet-Gluster.
+
+###Can you integrate this with vagrant?
+
+Not until vagrant properly supports libvirt/KVM. I have no desire to use
+VirtualBox for fun.
+
+###Awesome work, but it's missing support for a feature and/or platform!
+
+Since this is an Open Source / Free Software project that I also give away for
+free (as in beer, free as in gratis, free as in libre), I'm unable to provide
+unlimited support. Please consider donating funds, hardware, virtual machines,
+and other resources. For specific needs, you could perhaps sponsor a feature!
+
+###You didn't answer my question, or I have a question!
+
+Contact me through my [technical blog](https://ttboj.wordpress.com/contact/)
+and I'll do my best to help. If you have a good question, please remind me to
+add my answer to this documentation!
+
+##Reference
+Please note that there are a number of undocumented options. For more
+information on these options, please view the source at:
+[https://github.com/purpleidea/puppet-gluster/](https://github.com/purpleidea/puppet-gluster/).
+If you feel that a well used option needs documenting here, please contact me.
+
+###Overview of classes and types
+
+* [gluster::simple](#glustersimple): Simple Puppet-Gluster deployment.
+* [gluster::elastic](#glusterelastic): Under construction.
+* [gluster::server](#glusterserver): Base class for server hosts.
+* [gluster::host](#glusterhost): Host type for each participating host.
+* [gluster::brick](#glusterbrick): Brick type for each defined brick, per host.
+* [gluster::volume](#glustervolume): Volume type for each defined volume.
+* [gluster::volume::property](#glustervolumeproperty): Manages properties for each volume.
+
+###gluster::simple
+This is gluster::simple. It should probably take care of 80% of all use cases.
+It is particularly useful for deploying quick test clusters. It uses a
+finite-state machine (FSM) to decide when the cluster has settled and volume
+creation can begin. For more information on the FSM in Puppet-Gluster see:
+[https://ttboj.wordpress.com/2013/09/28/finite-state-machines-in-puppet/](https://ttboj.wordpress.com/2013/09/28/finite-state-machines-in-puppet/)
+
+####`replica`
+The replica count. Can't be changed automatically after initial deployment.
+
+####`volume`
+The volume name or list of volume names to create.
+
+####`path`
+The valid brick path for each host. Defaults to local file system. If you need
+a different path per host, then Gluster::Simple will not meet your needs.
+
+####`vip`
+The virtual IP address to be used for the cluster distributed lock manager.
+
+####`shorewall`
+Boolean to specify whether puppet-shorewall integration should be used or not.
+
+###gluster::elastic
+Under construction.
+
+###gluster::server
+Main server class for the cluster. Must be included when building the GlusterFS
+cluster manually. Wrapper classes such as [gluster::simple](#glustersimple)
+include this automatically.
+
+####`vip`
+The virtual IP address to be used for the cluster distributed lock manager.
+
+####`shorewall`
+Boolean to specify whether puppet-shorewall integration should be used or not.
+
+###gluster::host
+Main host type for the cluster. Each host participating in the GlusterFS
+cluster must define this type on itself, and on every other host. As a result,
+this is not a singleton like the [gluster::server](#glusterserver) class.
+
+####`ip`
+Specify which IP address this host is using. This defaults to the
+_$::ipaddress_ variable. Be sure to set this manually if you're declaring this
+yourself on each host without using exported resources. If each host thinks the
+other hosts should have the same IP address as itself, then Puppet-Gluster and
+GlusterFS won't work correctly.
+
+####`uuid`
+Universally unique identifier (UUID) for the host. If empty, Puppet-Gluster
+will generate this automatically for the host. You can generate your own
+manually with _uuidgen_, and set them yourself. I found this particularly
+useful for testing, because I would pick easy to recognize UUIDs like:
+_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa_,
+_bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb_, and so on. If you set a UUID manually,
+and Puppet-Gluster has a chance to run, then it will remember your choice, and
+store it locally to be used again if you no longer specify the UUID. This is
+particularly useful for upgrading an existing un-managed GlusterFS installation
+to a Puppet-Gluster managed one, without changing any UUIDs.
+
+###gluster::brick
+Main brick type for the cluster. Each brick is an individual storage segment to
+be used on a host. Each host must have at least one brick to participate in the
+cluster, but usually a host will have multiple bricks. A brick can be as simple
+as a file system folder, or it can be a separate file system. Please read the
+official GlusterFS documentation, if you aren't entirely comfortable with the
+concept of a brick.
+
+For most test clusters, and for experimentation, it is easiest to use a
+directory on the root file system. You can even use a _/tmp_ sub folder if you
+don't care about the persistence of your data. For more serious clusters, you
+might want to create separate file systems for your data. On self-hosted iron,
+it is not uncommon to create multiple RAID-6 drive pools, and to then create a
+separate file system per virtual drive. Each file system can then be used as a
+single brick.
+
+So that each volume in GlusterFS has the maximum ability to grow, without
+having to partition storage separately, the bricks in Puppet-Gluster are
+actually folders (on whatever backing store you wish) which then contain
+sub folders-- one for each volume. As a result, all the volumes on a given
+GlusterFS cluster can share the total available storage space. If you wish to
+limit the storage used by each volume, you can set up quotas. Alternatively, you
+can buy more hardware, and elastically grow your GlusterFS volumes, since the
+price per GB will be significantly less than any proprietary storage system.
+The one downside to this brick sharing, is that if you have chosen the brick
+per host count specifically to match your performance requirements, and
+each GlusterFS volume on the same cluster has drastically different brick per
+host performance requirements, then this won't suit your needs. I doubt that
+anyone actually has such requirements, but if you do insist on needing this
+compartmentalization, then you can probably use the Puppet-Gluster grouping
+feature to accomplish this goal. Please let me know about your use-case, and
+be warned that the grouping feature hasn't been extensively tested.
+
+To prove to you that I care about automation, this type offers the ability to
+automatically partition and format your file systems. This means you can plug
+in new iron, boot, provision and configure the entire system automatically.
+Regrettably, I don't have a lot of test hardware to routinely use this feature.
+If you'd like to donate some, I'd be happy to test this thoroughly. Having said
+that, I have used this feature, I consider it to be extremely safe, and it has
+never caused me to lose data. If you're uncertain, feel free to look at the
+code, or avoid using this feature entirely. If you think there's a way to make
+it even safer, then feel free to let me know.
+
+####`dev`
+Block device, such as _/dev/sdc_ or _/dev/disk/by-id/scsi-0123456789abcdef_. By
+default, Puppet-Gluster will assume you're using a folder to store the brick
+data, if you don't specify this parameter.
+
+####`fsuuid`
+File system UUID. This ensures we can distinctly identify a file system. You
+can set this to be used with automatic file system creation, or you can specify
+the file system UUID that you'd like to use.
+
+####`labeltype`
+Only _gpt_ is supported. Other options include _msdos_, but this has never been
+used because of its size limitations.
+
+####`fstype`
+This should be _xfs_ or _ext4_. Using _xfs_ is recommended, but _ext4_ is also
+quite common. This only affects a file system that is getting created by this
+module. If you provision a new machine, with a root file system of _ext4_, and
+the brick you create is a root file system path, then this option does nothing.
+
+####`xfs_inode64`
+Set _inode64_ mount option when using the _xfs_ fstype. Choose _true_ to set.
+
+####`xfs_nobarrier`
+Set _nobarrier_ mount option when using the _xfs_ fstype. Choose _true_ to set.
+
+####`ro`
+Whether the file system should be mounted read only. For emergencies only.
+
+####`force`
+If _true_, this will overwrite any xfs file system it sees. This is useful for
+rebuilding GlusterFS repeatedly and wiping data. There are other safeties in
+place to stop this. In general, you probably don't ever want to touch this.
+
+####`areyousure`
+Do you want to allow Puppet-Gluster to do dangerous things? You have to set
+this to _true_ to allow Puppet-Gluster to _fdisk_ and _mkfs_ your file system.
+
+###gluster::volume
+Main volume type for the cluster. This is where a lot of the magic happens.
+Remember that changing some of these parameters after the volume has been
+created won't work, and you'll experience undefined behaviour. There could be
+FSM based error checking to verify that no changes occur, but it has been left
+out so that this code base can eventually support such changes, and so that the
+user can manually change a parameter if they know that it is safe to do so.
+
+####`bricks`
+List of bricks to use for this volume. If this is left at the default value of
+_true_, then this list is built automatically. The algorithm that determines
+this order does not support all possible situations, and most likely can't
+handle certain corner cases. It is possible to examine the FSM to view the
+selected brick order before it has a chance to create the volume. The volume
+creation script won't run until there is a stable brick list as seen by the FSM
+running on the host that has the DLM. If you specify this list of bricks
+manually, you must choose the order to match your desired volume layout. If you
+aren't sure about how to order the bricks, you should review the GlusterFS
+documentation first.
+
+####`transport`
+Only _tcp_ is supported. Possible values can include _rdma_, but this won't get
+any testing if I don't have access to infiniband hardware. Donations welcome.
+
+####`replica`
+Replica count. Usually you'll want to set this to _2_. Some users choose _3_.
+Other values are seldom seen. A value of _1_ can be used for simply testing a
+distributed setup, when you don't care about your data or high availability. A
+value greater than _4_ is probably wasteful and unnecessary. It might even
+cause performance issues if a synchronous write is waiting on a slow fourth
+server.
+
+####`stripe`
+Stripe count. Thoroughly unsupported and untested option. Not recommended for
+use by GlusterFS.
+
+####`ping`
+Do we want to include ping checks with _fping_?
+
+####`settle`
+Do we want to run settle checks?
+
+####`start`
+Requested state for the volume. Valid values include: _true_ (start), _false_
+(stop), or _undef_ (un-managed start/stop state).
+
+###gluster::volume::property
+Main volume property type for the cluster. This allows you to manage GlusterFS
+volume specific properties. There are a wide range of properties that volumes
+support. For the full list of properties, you should consult the GlusterFS
+documentation, or run the _gluster volume set help_ command. To set a property
+you must use the special name pattern of: _volume_#_key_. The value argument is
+used to set the associated value. It is smart enough to accept values in the
+most logical format for that specific property. Some properties aren't yet
+supported, so please report any problems you have with this functionality.
+Because this feature is an awesome way to _document as code_ the volume
+specific optimizations that you've made, make sure you use this feature even if
+you don't use all the others.
+
+####`value`
+The value to be used for this volume property.
+
+##Examples
+For example configurations, please consult the [examples/](https://github.com/purpleidea/puppet-gluster/tree/master/examples) directory in the git
+source repository. It is available from:
+
+[https://github.com/purpleidea/puppet-gluster/tree/master/examples](https://github.com/purpleidea/puppet-gluster/tree/master/examples)
+
+It is also available from:
+
+[https://forge.gluster.org/puppet-gluster/puppet-gluster/trees/master/examples](https://forge.gluster.org/puppet-gluster/puppet-gluster/trees/master/examples/)
+
+##Limitations
+
+This module has been tested against open source Puppet 3.2.4 and higher.
+
+The module has been tested on:
+
+* CentOS 6.4
+
+It will probably work without incident or without major modification on:
+
+* CentOS 5.x/6.x
+* RHEL 5.x/6.x
+
+It will most likely work with other Puppet versions and on other platforms, but
+testing under other conditions has been light due to lack of resources. It will
+most likely not work on Debian/Ubuntu systems without modification. I would
+really love to add support for these operating systems, but I do not have any
+test resources to do so. Please sponsor this if you'd like to see it happen.
+
+##Development
+
+This is my personal project that I work on in my free time.
+Donations of funding, hardware, virtual machines, and other resources are
+appreciated. Please contact me if you'd like to sponsor a feature, invite me to
+talk/teach or for consulting.
+
+You can follow along [on my technical blog](https://ttboj.wordpress.com/).
+
+##Author
+
+Copyright (C) 2010-2013+ James Shubin
+
+* [github](https://github.com/purpleidea/)
+* [@purpleidea](https://twitter.com/#!/purpleidea)
+* [https://ttboj.wordpress.com/](https://ttboj.wordpress.com/)
+
diff --git a/doc/admin-guide/en-US/markdown/admin_setting_volumes.md b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md
index 4038523c8..455238048 100644
--- a/doc/admin-guide/en-US/markdown/admin_setting_volumes.md
+++ b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md
@@ -1,5 +1,4 @@
-Setting up GlusterFS Server Volumes
-===================================
+#Setting up GlusterFS Server Volumes
A volume is a logical collection of bricks where each brick is an export
directory on a server in the trusted storage pool. Most of the gluster
@@ -12,51 +11,46 @@ start it before attempting to mount it.
- Volumes of the following types can be created in your storage
environment:
- - Distributed - Distributed volumes distributes files throughout
+ - **Distributed** - Distributed volumes distributes files throughout
the bricks in the volume. You can use distributed volumes where
the requirement is to scale storage and the redundancy is either
not important or is provided by other hardware/software layers.
- For more information, see ? .
- - Replicated – Replicated volumes replicates files across bricks
+ - **Replicated** – Replicated volumes replicates files across bricks
in the volume. You can use replicated volumes in environments
- where high-availability and high-reliability are critical. For
- more information, see ?.
+ where high-availability and high-reliability are critical.
- - Striped – Striped volumes stripes data across bricks in the
+ - **Striped** – Striped volumes stripes data across bricks in the
volume. For best results, you should use striped volumes only in
- high concurrency environments accessing very large files. For
- more information, see ?.
+ high concurrency environments accessing very large files.
- - Distributed Striped - Distributed striped volumes stripe data
+ - **Distributed Striped** - Distributed striped volumes stripe data
across two or more nodes in the cluster. You should use
distributed striped volumes where the requirement is to scale
storage and in high concurrency environments accessing very
- large files is critical. For more information, see ?.
+ large files is critical.
- - Distributed Replicated - Distributed replicated volumes
+ - **Distributed Replicated** - Distributed replicated volumes
distributes files across replicated bricks in the volume. You
can use distributed replicated volumes in environments where the
requirement is to scale storage and high-reliability is
critical. Distributed replicated volumes also offer improved
- read performance in most environments. For more information, see
- ?.
+ read performance in most environments.
- - Distributed Striped Replicated – Distributed striped replicated
+ - **Distributed Striped Replicated** – Distributed striped replicated
volumes distributes striped data across replicated bricks in the
cluster. For best results, you should use distributed striped
replicated volumes in highly concurrent environments where
parallel access of very large files and performance is critical.
In this release, configuration of this volume type is supported
- only for Map Reduce workloads. For more information, see ?.
+ only for Map Reduce workloads.
- - Striped Replicated – Striped replicated volumes stripes data
+ - **Striped Replicated** – Striped replicated volumes stripes data
across replicated bricks in the cluster. For best results, you
should use striped replicated volumes in highly concurrent
environments where there is parallel access of very large files
and performance is critical. In this release, configuration of
- this volume type is supported only for Map Reduce workloads. For
- more information, see ?.
+ this volume type is supported only for Map Reduce workloads.
**To create a new volume**
@@ -71,16 +65,14 @@ start it before attempting to mount it.
Creation of test-volume has been successful
Please start the volume to access data.
-Creating Distributed Volumes
-============================
+##Creating Distributed Volumes
In a distributed volume, files are spread randomly across the bricks in
the volume. Use distributed volumes where you need to scale storage and
redundancy is either not important or is provided by other
hardware/software layers.
-> **Note**
->
+> **Note**:
> Disk/server failure in distributed volumes can result in a serious
> loss of data because directory contents are spread randomly across the
> bricks in the volume.
@@ -89,7 +81,7 @@ hardware/software layers.
**To create a distributed volume**
-1. Create a trusted storage pool as described earlier in ?.
+1. Create a trusted storage pool.
2. Create the distributed volume:
@@ -125,23 +117,19 @@ hardware/software layers.
If the transport type is not specified, *tcp* is used as the
default. You can also set additional options if required, such as
- auth.allow or auth.reject. For more information, see ?
+ auth.allow or auth.reject.
- > **Note**
- >
+ > **Note**:
> Make sure you start your volumes before you try to mount them or
- > else client operations after the mount will hang, see ? for
- > details.
+ > else client operations after the mount will hang.
-Creating Replicated Volumes
-===========================
+##Creating Replicated Volumes
Replicated volumes create copies of files across multiple bricks in the
volume. You can use replicated volumes in environments where
high-availability and high-reliability are critical.
-> **Note**
->
+> **Note**:
> The number of bricks should be equal to the replica count for a
> replicated volume. To protect against server and disk failures, it is
> recommended that the bricks of the volume are from different servers.
@@ -150,7 +138,7 @@ high-availability and high-reliability are critical.
**To create a replicated volume**
-1. Create a trusted storage pool as described earlier in ?.
+1. Create a trusted storage pool.
2. Create the replicated volume:
@@ -164,23 +152,19 @@ high-availability and high-reliability are critical.
If the transport type is not specified, *tcp* is used as the
default. You can also set additional options if required, such as
- auth.allow or auth.reject. For more information, see ?
+ auth.allow or auth.reject.
- > **Note**
- >
+ > **Note**:
> Make sure you start your volumes before you try to mount them or
- > else client operations after the mount will hang, see ? for
- > details.
+ > else client operations after the mount will hang.
-Creating Striped Volumes
-========================
+##Creating Striped Volumes
Striped volumes stripe data across bricks in the volume. For best
results, you should use striped volumes only in high concurrency
environments accessing very large files.
-> **Note**
->
+> **Note**:
> The number of bricks should be equal to the stripe count for a
> striped volume.
@@ -188,7 +172,7 @@ environments accessing very large files.
**To create a striped volume**
-1. Create a trusted storage pool as described earlier in ?.
+1. Create a trusted storage pool.
2. Create the striped volume:
@@ -202,24 +186,20 @@ environments accessing very large files.
If the transport type is not specified, *tcp* is used as the
default. You can also set additional options if required, such as
- auth.allow or auth.reject. For more information, see ?
+ auth.allow or auth.reject.
- > **Note**
- >
+ > **Note**:
> Make sure you start your volumes before you try to mount them or
- > else client operations after the mount will hang, see ? for
- > details.
+ > else client operations after the mount will hang.
-Creating Distributed Striped Volumes
-====================================
+##Creating Distributed Striped Volumes
Distributed striped volumes stripe files across two or more nodes in
the cluster. For best results, you should use distributed striped
volumes where the requirement is to scale storage and in high
concurrency environments accessing very large files is critical.
-> **Note**
->
+> **Note**:
> The number of bricks should be a multiple of the stripe count for a
> distributed striped volume.
@@ -227,7 +207,7 @@ concurrency environments accessing very large files is critical.
**To create a distributed striped volume**
-1. Create a trusted storage pool as described earlier in ?.
+1. Create a trusted storage pool.
2. Create the distributed striped volume:
@@ -242,16 +222,13 @@ concurrency environments accessing very large files is critical.
If the transport type is not specified, *tcp* is used as the
default. You can also set additional options if required, such as
- auth.allow or auth.reject. For more information, see ?
+ auth.allow or auth.reject.
- > **Note**
- >
+ > **Note**:
> Make sure you start your volumes before you try to mount them or
- > else client operations after the mount will hang, see ? for
- > details.
+ > else client operations after the mount will hang.
-Creating Distributed Replicated Volumes
-=======================================
+##Creating Distributed Replicated Volumes
Distributes files across replicated bricks in the volume. You can use
distributed replicated volumes in environments where the requirement is
@@ -259,8 +236,7 @@ to scale storage and high-reliability is critical. Distributed
replicated volumes also offer improved read performance in most
environments.
-> **Note**
->
+> **Note**:
> The number of bricks should be a multiple of the replica count for a
> distributed replicated volume. Also, the order in which bricks are
> specified has a great effect on data protection. Each replica\_count
@@ -274,7 +250,7 @@ environments.
**To create a distributed replicated volume**
-1. Create a trusted storage pool as described earlier in ?.
+1. Create a trusted storage pool.
2. Create the distributed replicated volume:
@@ -296,16 +272,13 @@ environments.
If the transport type is not specified, *tcp* is used as the
default. You can also set additional options if required, such as
- auth.allow or auth.reject. For more information, see ?
+ auth.allow or auth.reject.
- > **Note**
- >
+ > **Note**:
> Make sure you start your volumes before you try to mount them or
- > else client operations after the mount will hang, see ? for
- > details.
+ > else client operations after the mount will hang.
-Creating Distributed Striped Replicated Volumes
-===============================================
+##Creating Distributed Striped Replicated Volumes
Distributed striped replicated volumes distribute striped data across
replicated bricks in the cluster. For best results, you should use
@@ -314,14 +287,13 @@ where parallel access of very large files and performance is critical.
In this release, configuration of this volume type is supported only for
Map Reduce workloads.
-> **Note**
->
+> **Note**:
> The number of bricks should be a multiple of the stripe count
> and replica count for a distributed striped replicated volume.
**To create a distributed striped replicated volume**
-1. Create a trusted storage pool as described earlier in ?.
+1. Create a trusted storage pool.
2. Create a distributed striped replicated volume using the following
command:
@@ -337,16 +309,13 @@ Map Reduce workloads.
If the transport type is not specified, *tcp* is used as the
default. You can also set additional options if required, such as
- auth.allow or auth.reject. For more information, see ?
+ auth.allow or auth.reject.
- > **Note**
- >
+ > **Note**:
> Make sure you start your volumes before you try to mount them or
- > else client operations after the mount will hang, see ? for
- > details.
+ > else client operations after the mount will hang.
-Creating Striped Replicated Volumes
-===================================
+##Creating Striped Replicated Volumes
Striped replicated volumes stripe data across replicated bricks in the
cluster. For best results, you should use striped replicated volumes in
@@ -354,8 +323,7 @@ highly concurrent environments where there is parallel access of very
large files and performance is critical. In this release, configuration
of this volume type is supported only for Map Reduce workloads.
-> **Note**
->
+> **Note**:
> The number of bricks should be a multiple of the replicate count and
> stripe count for a striped replicated volume.
@@ -366,8 +334,6 @@ of this volume type is supported only for Map Reduce workloads.
1. Create a trusted storage pool consisting of the storage servers that
will comprise the volume.
- For more information, see ?.
-
2. Create a striped replicated volume :
`# gluster volume create [stripe ] [replica ] [transport tcp | rdma | tcp,rdma] `
@@ -387,16 +353,13 @@ of this volume type is supported only for Map Reduce workloads.
If the transport type is not specified, *tcp* is used as the
default. You can also set additional options if required, such as
- auth.allow or auth.reject. For more information, see ?
+ auth.allow or auth.reject.
- > **Note**
- >
+ > **Note**:
> Make sure you start your volumes before you try to mount them or
- > else client operations after the mount will hang, see ? for
- > details.
+ > else client operations after the mount will hang.
-Starting Volumes
-================
+##Starting Volumes
You must start your volumes before you try to mount them.
@@ -411,9 +374,9 @@ You must start your volumes before you try to mount them.
# gluster volume start test-volume
Starting test-volume has been successful
- []: images/Distributed_Volume.png
- [1]: images/Replicated_Volume.png
- [2]: images/Striped_Volume.png
- [3]: images/Distributed_Striped_Volume.png
- [4]: images/Distributed_Replicated_Volume.png
- [5]: images/Striped_Replicated_Volume.png
+ []: ../images/Distributed_Volume.png
+ [1]: ../images/Replicated_Volume.png
+ [2]: ../images/Striped_Volume.png
+ [3]: ../images/Distributed_Striped_Volume.png
+ [4]: ../images/Distributed_Replicated_Volume.png
+ [5]: ../images/Striped_Replicated_Volume.png
diff --git a/doc/admin-guide/en-US/markdown/admin_settingup_clients.md b/doc/admin-guide/en-US/markdown/admin_settingup_clients.md
index 85b28c952..bb45c8b89 100644
--- a/doc/admin-guide/en-US/markdown/admin_settingup_clients.md
+++ b/doc/admin-guide/en-US/markdown/admin_settingup_clients.md
@@ -1,5 +1,4 @@
-Accessing Data - Setting Up GlusterFS Client
-============================================
+#Accessing Data - Setting Up GlusterFS Client
You can access gluster volumes in multiple ways. You can use Gluster
Native Client method for high concurrency, performance and transparent
@@ -13,8 +12,7 @@ You can use CIFS to access volumes when using Microsoft Windows as well
as SAMBA clients. For this access method, Samba packages need to be
present on the client side.
-Gluster Native Client
-=====================
+##Gluster Native Client
The Gluster Native Client is a FUSE-based client running in user space.
Gluster Native Client is the recommended method for accessing volumes
@@ -25,8 +23,7 @@ install the software on client machines. This section also describes how
to mount volumes on clients (both manually and automatically) and how to
verify that the volume has mounted successfully.
-Installing the Gluster Native Client
-------------------------------------
+###Installing the Gluster Native Client
Before you begin installing the Gluster Native Client, you need to
verify that the FUSE module is loaded on the client and has access to
@@ -39,7 +36,6 @@ the required modules as follows:
2. Verify that the FUSE module is loaded:
`# dmesg | grep -i fuse `
-
`fuse init (API version 7.13)`
### Installing on Red Hat Package Manager (RPM) Distributions
@@ -59,7 +55,6 @@ To install Gluster Native Client on RPM distribution-based systems
You can use the following chains with iptables:
`$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT `
-
`$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT`
> **Note**
@@ -80,9 +75,7 @@ To install Gluster Native Client on RPM distribution-based systems
4. Install Gluster Native Client on the client.
`$ sudo rpm -i glusterfs-3.3.0qa30-1.x86_64.rpm `
-
`$ sudo rpm -i glusterfs-fuse-3.3.0qa30-1.x86_64.rpm `
-
`$ sudo rpm -i glusterfs-rdma-3.3.0qa30-1.x86_64.rpm`
> **Note**
@@ -134,7 +127,6 @@ To install Gluster Native Client on Debian-based distributions
You can use the following chains with iptables:
`$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT `
-
`$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT`
> **Note**
@@ -150,7 +142,6 @@ To build and install Gluster Native Client from the source code
1. Create a new directory using the following commands:
`# mkdir glusterfs `
-
`# cd glusterfs`
2. Download the source code.
@@ -165,21 +156,14 @@ To build and install Gluster Native Client from the source code
`# ./configure `
- `GlusterFS configure summary `
-
- `================== `
-
- `FUSE client : yes `
-
- `Infiniband verbs : yes `
-
- `epoll IO multiplex : yes `
-
- `argp-standalone : no `
-
- `fusermount : no `
-
- `readline : yes`
+ GlusterFS configure summary
+ ===========================
+ FUSE client : yes
+ Infiniband verbs : yes
+ epoll IO multiplex : yes
+ argp-standalone : no
+ fusermount : no
+ readline : yes
The configuration summary shows the components that will be built
with Gluster Native Client.
@@ -188,7 +172,6 @@ To build and install Gluster Native Client from the source code
commands:
`# make `
-
`# make install`
6. Verify that the correct version of Gluster Native Client is
@@ -196,18 +179,13 @@ To build and install Gluster Native Client from the source code
`# glusterfs --version`
-Mounting Volumes
-----------------
+##Mounting Volumes
After installing the Gluster Native Client, you need to mount Gluster
volumes to access data. There are two methods you can choose:
-- ?
-
-- ?
-
-After mounting a volume, you can test the mounted volume using the
-procedure described in ?.
+- [Manually Mounting Volumes](#manual-mount)
+- [Automatically Mounting Volumes](#auto-mount)
> **Note**
>
@@ -215,10 +193,9 @@ procedure described in ?.
> in the client machine. You can use appropriate /etc/hosts entries or
> DNS server to resolve server names to IP addresses.
+<a name="manual-mount" />
### Manually Mounting Volumes
-To manually mount a Gluster volume
-
- To mount a volume, use the following command:
`# mount -t glusterfs HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR`
@@ -272,6 +249,7 @@ attempts to fetch volume files while mounting a volume. This option is
useful when you mount a server with multiple IP addresses or when
round-robin DNS is configured for the server-name.
+<a name="auto-mount" />
### Automatically Mounting Volumes
You can configure your system to automatically mount the Gluster volume
@@ -282,8 +260,6 @@ gluster configuration volfile describing the volume name. Subsequently,
the client will communicate directly with the servers mentioned in the
volfile (which might not even include the one used for mount).
-**To automatically mount a Gluster volume**
-
- To mount a volume, edit the /etc/fstab file and add the following
line:
@@ -337,17 +313,14 @@ To test mounted volumes
following:
`# cd MOUNTDIR `
-
`# ls`
- For example,
`# cd /mnt/glusterfs `
-
`# ls`
-NFS
-===
+#NFS
You can use NFS v3 to access gluster volumes. Extensive testing has
been done on GNU/Linux clients and NFS implementation in other operating
@@ -366,26 +339,23 @@ This section describes how to use NFS to mount Gluster volumes (both
manually and automatically) and how to verify that the volume has been
mounted successfully.
-Using NFS to Mount Volumes
+##Using NFS to Mount Volumes
--------------------------
You can use either of the following methods to mount Gluster volumes:
-- ?
-
-- ?
+- [Manually Mounting Volumes Using NFS](#manual-nfs)
+- [Automatically Mounting Volumes Using NFS](#auto-nfs)
**Prerequisite**: Install nfs-common package on both servers and clients
(only for Debian-based distribution), using the following command:
`$ sudo aptitude install nfs-common `
-After mounting a volume, you can test the mounted volume using the
-procedure described in ?.
-
+<a name="manual-nfs" />
### Manually Mounting Volumes Using NFS
-To manually mount a Gluster volume using NFS
+**To manually mount a Gluster volume using NFS**
- To mount a volume, use the following command:
@@ -423,6 +393,7 @@ To manually mount a Gluster volume using NFS
` # mount -o proto=tcp,vers=3 nfs://server1:38467/test-volume /mnt/glusterfs`
+<a name="auto-nfs" />
### Automatically Mounting Volumes Using NFS
You can configure your system to automatically mount Gluster volumes
@@ -494,19 +465,9 @@ You can confirm that Gluster directories are mounting successfully.
following:
`# cd MOUNTDIR`
-
`# ls`
- For example,
-
- `
-
- `
-
- `# ls`
-
-CIFS
-====
+#CIFS
You can use CIFS to access volumes when using Microsoft Windows as
well as SAMBA clients. For this access method, Samba packages need to be
@@ -523,21 +484,18 @@ verify that the volume has mounted successfully.
> can use the Mac OS X command line to access Gluster volumes using
> CIFS.
-Using CIFS to Mount Volumes
----------------------------
+##Using CIFS to Mount Volumes
You can use either of the following methods to mount Gluster volumes:
-- ?
-
-- ?
-
-After mounting a volume, you can test the mounted volume using the
-procedure described in ?.
+- [Exporting Gluster Volumes Through Samba](#export-samba)
+- [Manually Mounting Volumes Using CIFS](#cifs-manual)
+- [Automatically Mounting Volumes Using CIFS](#cifs-auto)
You can also use Samba for exporting Gluster Volumes through CIFS
protocol.
+<a name="export-samba" />
### Exporting Gluster Volumes Through Samba
We recommend you to use Samba for exporting Gluster volumes through the
@@ -545,8 +503,7 @@ CIFS protocol.
**To export volumes through CIFS protocol**
-1. Mount a Gluster volume. For more information on mounting volumes,
- see ?.
+1. Mount a Gluster volume.
2. Setup Samba configuration to export the mount point of the Gluster
volume.
@@ -575,6 +532,7 @@ scripts (/etc/init.d/smb [re]start).
> repeat these steps on each Gluster node. For more advanced
> configurations, see Samba documentation.
+<a name="cifs-manual" />
### Manually Mounting Volumes Using CIFS
You can manually mount Gluster volumes using CIFS on Microsoft
@@ -594,20 +552,10 @@ Windows-based client machines.
The network drive (mapped to the volume) appears in the Computer window.
-**Alternatively, to manually mount a Gluster volume using CIFS.**
-
-- Click **Start \> Run** and enter the following:
-
- `
-
- `
-
- For example:
-
- `
-
- `
+Alternatively, you can manually mount a Gluster volume using CIFS by going to
+**Start \> Run** and entering the network path manually.
+<a name="cifs-auto" />
### Automatically Mounting Volumes Using CIFS
You can configure your system to automatically mount Gluster volumes
diff --git a/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md b/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md
index 43251cd01..a47ece8d9 100644
--- a/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md
+++ b/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md
@@ -1,5 +1,4 @@
-Managing the glusterd Service
-=============================
+#Managing the glusterd Service
After installing GlusterFS, you must start glusterd service. The
glusterd service serves as the Gluster elastic volume manager,
@@ -10,16 +9,13 @@ servers non-disruptively.
This section describes how to start the glusterd service in the
following ways:
-- ?
+- [Starting and Stopping glusterd Manually](#manual)
+- [Starting glusterd Automatically](#auto)
-- ?
+> **Note**: You must start glusterd on all GlusterFS servers.
-> **Note**
->
-> You must start glusterd on all GlusterFS servers.
-
-Starting and Stopping glusterd Manually
-=======================================
+<a name="manual" />
+##Starting and Stopping glusterd Manually
This section describes how to start and stop glusterd manually
@@ -31,19 +27,13 @@ This section describes how to start and stop glusterd manually
`# /etc/init.d/glusterd stop`
-Starting glusterd Automatically
-===============================
+<a name="auto" />
+##Starting glusterd Automatically
This section describes how to configure the system to automatically
start the glusterd service every time the system boots.
-To automatically start the glusterd service every time the system boots,
-enter the following from the command line:
-
-`# chkconfig glusterd on `
-
-Red Hat-based Systems
----------------------
+###Red Hat and Fedora distros
To configure Red Hat-based systems to automatically start the glusterd
service every time the system boots, enter the following from the
@@ -51,8 +41,7 @@ command line:
`# chkconfig glusterd on `
-Debian-based Systems
---------------------
+###Debian and derivatives like Ubuntu
To configure Debian-based systems to automatically start the glusterd
service every time the system boots, enter the following from the
@@ -60,8 +49,7 @@ command line:
`# update-rc.d glusterd defaults`
-Systems Other than Red Hat and Debain
--------------------------------------
+###Systems Other than Red Hat and Debian
To configure systems other than Red Hat or Debian to automatically start
the glusterd service every time the system boots, enter the following
diff --git a/doc/admin-guide/en-US/markdown/admin_storage_pools.md b/doc/admin-guide/en-US/markdown/admin_storage_pools.md
index 2a35cbea5..a0d8837ff 100644
--- a/doc/admin-guide/en-US/markdown/admin_storage_pools.md
+++ b/doc/admin-guide/en-US/markdown/admin_storage_pools.md
@@ -1,5 +1,4 @@
-Setting up Trusted Storage Pools
-================================
+#Setting up Trusted Storage Pools
Before you can configure a GlusterFS volume, you must create a trusted
storage pool consisting of the storage servers that provides bricks to a
@@ -10,21 +9,18 @@ the first server, the storage pool consists of that server alone. To add
additional storage servers to the storage pool, you can use the probe
command from a storage server that is already trusted.
-> **Note**
->
-> Do not self-probe the first server/localhost.
+> **Note**: Do not self-probe the first server/localhost.
The GlusterFS service must be running on all storage servers that you
want to add to the storage pool. See "Managing the glusterd Service" for more information.
-Adding Servers to Trusted Storage Pool
-======================================
+##Adding Servers to Trusted Storage Pool
To create a trusted storage pool, add servers to the trusted storage
pool
-1. The hostnames used to create the storage pool must be resolvable by
- DNS.
+1. **The hostnames used to create the storage pool must be resolvable by
+ DNS**
To add a server to the storage pool:
@@ -42,8 +38,8 @@ pool
# gluster peer probe server4
Probe successful
-2. Verify the peer status from the first server using the following
- commands:
+2. **Verify the peer status from the first server using the following
+ commands:**
# gluster peer status
Number of Peers: 3
@@ -60,8 +56,7 @@ pool
Uuid: 3e0caba-9df7-4f66-8e5d-cbc348f29ff7
State: Peer in Cluster (Connected)
-Removing Servers from the Trusted Storage Pool
-==============================================
+##Removing Servers from the Trusted Storage Pool
To remove a server from the storage pool:
diff --git a/doc/admin-guide/en-US/markdown/admin_troubleshooting.md b/doc/admin-guide/en-US/markdown/admin_troubleshooting.md
index 88fb85c24..fa19a2f71 100644
--- a/doc/admin-guide/en-US/markdown/admin_troubleshooting.md
+++ b/doc/admin-guide/en-US/markdown/admin_troubleshooting.md
@@ -1,60 +1,54 @@
-Troubleshooting GlusterFS
-=========================
+#Troubleshooting GlusterFS
This section describes how to manage GlusterFS logs and most common
troubleshooting scenarios related to GlusterFS.
-Managing GlusterFS Logs
-=======================
+##Contents
+* [Managing GlusterFS Logs](#logs)
+* [Troubleshooting Geo-replication](#georep)
+* [Troubleshooting POSIX ACLs](#posix-acls)
+* [Troubleshooting Hadoop Compatible Storage](#hadoop)
+* [Troubleshooting NFS](#nfs)
+* [Troubleshooting File Locks](#file-locks)
-This section describes how to manage GlusterFS logs by performing the
-following operation:
+<a name="logs" />
+##Managing GlusterFS Logs
-- Rotating Logs
-
-Rotating Logs
--------------
+###Rotating Logs
Administrators can rotate the log file in a volume, as needed.
**To rotate a log file**
-- Rotate the log file using the following command:
-
`# gluster volume log rotate `
- For example, to rotate the log file on test-volume:
+For example, to rotate the log file on test-volume:
- # gluster volume log rotate test-volume
- log rotate successful
+ # gluster volume log rotate test-volume
+ log rotate successful
- > **Note**
- >
- > When a log file is rotated, the contents of the current log file
- > are moved to log-file- name.epoch-time-stamp.
+> **Note**
+> When a log file is rotated, the contents of the current log file
+> are moved to log-file-name.epoch-time-stamp.
-Troubleshooting Geo-replication
-===============================
+<a name="georep" />
+##Troubleshooting Geo-replication
This section describes the most common troubleshooting scenarios related
to GlusterFS Geo-replication.
-Locating Log Files
-------------------
+###Locating Log Files
For every Geo-replication session, the following three log files are
associated to it (four, if the slave is a gluster volume):
-- Master-log-file - log file for the process which monitors the Master
+- **Master-log-file** - log file for the process which monitors the Master
volume
-
-- Slave-log-file - log file for process which initiates the changes in
+- **Slave-log-file** - log file for process which initiates the changes in
slave
-
-- Master-gluster-log-file - log file for the maintenance mount point
+- **Master-gluster-log-file** - log file for the maintenance mount point
that Geo-replication module uses to monitor the master volume
-
-- Slave-gluster-log-file - is the slave's counterpart of it
+- **Slave-gluster-log-file** - is the slave's counterpart of it
**Master Log File**
@@ -87,8 +81,7 @@ running on slave machine), use the following commands:
`/var/log/gluster/5f6e5200-756f-11e0-a1f0-0800200c9a66:remote-mirror.log`
-Rotating Geo-replication Logs
------------------------------
+###Rotating Geo-replication Logs
Administrators can rotate the log file of a particular master-slave
session, as needed. When you run geo-replication's ` log-rotate`
@@ -128,8 +121,7 @@ log file.
# gluster volume geo-replication log rotate
log rotate successful
-Synchronization is not complete
--------------------------------
+###Synchronization is not complete
**Description**: GlusterFS Geo-replication did not synchronize the data
completely but still the geo-replication status displayed is OK.
@@ -138,39 +130,35 @@ completely but still the geo- replication status displayed is OK.
index and restarting GlusterFS Geo-replication. After restarting,
GlusterFS Geo-replication begins synchronizing all the data. All files
are compared using checksum, which can be a lengthy and high resource
-utilization operation on large data sets. If the error situation
-persists, contact Red Hat Support.
+utilization operation on large data sets.
-For more information about erasing index, see ?.
-Issues in Data Synchronization
-------------------------------
+###Issues in Data Synchronization
**Description**: Geo-replication displays status as OK, but the files do
not get synced; only directories and symlinks get synced, with the
following error message in the log:
-[2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to
-sync ./some\_file\`
+ [2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to
+ sync ./some\_file\`
**Solution**: Geo-replication invokes rsync v3.0.0 or higher on the host
and the remote machine. You must verify if you have installed the
required version.
-Geo-replication status displays Faulty very often
--------------------------------------------------
+###Geo-replication status displays Faulty very often
**Description**: Geo-replication displays status as faulty very often
with a backtrace similar to the following:
-2011-04-28 14:06:18.378859] E [syncdutils:131:log\_raise\_exception]
-\<top\>: FAIL: Traceback (most recent call last): File
-"/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line
-152, in twraptf(\*aa) File
-"/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in
-listen rid, exc, res = recv(self.inf) File
-"/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 42, in
-recv return pickle.load(inf) EOFError
+ 2011-04-28 14:06:18.378859] E [syncdutils:131:log\_raise\_exception]
+ \<top\>: FAIL: Traceback (most recent call last): File
+ "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line
+ 152, in twraptf(\*aa) File
+ "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in
+ listen rid, exc, res = recv(self.inf) File
+ "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 42, in
+ recv return pickle.load(inf) EOFError
**Solution**: This error indicates that the RPC communication between
the master gsyncd module and slave gsyncd module is broken and this can
@@ -179,34 +167,28 @@ pre-requisites:
- Password-less SSH is set up properly between the host and the remote
machine.
-
- If FUSE is installed in the machine, because geo-replication module
mounts the GlusterFS volume using FUSE to sync data.
-
- If the **Slave** is a volume, check if that volume is started.
-
- If the Slave is a plain directory, verify if the directory has been
created already with the required permissions.
-
- If GlusterFS 3.2 or higher is not installed in the default location
(in Master) and has been prefixed to be installed in a custom
location, configure the `gluster-command` for it to point to the
exact location.
-
- If GlusterFS 3.2 or higher is not installed in the default location
(in slave) and has been prefixed to be installed in a custom
location, configure the `remote-gsyncd-command` for it to point to
the exact place where gsyncd is located.
-Intermediate Master goes to Faulty State
-----------------------------------------
+###Intermediate Master goes to Faulty State
**Description**: In a cascading set-up, the intermediate master goes to
faulty state with the following log:
-raise RuntimeError ("aborting on uuid change from %s to %s" % \\
-RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f-
-4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154
+ raise RuntimeError ("aborting on uuid change from %s to %s" % \\
+ RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f-
+ 4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154
**Solution**: In a cascading set-up the Intermediate master is loyal to
the original primary master. The above log means that the
@@ -214,50 +196,42 @@ geo-replication module has detected change in primary master. If this is
the desired behavior, delete the config option volume-id in the session
initiated from the intermediate master.
-Troubleshooting POSIX ACLs
-==========================
+<a name="posix-acls" />
+##Troubleshooting POSIX ACLs
This section describes the most common troubleshooting issues related to
POSIX ACLs.
-setfacl command fails with “setfacl: \<file or directory name\>: Operation not supported” error
------------------------------------------------------------------------------------------------
+ setfacl command fails with “setfacl: \<file or directory name\>: Operation not supported” error
You may face this error when the backend file systems in one of the
servers is not mounted with the "-o acl" option. The same can be
confirmed by viewing the following error message in the log file of the
server "Posix access control list is not supported".
-**Solution**: Remount the backend file system with "-o acl" option. For
-more information, see ?.
+**Solution**: Remount the backend file system with "-o acl" option.
-Troubleshooting Hadoop Compatible Storage
-=========================================
+<a name="hadoop" />
+##Troubleshooting Hadoop Compatible Storage
-This section describes the most common troubleshooting issues related to
-Hadoop Compatible Storage.
-
-Time Sync
----------
+###Time Sync
-Running MapReduce job may throw exceptions if the time is out-of-sync on
+**Problem**: Running MapReduce job may throw exceptions if the time is out-of-sync on
the hosts in the cluster.
**Solution**: Sync the time on all hosts using ntpd program.
-Troubleshooting NFS
-===================
+<a name="nfs" />
+##Troubleshooting NFS
This section describes the most common troubleshooting issues related to
NFS.
-mount command on NFS client fails with “RPC Error: Program not registered”
---------------------------------------------------------------------------
+###mount command on NFS client fails with “RPC Error: Program not registered”
-Start portmap or rpcbind service on the NFS server.
+ Start portmap or rpcbind service on the NFS server.
This error is encountered when the server has not started correctly.
-
On most Linux distributions this is fixed by starting portmap:
`$ /etc/init.d/portmap start`
@@ -270,8 +244,7 @@ following command is required:
After starting portmap or rpcbind, gluster NFS server needs to be
restarted.
-NFS server start-up fails with “Port is already in use” error in the log file."
--------------------------------------------------------------------------------
+###NFS server start-up fails with “Port is already in use” error in the log file.
Another Gluster NFS server is running on the same machine.
@@ -291,27 +264,21 @@ To resolve this error one of the Gluster NFS servers will have to be
shutdown. At this time, Gluster NFS server does not support running
multiple NFS servers on the same machine.
-mount command fails with “rpc.statd” related error message
-----------------------------------------------------------
+###mount command fails with “rpc.statd” related error message
If the mount command fails with the following error message:
-mount.nfs: rpc.statd is not running but is required for remote locking.
-mount.nfs: Either use '-o nolock' to keep locks local, or start statd.
-
-Start rpc.statd
+ mount.nfs: rpc.statd is not running but is required for remote locking.
+ mount.nfs: Either use '-o nolock' to keep locks local, or start statd.
For NFS clients to mount the NFS server, rpc.statd service must be
-running on the clients.
-
-Start rpc.statd service by running the following command:
+running on the clients. Start rpc.statd service by running the following command:
`$ rpc.statd `
-mount command takes too long to finish.
----------------------------------------
+###mount command takes too long to finish.
-Start rpcbind service on the NFS client.
+**Start rpcbind service on the NFS client**
The problem is that the rpcbind or portmap service is not running on the
NFS client. The resolution for this is to start either of these services
@@ -324,8 +291,7 @@ following command is required:
`$ /etc/init.d/rpcbind start`
-NFS server glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed” error message in the log.
-----------------------------------------------------------------------------------------------------------------------------------------------
+###NFS server glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed” error message in the log.
NFS start-up can succeed but the initialization of the NFS service can
still fail preventing clients from accessing the mount points. Such a
@@ -341,7 +307,7 @@ file:
[2010-05-26 23:33:49] E [rpcsvc.c:2731:rpcsvc_program_unregister] rpc-service: portmap unregistration of program failed
[2010-05-26 23:33:49] E [rpcsvc.c:2744:rpcsvc_program_unregister] rpc-service: Program unregistration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
-1. Start portmap or rpcbind service on the NFS server.
+1. **Start portmap or rpcbind service on the NFS server**
On most Linux distributions, portmap can be started using the
following command:
@@ -356,7 +322,7 @@ file:
After starting portmap or rpcbind, gluster NFS server needs to be
restarted.
-2. Stop another NFS server running on the same machine.
+2. **Stop another NFS server running on the same machine**
Such an error is also seen when there is another NFS server running
on the same machine but it is not the Gluster NFS server. On Linux
@@ -372,18 +338,17 @@ file:
`$ /etc/init.d/nfs stop`
-3. Restart Gluster NFS server.
+3. **Restart Gluster NFS server**
-mount command fails with NFS server failed error.
--------------------------------------------------
+###mount command fails with NFS server failed error.
The mount command fails with the following error:
-*mount: mount to NFS server '10.1.10.11' failed: timed out (retrying).*
+ *mount: mount to NFS server '10.1.10.11' failed: timed out (retrying).*
Perform one of the following to resolve this issue:
-1. Disable name lookup requests from NFS server to a DNS server.
+1. **Disable name lookup requests from NFS server to a DNS server**
The NFS server attempts to authenticate NFS clients by performing a
reverse DNS lookup to match hostnames in the volume file with the
@@ -400,16 +365,14 @@ Perform one of the following to resolve this issue:
`option rpc-auth.addr.namelookup off `
- > **Note**
- >
- > Note: Remember that disabling the NFS server forces authentication
+ > **Note**: Remember that disabling name lookup forces the NFS server
> to authenticate clients using only IP addresses. If the authentication
> rules in the volume file use hostnames, those authentication rules
> will fail and disallow mounting for those clients.
- or
+ **OR**
-2. NFS version used by the NFS client is other than version 3.
+2. **NFS version used by the NFS client is other than version 3**
Gluster NFS server supports version 3 of NFS protocol. In recent
Linux kernels, the default NFS version has been changed from 3 to 4.
@@ -421,18 +384,14 @@ Perform one of the following to resolve this issue:
`$ mount -o vers=3 `
-showmount fails with clnt\_create: RPC: Unable to receive
----------------------------------------------------------
+###showmount fails with clnt\_create: RPC: Unable to receive
Check your firewall setting to open ports 111 for portmap
requests/replies and Gluster NFS server requests/replies. Gluster NFS
server operates over the following port numbers: 38465, 38466, and
38467.
-For more information, see ?.
-
-Application fails with "Invalid argument" or "Value too large for defined data type" error.
--------------------------------------------------------------------------------------------
+###Application fails with "Invalid argument" or "Value too large for defined data type" error.
These two errors generally happen for 32-bit nfs clients or applications
that do not support 64-bit inode numbers or large files. Use the
@@ -443,7 +402,6 @@ Applications that will benefit are those that were either:
- built 32-bit and run on 32-bit machines such that they do not
support large files by default
-
- built 32-bit on 64-bit systems
This option is disabled by default so NFS returns 64-bit inode numbers
@@ -454,8 +412,8 @@ using the following flag with gcc:
` -D_FILE_OFFSET_BITS=64`
-Troubleshooting File Locks
-==========================
+<a name="file-locks" />
+##Troubleshooting File Locks
In GlusterFS 3.3 you can use `statedump` command to list the locks held
on files. The statedump output also provides information on each lock
@@ -463,16 +421,10 @@ with its range, basename, PID of the application holding the lock, and
so on. You can analyze the output to know about the locks whose
owner/application is no longer running or interested in that lock. After
ensuring that no application is using the file, you can clear the
-lock using the following `clear lock` command:
-
-`# `
-
-For more information on performing `statedump`, see ?
-
-**To identify locked file and clear locks**
+lock using the following `clear lock` commands.
-1. Perform statedump on the volume to view the files that are locked
- using the following command:
+1. **Perform statedump on the volume to view the files that are locked
+ using the following command:**
`# gluster volume statedump inode`
@@ -517,9 +469,9 @@ For more information on performing `statedump`, see ?
lock-dump.domain.domain=vol-replicate-0
inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 714787072, owner=00ffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012
-2. Clear the lock using the following command:
+2. **Clear the lock using the following command:**
- `# `
+ `# gluster volume clear-locks`
For example, to clear the entry lock on `file1` of test-volume:
@@ -527,9 +479,9 @@ For more information on performing `statedump`, see ?
Volume clear-locks successful
vol-locks: entry blocked locks=0 granted locks=1
-3. Clear the inode lock using the following command:
+3. **Clear the inode lock using the following command:**
- `# `
+ `# gluster volume clear-locks`
For example, to clear the inode lock on `file1` of test-volume:
diff --git a/doc/admin-guide/en-US/markdown/gfs_introduction.md b/doc/admin-guide/en-US/markdown/gfs_introduction.md
index fd2c53dc9..9f9c05815 100644
--- a/doc/admin-guide/en-US/markdown/gfs_introduction.md
+++ b/doc/admin-guide/en-US/markdown/gfs_introduction.md
@@ -13,7 +13,7 @@ managing data in a single global namespace. GlusterFS is based on a
stackable user space design, delivering exceptional performance for
diverse workloads.
-![ Virtualized Cloud Environments ][]
+![ Virtualized Cloud Environments ](../images/640px-GlusterFS_Architecture.png)
GlusterFS is designed for today's high-performance, virtualized cloud
environments. Unlike traditional data centers, cloud environments
@@ -24,27 +24,8 @@ hybrid environments.
GlusterFS is in production at thousands of enterprises spanning media,
healthcare, government, education, web 2.0, and financial services. The
-following table lists the commercial offerings and its documentation
-location:
+following list shows the commercial offerings:
- ------------------------------------------------------------------------
- Product Documentation Location
- ----------- ------------------------------------------------------------
- Red Hat [][]
- Storage
- Software
- Appliance
-
- Red Hat [][1]
- Virtual
- Storage
- Appliance
-
- Red Hat [][2]
- Storage
- ------------------------------------------------------------------------
-
- [ Virtualized Cloud Environments ]: images/640px-GlusterFS_Architecture.png
- []: http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html
- [1]: http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html
- [2]: http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html
+* [Red Hat Storage](https://access.redhat.com/site/documentation/Red_Hat_Storage/)
+* Red Hat Storage Software Appliance
+* Red Hat Virtual Storage Appliance
diff --git a/doc/admin-guide/en-US/markdown/glossary.md b/doc/admin-guide/en-US/markdown/glossary.md
index 0febaff8f..0203319b0 100644
--- a/doc/admin-guide/en-US/markdown/glossary.md
+++ b/doc/admin-guide/en-US/markdown/glossary.md
@@ -1,10 +1,10 @@
Glossary
========
-Brick
-: A Brick is the GlusterFS basic unit of storage, represented by an
+**Brick**
+: A Brick is the basic unit of storage in GlusterFS, represented by an
export directory on a server in the trusted storage pool. A Brick is
- expressed by combining a server with an export directory in the
+ represented by combining a server name with an export directory in the
following format:
`SERVER:EXPORT`
@@ -13,15 +13,22 @@ Brick
`myhostname:/exports/myexportdir/`
-Cluster
+**Client**
+: Any machine that mounts a GlusterFS volume.
+
+**Cluster**
: A cluster is a group of linked computers, working together closely
thus in many respects forming a single computer.
-Distributed File System
+**Distributed File System**
: A file system that allows multiple clients to concurrently access
data over a computer network.
-Filesystem
+**Extended Attributes**
+: Extended file attributes (abbreviated xattr) is a file system feature
+ that enables users/programs to associate files/dirs with metadata.
+
+**Filesystem**
: A method of storing and organizing computer files and their data.
Essentially, it organizes these files into a database for the
storage, organization, manipulation, and retrieval by the computer's
@@ -29,7 +36,7 @@ Filesystem
Source: [Wikipedia][]
-FUSE
+**FUSE**
: Filesystem in Userspace (FUSE) is a loadable kernel module for
Unix-like computer operating systems that lets non-privileged users
create their own file systems without editing kernel code. This is
@@ -38,26 +45,38 @@ FUSE
Source: [Wikipedia][1]
-Geo-Replication
+**Geo-Replication**
: Geo-replication provides a continuous, asynchronous, and incremental
replication service from one site to another over Local Area Networks
(LAN), Wide Area Network (WAN), and across the Internet.
-glusterd
+**GFID**
+: Each file/directory on a GlusterFS volume has a unique 128-bit number
+ associated with it called the GFID. This is analogous to an inode in a
+ regular filesystem.
+
+**glusterd**
: The Gluster management daemon that needs to run on all servers in
the trusted storage pool.
-Metadata
+**InfiniBand**
+: InfiniBand is a switched fabric computer network communications link
+ used in high-performance computing and enterprise data centers.
+
+**Metadata**
: Metadata is data providing information about one or more other
pieces of data.
-Namespace
+**Namespace**
: Namespace is an abstract container or environment created to hold a
logical grouping of unique identifiers or symbols. Each Gluster
volume exposes a single namespace as a POSIX mount point that
contains every file in the cluster.
-Open Source
+**Node**
+: A server or computer that hosts one or more bricks.
+
+**Open Source**
: Open source describes practices in production and development that
promote access to the end product's source materials. Some consider
open source a philosophy, others consider it a pragmatic
@@ -76,7 +95,7 @@ Open Source
Source: [Wikipedia][2]
-Petabyte
+**Petabyte**
: A petabyte (derived from the SI prefix peta- ) is a unit of
information equal to one quadrillion (short scale) bytes, or 1000
terabytes. The unit symbol for the petabyte is PB. The prefix peta-
@@ -89,7 +108,7 @@ Petabyte
Source: [Wikipedia][3]
-POSIX
+**POSIX**
: Portable Operating System Interface (for Unix) is the name of a
family of related standards specified by the IEEE to define the
application programming interface (API), along with shell and
@@ -97,34 +116,79 @@ POSIX
Unix operating system. Gluster exports a fully POSIX compliant file
system.
-RAID
+**Quorum**
+: The configuration of quorum in a trusted storage pool determines the
+ number of server failures that the trusted storage pool can sustain.
+ If an additional failure occurs, the trusted storage pool becomes
+ unavailable.
+
+**Quota**
+: Quotas allow you to set limits on usage of disk space by directories or
+ by volumes.
+
+**RAID**
: Redundant Array of Inexpensive Disks (RAID) is a technology that
provides increased storage reliability through redundancy, combining
multiple low-cost, less-reliable disk drives components into a
logical unit where all drives in the array are interdependent.
-RRDNS
+**RDMA**
+: Remote direct memory access (RDMA) is a direct memory access from the
+ memory of one computer into that of another without involving either
+ one's operating system. This permits high-throughput, low-latency
+ networking, which is especially useful in massively parallel computer
+ clusters.
+
+**Rebalance**
+: A process of fixing layout and redistributing data in a volume when a
+ brick is added or removed.
+
+**RRDNS**
: Round Robin Domain Name Service (RRDNS) is a method to distribute
load across application servers. RRDNS is implemented by creating
multiple A records with the same name and different IP addresses in
the zone file of a DNS server.
-Trusted Storage Pool
+**Samba**
+: Samba allows file and print sharing between computers running Windows and
+ computers running Linux. It is an implementation of several services and
+ protocols including SMB and CIFS.
+
+**Self-Heal**
+: The self-heal daemon runs in the background, identifies
+ inconsistencies in files/dirs in a replicated volume, and then resolves
+ or heals them. This healing process is usually required when one or more
+ bricks of a volume go down and then come back up later.
+
+**Split-brain**
+: This is a situation where data on two or more bricks in a replicated
+ volume start to diverge in terms of content or metadata. In this state,
+ one cannot determine programmatically which set of data is "right" and
+ which is "wrong".
+
+**Translator**
+: Translators (also called xlators) are stackable modules where each
+ module has a very specific purpose. Translators are stacked in a
+ hierarchical structure called a graph. A translator receives data
+ from its parent translator, performs necessary operations and then
+ passes the data down to its child translator in hierarchy.
+
+**Trusted Storage Pool**
: A storage pool is a trusted network of storage servers. When you
start the first server, the storage pool consists of that server
alone.
-Userspace
+**Userspace**
: Applications running in user space don’t directly interact with
hardware, instead using the kernel to moderate access. Userspace
applications are generally more portable than applications in kernel
space. Gluster is a user space application.
-Volfile
+**Volfile**
: Volfile is a configuration file used by the glusterfs process. Volfiles
are usually located at `/var/lib/glusterd/vols/VOLNAME`.
-Volume
+**Volume**
: A volume is a logical collection of bricks. Most of the gluster
management operations happen on the volume.
diff --git a/doc/admin-guide/en-US/markdown/pdfgen.sh b/doc/admin-guide/en-US/markdown/pdfgen.sh
new file mode 100755
index 000000000..68b320617
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/pdfgen.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# pdfgen.sh simple pdf generation helper script.
+# Copyright (C) 2012-2013 James Shubin
+# Written by James Shubin <james@shubin.ca>
+
+#dir='/tmp/pdf'
+dir=`pwd`'/output/'
+ln -s ../images images
+mkdir -p "$dir"
+
+for i in *.md; do
+ pandoc $i -o "$dir"`echo $i | sed 's/\.md$/\.pdf/'`
+done
+
+rm images # remove symlink
+
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 01b7f7554..e6061ffc6 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+.\" Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
.\" This file is licensed to you under your choice of the GNU Lesser
@@ -8,21 +8,24 @@
.\"
.\"
.\"
-.TH GlusterFS 8 "Cluster Filesystem" "18 March 2010" "Gluster Inc."
+.TH GlusterFS 8 "Cluster Filesystem" "14 September 2013" "Red Hat, Inc."
.SH NAME
-mount.glusterfs - script to mount native GlusterFS volume
+.B mount.glusterfs - script to mount native GlusterFS volume
.SH SYNOPSIS
-.B mount -t glusterfs
-.I [-o <options>] <volumeserver>:<volumeid> <mountpoint>
+.B mount -t glusterfs [-o <options>] <volumeserver>:/<volume>
+.B <mountpoint>
.TP
-.B mount -t glusterfs
-.I [-o <options>] <path/to/volumefile> <mountpoint>
+.B mount -t glusterfs [-o <options>] <server1>,<server2>,
+.B <server3>,..<serverN>:/<volname> <mount_point>
+.TP
+.TP
+.B mount -t glusterfs [-o <options>] <path/to/volumefile> <mountpoint>
.PP
.SH DESCRIPTION
This tool is part of \fBglusterfs\fR(8) package, which is used to mount using
GlusterFS native binary.
-\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting
+\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting
native GlusterFS client. This subcommand, however, can also be used as a
standalone command with limited functionality.
@@ -38,17 +41,46 @@ File to use for logging [default:/var/log/glusterfs/glusterfs.log]
Logging severity. Valid options are TRACE, DEBUG, WARNING, ERROR, CRITICAL
INFO and NONE [default: INFO]
.TP
+\fBacl
+Mount the filesystem with POSIX ACL support
+.TP
+\fBfopen\-keep\-cache
+Do not purge the cache on file open
+.TP
+\fBselinux
+Enable SELinux label (extended attributes) support on inodes
+.TP
+\fBworm
+Mount the filesystem in 'worm' mode
+.TP
+\fBaux\-gfid\-mount
+Enable access to filesystem through gfid directly
+.TP
\fBro\fR
Mount the filesystem read-only
+.TP
+\fBenable\-ino32=\fRBOOL
+Use 32-bit inodes when mounting to work around broken applications that don't
+support 64-bit inodes
+
.PP
.SS "Advanced options"
.PP
.TP
-\fBvolfile\-id=\fRKEY
-Volume key or name of the volume file to be fetched from server
+\fBattribute\-timeout=\fRSECONDS
+Set attribute timeout to SECONDS for inodes in fuse kernel module [default: 1]
+.TP
+\fBentry\-timeout=\fRSECONDS
+Set entry timeout to SECONDS in fuse kernel module [default: 1]
+.TP
+\fBbackground\-qlen=\fRN
+Set fuse module's background queue length to N [default: 64]
.TP
-\fBtransport=\fRTRANSPORT-TYPE
-Transport type to get volume file from server [default: tcp]
+\fBgid\-timeout=\fRSECONDS
+Set auxiliary group list timeout to SECONDS for fuse translator [default: 0]
+.TP
+\fBnegative\-timeout=\fRSECONDS
+Set negative timeout to SECONDS in fuse kernel module [default: 0]
.TP
\fBvolume\-name=\fRVOLUME-NAME
Volume name to be used for MOUNT-POINT [default: top most volume in
@@ -57,22 +89,41 @@ VOLUME-FILE]
\fBdirect\-io\-mode=\fRdisable
Disable direct I/O mode in fuse kernel module
.TP
+\fBcongestion\-threshold=\fRN
+Set fuse module's congestion threshold to N [default: 48]
+.TP
+.TP
+\fBbackup\-volfile\-servers=\fRSERVERLIST
+Provide list of backup volfile servers in the following format [default: None]
+
+\fB$ mount -t glusterfs -obackup-volfile-servers=<server2>:\fR
+\fB <server3>:...:<serverN> <server1>:/<volname> <mount_point>\fR
+
+.TP
+.TP
+\fBbackupvolfile\-server=\fRSERVER
+Provide a single backup volfile server in the following format [default: None]
+
+\fB $ mount -t glusterfs -obackupvolfile-server=<server2>
+\fB <server1>:/<volname> <mount_point>
+
+.TP
.PP
.SH FILES
.TP
.I /etc/fstab
A typical GlusterFS entry in /etc/fstab looks like below
-server1.gluster.com:mirror /mnt/mirror glusterfs log-file=/var/log/mirror.vol,ro,defaults 0 0
+\fBserver1:/mirror /mnt/mirror glusterfs log-file=/var/log/mirror.log,acl,selinux 0 0\fR
.TP
-.I /etc/mtab
-An example entry of a GlusterFS mountpoint in /etc/mtab looks like below
+.I /proc/mounts
+An example entry of a GlusterFS mountpoint in /proc/mounts looks like below
-mirror.vol /mnt/glusterfs fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0
+\fBserver1:/mirror /mnt/glusterfs fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0\fR
.SH SEE ALSO
\fBglusterfs\fR(8), \fBmount\fR(8), \fBgluster\fR(8)
.SH COPYRIGHT
-Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+Copyright(c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
diff --git a/extras/Ubuntu/README.Ubuntu b/extras/Ubuntu/README.Ubuntu
index 0c5b7828d..890da3ca6 100644
--- a/extras/Ubuntu/README.Ubuntu
+++ b/extras/Ubuntu/README.Ubuntu
@@ -1,5 +1,6 @@
Bug 765014 - Mounting from localhost in fstab fails at boot on ubuntu
-(https://bugzilla.redhat.com/show_bug.cgi?id=765014)
+(original bug: https://bugzilla.redhat.com/show_bug.cgi?id=765014)
+(updated in: https://bugzilla.redhat.com/show_bug.cgi?id=1047007)
(https://bugs.launchpad.net/ubuntu/+source/glusterfs/+bug/876648)
Ubuntu uses upstart instead of init to bootstrap the system and it has a unique
@@ -10,15 +11,16 @@ and the volume is mounted from localhost, the mount fails at boot time. To
correct this we need to launch glusterd using upstart and block the glusterfs
mounting event until glusterd is started.
-The glusterd.conf file contains the necessary configuration for upstart to
-manage the glusterd service. It should be placed in /etc/init/glusterd.conf
+The glusterfs-server.conf file contains the necessary configuration for upstart to
+manage the glusterd service. It should be placed in /etc/init/glusterfs-server.conf
on Ubuntu systems, and then the old initscript /etc/init.d/glusterd can be
removed. An additional upstart job, mounting-glusterfs.conf, is also required
-to block mounting glusterfs volumes until the glusterd service is available.
+to block mounting glusterfs volumes until the network interfaces are available.
Both of these upstart jobs need to be placed in /etc/init to resolve the issue.
-Starting with Ubuntu 12.04, Precise Pangolin, these upstart jobs will be
-included with the glusterfs-server package in the Ubuntu repository.
+Starting with Ubuntu 14.04, Trusty Tahr, these upstart jobs will be included
+with the glusterfs-server and glusterfs-client packages in the Ubuntu
+universe repository.
This affects all versions of glusterfs on the Ubuntu platform since at least
10.04, Lucid Lynx.
diff --git a/extras/Ubuntu/glusterd.conf b/extras/Ubuntu/glusterfs-server.conf
index aa99502b0..aa99502b0 100644
--- a/extras/Ubuntu/glusterd.conf
+++ b/extras/Ubuntu/glusterfs-server.conf
diff --git a/extras/Ubuntu/mounting-glusterfs.conf b/extras/Ubuntu/mounting-glusterfs.conf
index 3c59c0f63..786ef16df 100644
--- a/extras/Ubuntu/mounting-glusterfs.conf
+++ b/extras/Ubuntu/mounting-glusterfs.conf
@@ -1,7 +1,6 @@
author "Louis Zuckerman <me@louiszuckerman.com>"
-description "Block the mounting event for glusterfs filesystems until glusterd is running"
+description "Block the mounting event for glusterfs filesystems until the network interfaces are running"
start on mounting TYPE=glusterfs
task
-exec start wait-for-state WAIT_FOR=glusterd WAITER=mounting-glusterfs
-
+exec start wait-for-state WAIT_FOR=static-network-up WAITER=mounting-glusterfs
diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate
index 6a69ab1e3..85e69d2c0 100644
--- a/extras/glusterfs-georep-logrotate
+++ b/extras/glusterfs-georep-logrotate
@@ -7,12 +7,30 @@ delaycompress
notifempty
/var/log/glusterfs/geo-replication/*/*.log {
+ sharedscripts
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
}
/var/log/glusterfs/geo-replication-slaves/*.log {
+ sharedscripts
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
}
/var/log/glusterfs/geo-replication-slaves/*/*.log {
+ sharedscripts
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
}
diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in
index ef630bd44..a39fdbfb5 100755
--- a/geo-replication/src/peer_gsec_create.in
+++ b/geo-replication/src/peer_gsec_create.in
@@ -8,5 +8,11 @@ if [ ! -f "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub ]; then
ssh-keygen -N '' -f "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem > /dev/null
fi
-output=`echo command=\"@libexecdir@/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub`
-echo $output
+if [ ! -f "$GLUSTERD_WORKING_DIR"/geo-replication/tar_ssh.pem.pub ]; then
+ \rm -rf "$GLUSTERD_WORKING_DIR"/geo-replication/tar_ssh.pem*
+ ssh-keygen -N '' -f "$GLUSTERD_WORKING_DIR"/geo-replication/tar_ssh.pem > /dev/null
+fi
+
+output1=`echo command=\"${exec_prefix}/libexec/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub`
+output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" " "``cat "$GLUSTERD_WORKING_DIR"/geo-replication/tar_ssh.pem.pub`
+echo -e "$output1\n$output2"
diff --git a/geo-replication/syncdaemon/configinterface.py b/geo-replication/syncdaemon/configinterface.py
index a326e8246..0f764c47a 100644
--- a/geo-replication/syncdaemon/configinterface.py
+++ b/geo-replication/syncdaemon/configinterface.py
@@ -5,6 +5,10 @@ except ImportError:
import configparser as ConfigParser
import re
from string import Template
+import os
+import errno
+import sys
+from stat import ST_DEV, ST_INO, ST_MTIME
from syncdutils import escape, unescape, norm, update_file, GsyncdError
@@ -65,8 +69,38 @@ class GConffile(object):
self.auxdicts = dd
self.config = ConfigParser.RawConfigParser()
self.config.read(path)
+ self.dev, self.ino, self.mtime = -1, -1, -1
self._normconfig()
+ def _load(self):
+ try:
+ sres = os.stat(self.path)
+ self.dev = sres[ST_DEV]
+ self.ino = sres[ST_INO]
+ self.mtime = sres[ST_MTIME]
+ except (OSError, IOError):
+ if sys.exc_info()[1].errno == errno.ENOENT:
+ sres = None
+
+ self.config.read(self.path)
+ self._normconfig()
+
+ def get_realtime(self, opt):
+ try:
+ sres = os.stat(self.path)
+ except (OSError, IOError):
+ if sys.exc_info()[1].errno == errno.ENOENT:
+ sres = None
+ else:
+ raise
+
+ # compare file system stat with that of our stream file handle
+ if not sres or sres[ST_DEV] != self.dev or \
+ sres[ST_INO] != self.ino or self.mtime != sres[ST_MTIME]:
+ self._load()
+
+ return self.get(opt, printValue=False)
+
def section(self, rx=False):
"""get the section name of the section representing .peers in .config"""
peers = self.peers
@@ -162,7 +196,7 @@ class GConffile(object):
if self.config.has_section(self.section()):
update_from_sect(self.section(), MultiDict(dct, *self.auxdicts))
- def get(self, opt=None):
+ def get(self, opt=None, printValue=True):
"""print the matching key/value pairs from .config,
or if @opt given, the value for @opt (according to the
logic described in .update_to)
@@ -173,7 +207,10 @@ class GConffile(object):
opt = norm(opt)
v = d.get(opt)
if v:
- print(v)
+ if printValue:
+ print(v)
+ else:
+ return v
else:
for k, v in d.iteritems():
if k == '__name__':
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index 7fcc3165a..64c26a5d2 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -191,6 +191,7 @@ def main_i():
op.add_option('--log-file-mbr', metavar='LOGF', type=str, action='callback', callback=store_abs)
op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs)
op.add_option('--state-detail-file', metavar='STATF', type=str, action='callback', callback=store_abs)
+ op.add_option('--georep-session-working-dir', metavar='STATF', type=str, action='callback', callback=store_abs)
op.add_option('--ignore-deletes', default=False, action='store_true')
op.add_option('--isolated-slave', default=False, action='store_true')
op.add_option('--use-rsync-xattrs', default=False, action='store_true')
@@ -202,6 +203,7 @@ def main_i():
op.add_option('--local-id', metavar='ID', help=SUPPRESS_HELP, default='')
op.add_option('--local-path', metavar='PATH', help=SUPPRESS_HELP, default='')
op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh')
+ op.add_option('--ssh-command-tar', metavar='CMD', default='ssh')
op.add_option('--rsync-command', metavar='CMD', default='rsync')
op.add_option('--rsync-options', metavar='OPTS', default='')
op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress')
@@ -228,6 +230,7 @@ def main_i():
op.add_option('--change-interval', metavar='SEC', type=int, default=3)
# working directory for changelog based mechanism
op.add_option('--working-dir', metavar='DIR', type=str, action='callback', callback=store_abs)
+ op.add_option('--use-tarssh', default=False, action='store_true')
op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local)
# duh. need to specify dest or value will be mapped to None :S
@@ -474,8 +477,15 @@ def main_i():
GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf')
if confdata.op == 'set':
logging.info('checkpoint %s set' % confdata.val)
+ gcnf.delete('checkpoint_completed')
+ gcnf.delete('checkpoint_target')
elif confdata.op == 'del':
logging.info('checkpoint info was reset')
+ # if it is removing 'checkpoint' then we need
+ # to remove 'checkpoint_completed' and 'checkpoint_target' too
+ gcnf.delete('checkpoint_completed')
+ gcnf.delete('checkpoint_target')
+
except IOError:
if sys.exc_info()[1].errno == ENOENT:
# directory of log path is not present,
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 95810a61e..721fe18bd 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -10,15 +10,16 @@ import socket
import string
import errno
from shutil import copyfileobj
-from errno import ENOENT, ENODATA, EPIPE, EEXIST
+from errno import ENOENT, ENODATA, EPIPE, EEXIST, errorcode
from threading import currentThread, Condition, Lock
from datetime import datetime
+from libcxattr import Xattr
from gconf import gconf
from tempfile import mkdtemp, NamedTemporaryFile
from syncdutils import FreeObject, Thread, GsyncdError, boolify, escape, \
unescape, select, gauxpfx, md5hex, selfkill, entry2pb, \
- lstat, errno_wrap
+ lstat, errno_wrap, update_file
URXTIME = (-1, 0)
@@ -59,7 +60,8 @@ def gmaster_builder(excrawl=None):
crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin')
sendmarkmixin = boolify(gconf.use_rsync_xattrs) and SendmarkRsyncMixin or SendmarkNormalMixin
purgemixin = boolify(gconf.ignore_deletes) and PurgeNoopMixin or PurgeNormalMixin
- class _GMaster(crawlmixin, modemixin, sendmarkmixin, purgemixin):
+ syncengine = boolify(gconf.use_tarssh) and TarSSHEngine or RsyncEngine
+ class _GMaster(crawlmixin, modemixin, sendmarkmixin, purgemixin, syncengine):
pass
return _GMaster
@@ -101,14 +103,17 @@ class NormalMixin(object):
if not 'default_xtime' in opts:
opts['default_xtime'] = URXTIME
- def xtime_low(self, server, path, **opts):
- xt = server.xtime(path, self.uuid)
+ def xtime_low(self, rsc, path, **opts):
+ if rsc == self.master:
+ xt = rsc.server.xtime(path, self.uuid)
+ else:
+ xt = rsc.server.stime(path, self.uuid)
if isinstance(xt, int) and xt != ENODATA:
return xt
if xt == ENODATA or xt < self.volmark:
if opts['create']:
xt = _xtime_now()
- server.aggregated.set_xtime(path, self.uuid, xt)
+ rsc.server.aggregated.set_xtime(path, self.uuid, xt)
else:
xt = opts['default_xtime']
return xt
@@ -140,7 +145,7 @@ class NormalMixin(object):
return xte > xtrd
def set_slave_xtime(self, path, mark):
- self.slave.server.set_xtime(path, self.uuid, mark)
+ self.slave.server.set_stime(path, self.uuid, mark)
self.slave.server.set_xtime_remote(path, self.uuid, mark)
class PartialMixin(NormalMixin):
@@ -190,6 +195,65 @@ class PurgeNoopMixin(object):
def purge_missing(self, path, names):
pass
+class TarSSHEngine(object):
+ """Sync engine that uses tar(1) piped over ssh(1)
+ for data transfers. Good for lots of small files.
+ """
+ def a_syncdata(self, files):
+ logging.debug('files: %s' % (files))
+ for f in files:
+ pb = self.syncer.add(f)
+ def regjob(se, xte, pb):
+ rv = pb.wait()
+ if rv[0]:
+ logging.debug('synced ' + se)
+ return True
+ else:
+ # stat check for file presence
+ st = lstat(se)
+ if isinstance(st, int):
+ return True
+ logging.warn('tar+ssh: %s [errcode: %d]' % (se, rv[1]))
+ self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb)
+
+ def syncdata_wait(self):
+ if self.wait(self.FLAT_DIR_HIERARCHY, None):
+ return True
+
+ def syncdata(self, files):
+ self.a_syncdata(files)
+ self.syncdata_wait()
+
+class RsyncEngine(object):
+ """Sync engine that uses rsync(1) for data transfers"""
+ def a_syncdata(self, files):
+ logging.debug('files: %s' % (files))
+ for f in files:
+ logging.debug('candidate for syncing %s' % f)
+ pb = self.syncer.add(f)
+ def regjob(se, xte, pb):
+ rv = pb.wait()
+ if rv[0]:
+ logging.debug('synced ' + se)
+ return True
+ else:
+ if rv[1] in [23, 24]:
+ # stat to check if the file exists
+ st = lstat(se)
+ if isinstance(st, int):
+ # file got unlinked in the interim
+ return True
+ logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1]))
+ self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb)
+
+ def syncdata_wait(self):
+ if self.wait(self.FLAT_DIR_HIERARCHY, None):
+ return True
+
+ def syncdata(self, files):
+ self.a_syncdata(files)
+ self.syncdata_wait()
+
class GMasterCommon(object):
"""abstract class impementling master role"""
@@ -234,7 +298,7 @@ class GMasterCommon(object):
else:
rsc = self.master
self.make_xtime_opts(rsc == self.master, opts)
- return self.xtime_low(rsc.server, path, **opts)
+ return self.xtime_low(rsc, path, **opts)
def get_initial_crawl_data(self):
# while persisting only 'files_syncd' is non-zero, rest of
@@ -243,18 +307,26 @@ class GMasterCommon(object):
default_data = {'files_syncd': 0,
'files_remaining': 0,
'bytes_remaining': 0,
- 'purges_remaining': 0}
+ 'purges_remaining': 0,
+ 'total_files_skipped': 0}
if getattr(gconf, 'state_detail_file', None):
try:
- return json.load(open(gconf.state_detail_file))
- except (IOError, OSError):
+ with open(gconf.state_detail_file, 'r+') as f:
+ loaded_data= json.load(f)
+ diff_data = set(default_data) - set (loaded_data)
+ if len(diff_data):
+ for i in diff_data:
+ loaded_data[i] = default_data[i]
+ return loaded_data
+ except (IOError):
ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- # Create file with initial data
+ logging.warn ('Creating new gconf.state_detail_file.')
+ # Create file with initial data
+ try:
with open(gconf.state_detail_file, 'wb') as f:
json.dump(default_data, f)
return default_data
- else:
+ except:
raise
return default_data
@@ -264,6 +336,8 @@ class GMasterCommon(object):
same_dir = os.path.dirname(gconf.state_detail_file)
with NamedTemporaryFile(dir=same_dir, delete=False) as tmp:
json.dump(self.total_crawl_stats, tmp)
+ tmp.flush()
+ os.fsync(tmp.fileno())
os.rename(tmp.name, gconf.state_detail_file)
except (IOError, OSError):
raise
@@ -272,7 +346,13 @@ class GMasterCommon(object):
self.master = master
self.slave = slave
self.jobtab = {}
- self.syncer = Syncer(slave)
+ if boolify(gconf.use_tarssh):
+ logging.info("using 'tar over ssh' as the sync engine")
+ self.syncer = Syncer(slave, self.slave.tarssh)
+ else:
+ logging.info("using 'rsync' as the sync engine")
+ # partial transfer (cf. rsync(1)), that's normal
+ self.syncer = Syncer(slave, self.slave.rsync, [23, 24])
# crawls vs. turns:
# - self.crawls is simply the number of crawl() invocations on root
# - one turn is a maximal consecutive sequence of crawls so that each
@@ -294,6 +374,8 @@ class GMasterCommon(object):
self.terminate = False
self.sleep_interval = 1
self.checkpoint_thread = None
+ self.current_files_skipped_count = 0
+ self.skipped_gfid_list = []
def init_keep_alive(cls):
"""start the keep-alive thread """
@@ -336,7 +418,8 @@ class GMasterCommon(object):
gconf.configinterface.set('volume_id', self.uuid)
if self.volinfo:
if self.volinfo['retval']:
- raise GsyncdError("master is corrupt")
+ logging.warn("master cluster's info may not be valid %d" % \
+ self.volinfo['retval'])
self.start_checkpoint_thread()
else:
raise GsyncdError("master volinfo unavailable")
@@ -349,7 +432,7 @@ class GMasterCommon(object):
while not self.terminate:
if self.start:
logging.debug("... crawl #%d done, took %.6f seconds" % \
- (self.crawls, time.time() - self.start))
+ (self.crawls, time.time() - self.start))
self.start = time.time()
should_display_info = self.start - self.lastreport['time'] >= 60
if should_display_info:
@@ -363,9 +446,20 @@ class GMasterCommon(object):
if int(t1 - t0) >= 60: #lets hardcode this check to 60 seconds
crawl = self.should_crawl()
t0 = t1
+ self.update_worker_remote_node()
if not crawl:
+ self.update_worker_health("Passive")
+ # bring up _this_ brick to the cluster stime
+ # which is min of cluster (but max of the replicas)
+ brick_stime = self.xtime('.', self.slave)
+ cluster_stime = self.master.server.aggregated.stime_mnt('.', '.'.join([str(self.uuid), str(gconf.slave_id)]))
+ logging.debug("Cluster stime: %s | Brick stime: %s" % (repr(cluster_stime), repr(brick_stime)))
+ if not isinstance(cluster_stime, int):
+ if brick_stime < cluster_stime:
+ self.slave.server.set_stime(self.FLAT_DIR_HIERARCHY, self.uuid, cluster_stime)
time.sleep(5)
continue
+ self.update_worker_health("Active")
self.crawl()
if oneshot:
return
@@ -375,7 +469,7 @@ class GMasterCommon(object):
def _checkpt_param(cls, chkpt, prm, xtimish=True):
"""use config backend to lookup a parameter belonging to
checkpoint @chkpt"""
- cprm = getattr(gconf, 'checkpoint_' + prm, None)
+ cprm = gconf.configinterface.get_realtime('checkpoint_' + prm)
if not cprm:
return
chkpt_mapped, val = cprm.split(':', 1)
@@ -402,17 +496,6 @@ class GMasterCommon(object):
ts += '.' + str(tpair[1])
return ts
- def get_extra_info(self):
- str_info = '\nUptime=%s;FilesSyncd=%d;FilesPending=%d;BytesPending=%d;DeletesPending=%d;' % \
- (self._crawl_time_format(datetime.now() - self.crawl_start), \
- self.total_crawl_stats['files_syncd'], \
- self.total_crawl_stats['files_remaining'], \
- self.total_crawl_stats['bytes_remaining'], \
- self.total_crawl_stats['purges_remaining'])
- str_info += '\0'
- logging.debug(str_info)
- return str_info
-
def _crawl_time_format(self, crawl_time):
# Ex: 5 years, 4 days, 20:23:10
years, days = divmod(crawl_time.days, 365.25)
@@ -431,27 +514,49 @@ class GMasterCommon(object):
date += "%s:%s:%s" % (string.zfill(h, 2), string.zfill(m, 2), string.zfill(s, 2))
return date
- def checkpt_service(self, chan, chkpt, tgt):
+ def checkpt_service(self, chan, chkpt):
"""checkpoint service loop
monitor and verify checkpoint status for @chkpt, and listen
for incoming requests for whom we serve a pretty-formatted
status report"""
- if not chkpt:
- # dummy loop for the case when there is no checkpt set
- while True:
+ while True:
+ chkpt = gconf.configinterface.get_realtime("checkpoint")
+ if not chkpt:
+ gconf.configinterface.delete("checkpoint_completed")
+ gconf.configinterface.delete("checkpoint_target")
+ # dummy loop for the case when there is no checkpt set
select([chan], [], [])
conn, _ = chan.accept()
- conn.send(self.get_extra_info())
+ conn.send('\0')
conn.close()
- completed = self._checkpt_param(chkpt, 'completed', xtimish=False)
- if completed:
- completed = tuple(int(x) for x in completed.split('.'))
- while True:
+ continue
+
+ checkpt_tgt = self._checkpt_param(chkpt, 'target')
+ if not checkpt_tgt:
+ checkpt_tgt = self.xtime('.')
+ if isinstance(checkpt_tgt, int):
+ raise GsyncdError("master root directory is unaccessible (%s)",
+ os.strerror(checkpt_tgt))
+ self._set_checkpt_param(chkpt, 'target', checkpt_tgt)
+ logging.debug("checkpoint target %s has been determined for checkpoint %s" % \
+ (repr(checkpt_tgt), chkpt))
+
+ # check if the label is 'now'
+ chkpt_lbl = chkpt
+ try:
+ x1,x2 = chkpt.split(':')
+ if x1 == 'now':
+ chkpt_lbl = "as of " + self.humantime(x2)
+ except:
+ pass
+ completed = self._checkpt_param(chkpt, 'completed', xtimish=False)
+ if completed:
+ completed = tuple(int(x) for x in completed.split('.'))
s,_,_ = select([chan], [], [], (not completed) and 5 or None)
# either request made and we re-check to not
# give back stale data, or we still hunting for completion
- if self.native_xtime(tgt) and self.native_xtime(tgt) < self.volmark:
+ if self.native_xtime(checkpt_tgt) and self.native_xtime(checkpt_tgt) < self.volmark:
# indexing has been reset since setting the checkpoint
status = "is invalid"
else:
@@ -459,12 +564,12 @@ class GMasterCommon(object):
if isinstance(xtr, int):
raise GsyncdError("slave root directory is unaccessible (%s)",
os.strerror(xtr))
- ncompleted = self.xtime_geq(xtr, tgt)
+ ncompleted = self.xtime_geq(xtr, checkpt_tgt)
if completed and not ncompleted: # stale data
logging.warn("completion time %s for checkpoint %s became stale" % \
(self.humantime(*completed), chkpt))
completed = None
- gconf.confdata.delete('checkpoint-completed')
+ gconf.configinterface.delete('checkpoint_completed')
if ncompleted and not completed: # just reaching completion
completed = "%.6f" % time.time()
self._set_checkpt_param(chkpt, 'completed', completed, xtimish=False)
@@ -478,7 +583,7 @@ class GMasterCommon(object):
try:
conn, _ = chan.accept()
try:
- conn.send(" | checkpoint %s %s %s" % (chkpt, status, self.get_extra_info()))
+ conn.send("checkpoint %s is %s\0" % (chkpt_lbl, status))
except:
exc = sys.exc_info()[1]
if (isinstance(exc, OSError) or isinstance(exc, IOError)) and \
@@ -505,18 +610,8 @@ class GMasterCommon(object):
pass
chan.bind(state_socket)
chan.listen(1)
- checkpt_tgt = None
- if gconf.checkpoint:
- checkpt_tgt = self._checkpt_param(gconf.checkpoint, 'target')
- if not checkpt_tgt:
- checkpt_tgt = self.xtime('.')
- if isinstance(checkpt_tgt, int):
- raise GsyncdError("master root directory is unaccessible (%s)",
- os.strerror(checkpt_tgt))
- self._set_checkpt_param(gconf.checkpoint, 'target', checkpt_tgt)
- logging.debug("checkpoint target %s has been determined for checkpoint %s" % \
- (repr(checkpt_tgt), gconf.checkpoint))
- t = Thread(target=self.checkpt_service, args=(chan, gconf.checkpoint, checkpt_tgt))
+ chkpt = gconf.configinterface.get_realtime("checkpoint")
+ t = Thread(target=self.checkpt_service, args=(chan, chkpt))
t.start()
self.checkpoint_thread = t
@@ -567,15 +662,11 @@ class GMasterChangelogMixin(GMasterCommon):
POS_GFID = 0
POS_TYPE = 1
- POS_ENTRY1 = 2
- POS_ENTRY2 = 3 # renames
-
- _CL_TYPE_DATA_PFX = "D "
- _CL_TYPE_METADATA_PFX = "M "
- _CL_TYPE_ENTRY_PFX = "E "
+ POS_ENTRY1 = -1
- TYPE_GFID = [_CL_TYPE_DATA_PFX] # ignoring metadata ops
- TYPE_ENTRY = [_CL_TYPE_ENTRY_PFX]
+ TYPE_META = "M "
+ TYPE_GFID = "D "
+ TYPE_ENTRY = "E "
# flat directory heirarchy for gfid based access
FLAT_DIR_HIERARCHY = '.'
@@ -594,39 +685,11 @@ class GMasterChangelogMixin(GMasterCommon):
logging.debug('changelog working dir %s (log: %s)' % (workdir, logfile))
return (workdir, logfile)
- # update stats from *this* crawl
- def update_cumulative_stats(self, files_pending):
- self.total_crawl_stats['files_remaining'] = files_pending['count']
- self.total_crawl_stats['bytes_remaining'] = files_pending['bytes']
- self.total_crawl_stats['purges_remaining'] = files_pending['purge']
-
- # sync data
- def syncdata(self, datas):
- logging.debug('datas: %s' % (datas))
- for data in datas:
- logging.debug('candidate for syncing %s' % data)
- pb = self.syncer.add(data)
- def regjob(se, xte, pb):
- rv = pb.wait()
- if rv[0]:
- logging.debug('synced ' + se)
- return True
- else:
- if rv[1] in [23, 24]:
- # stat to check if the file exist
- st = lstat(se)
- if isinstance(st, int):
- # file got unlinked in the interim
- return True
- logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1]))
- self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, data, None, pb)
- if self.wait(self.FLAT_DIR_HIERARCHY, None):
- return True
-
def process_change(self, change, done, retry):
pfx = gauxpfx()
clist = []
entries = []
+ meta_gfid = set()
datas = set()
# basic crawl stats: files and bytes
@@ -652,136 +715,351 @@ class GMasterChangelogMixin(GMasterCommon):
dct[k] = ed[k]
return dct
- # regular file update: bytes & count
- def _update_reg(entry, size):
- if not entry in files_pending['files']:
- files_pending['count'] += 1
- files_pending['bytes'] += size
- files_pending['files'].append(entry)
- # updates for directories, symlinks etc..
- def _update_rest():
+ # entry counts (not purges)
+ def entry_update():
files_pending['count'] += 1
- # entry count
- def entry_update(entry, size, mode):
- if stat.S_ISREG(mode):
- _update_reg(entry, size)
- else:
- _update_rest()
# purge count
def purge_update():
files_pending['purge'] += 1
for e in clist:
e = e.strip()
- et = e[self.IDX_START:self.IDX_END]
- ec = e[self.IDX_END:].split(' ')
- if et in self.TYPE_ENTRY:
+ et = e[self.IDX_START:self.IDX_END] # entry type
+ ec = e[self.IDX_END:].split(' ') # rest of the bits
+
+ if et == self.TYPE_ENTRY:
+ # extract information according to the type of
+ # the entry operation. create(), mkdir() and mknod()
+ # have mode, uid, gid information in the changelog
+ # itself, so no need to stat()...
ty = ec[self.POS_TYPE]
+
+ # PARGFID/BNAME
en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
+ # GFID of the entry
gfid = ec[self.POS_GFID]
- # definitely need a better way bucketize entry ops
+
if ty in ['UNLINK', 'RMDIR']:
purge_update()
entries.append(edct(ty, gfid=gfid, entry=en))
- continue
- go = os.path.join(pfx, gfid)
- st = lstat(go)
- if isinstance(st, int):
- if ty == 'RENAME':
- entries.append(edct('UNLINK', gfid=gfid, entry=en))
- else:
- logging.debug('file %s got purged in the interim' % go)
- continue
- entry_update(go, st.st_size, st.st_mode)
- if ty in ['CREATE', 'MKDIR', 'MKNOD']:
- entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
- elif ty == 'LINK':
- entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
- elif ty == 'SYMLINK':
- rl = errno_wrap(os.readlink, [en], [ENOENT])
- if isinstance(rl, int):
- continue
- entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
- elif ty == 'RENAME':
- e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
- entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st))
+ elif ty in ['CREATE', 'MKDIR', 'MKNOD']:
+ entry_update()
+ # stat information present in the changelog itself
+ entries.append(edct(ty, gfid=gfid, entry=en, mode=int(ec[2]),\
+ uid=int(ec[3]), gid=int(ec[4])))
else:
- logging.warn('ignoring %s [op %s]' % (gfid, ty))
- elif et in self.TYPE_GFID:
- go = os.path.join(pfx, ec[0])
- st = lstat(go)
- if isinstance(st, int):
- logging.debug('file %s got purged in the interim' % go)
- continue
- entry_update(go, st.st_size, st.st_mode)
- datas.update([go])
+ # stat() to get mode and other information
+ go = os.path.join(pfx, gfid)
+ st = lstat(go)
+ if isinstance(st, int):
+ if ty == 'RENAME': # special hack for renames...
+ entries.append(edct('UNLINK', gfid=gfid, entry=en))
+ else:
+ logging.debug('file %s got purged in the interim' % go)
+ continue
+
+ if ty == 'LINK':
+ entry_update()
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
+ elif ty == 'SYMLINK':
+ rl = errno_wrap(os.readlink, [en], [ENOENT])
+ if isinstance(rl, int):
+ continue
+ entry_update()
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
+ elif ty == 'RENAME':
+ entry_update()
+ e1 = unescape(os.path.join(pfx, ec[self.POS_ENTRY1 - 1]))
+ entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en, stat=st))
+ else:
+ logging.warn('ignoring %s [op %s]' % (gfid, ty))
+ elif et == self.TYPE_GFID:
+ datas.add(os.path.join(pfx, ec[0]))
+ elif et == self.TYPE_META:
+ if ec[1] == 'SETATTR': # only setattr's for now...
+ meta_gfid.add(os.path.join(pfx, ec[0]))
+ else:
+ logging.warn('got invalid changelog type: %s' % (et))
logging.debug('entries: %s' % repr(entries))
if not retry:
- self.update_cumulative_stats(files_pending)
+ self.update_worker_cumilitive_status(files_pending)
# sync namespace
if (entries):
self.slave.server.entry_ops(entries)
+ # sync metadata
+ if (meta_gfid):
+ meta_entries = []
+ for go in meta_gfid:
+ st = lstat(go)
+ if isinstance(st, int):
+ logging.debug('file %s got purged in the interim' % go)
+ continue
+ meta_entries.append(edct('META', go=go, stat=st))
+ if meta_entries:
+ self.slave.server.meta_ops(meta_entries)
# sync data
- if self.syncdata(datas):
- if done:
- self.master.server.changelog_done(change)
- return True
-
- def sync_done(self):
- self.total_crawl_stats['files_syncd'] += self.total_crawl_stats['files_remaining']
- self.total_crawl_stats['files_remaining'] = 0
- self.total_crawl_stats['bytes_remaining'] = 0
- self.total_crawl_stats['purges_remaining'] = 0
- self.update_crawl_data()
+ if datas:
+ self.a_syncdata(datas)
def process(self, changes, done=1):
- for change in changes:
- tries = 0
- retry = False
- while True:
- logging.debug('processing change %s' % change)
- if self.process_change(change, done, retry):
- self.sync_done()
- break
- retry = True
- tries += 1
- if tries == self.MAX_RETRIES:
- logging.warn('changelog %s could not be processed - moving on...' % os.path.basename(change))
- self.sync_done()
- if done:
- self.master.server.changelog_done(change)
- break
- # it's either entry_ops() or Rsync that failed to do it's
- # job. Mostly it's entry_ops() [which currently has a problem
- # of failing to create an entry but failing to return an errno]
- # Therefore we do not know if it's either Rsync or the freaking
- # entry_ops() that failed... so we retry the _whole_ changelog
- # again.
- # TODO: remove entry retries when it's gets fixed.
- logging.warn('incomplete sync, retrying changelog: %s' % change)
- time.sleep(0.5)
- self.turns += 1
+ tries = 0
+ retry = False
- def upd_stime(self, stime):
+ while True:
+ self.skipped_gfid_list = []
+ self.current_files_skipped_count = 0
+
+ # first, fire all changelog transfers in parallel. entry and metadata
+ # are performed synchronously, therefore in serial. However at the end
+ # of each changelog, data is synchronized with a_syncdata() - which
+ # means it is serial w.r.t entries/metadata of that changelog but
+ # happens in parallel with data of other changelogs.
+
+ for change in changes:
+ logging.debug('processing change %s' % change)
+ self.process_change(change, done, retry)
+ if not retry:
+ self.turns += 1 # number of changelogs processed in the batch
+
+ # Now we wait for all the data transfers fired off in the above step
+ # to complete. Note that this is not ideal either. Ideally we want to
+ # trigger the entry/meta-data transfer of the next batch while waiting
+ # for the data transfer of the current batch to finish.
+
+ # Note that the reason to wait for the data transfer (vs doing it
+ # completely in the background and call the changelog_done()
+ # asynchronously) is because this waiting acts as a "backpressure"
+ # and prevents a spiraling increase of wait stubs from consuming
+ # unbounded memory and resources.
+
+ # update the slave's time with the timestamp of the _last_ changelog
+ # file time suffix. Since, the changelog prefix time is the time when
+ # the changelog was rolled over, introduce a tolerance of 1 second to
+ # counter the small delta b/w the marker update and gettimeofday().
+ # NOTE: this is only for changelog mode, not xsync.
+
+ # @change is the last changelog (therefore max time for this batch)
+ if self.syncdata_wait():
+ if done:
+ xtl = (int(change.split('.')[-1]) - 1, 0)
+ self.upd_stime(xtl)
+ map(self.master.server.changelog_done, changes)
+ self.update_worker_files_syncd()
+ break
+
+ # We do not know which changelog transfer failed, retry everything.
+ retry = True
+ tries += 1
+ if tries == self.MAX_RETRIES:
+ logging.warn('changelogs %s could not be processed - moving on...' % \
+ ' '.join(map(os.path.basename, changes)))
+ self.update_worker_total_files_skipped(self.current_files_skipped_count)
+ logging.warn('SKIPPED GFID = %s' % ','.join(self.skipped_gfid_list))
+ self.update_worker_files_syncd()
+ if done:
+ xtl = (int(change.split('.')[-1]) - 1, 0)
+ self.upd_stime(xtl)
+ map(self.master.server.changelog_done, changes)
+ break
+ # it's either entry_ops() or Rsync that failed to do its
+ # job. Mostly it's entry_ops() [which currently has a problem
+ # of failing to create an entry but failing to return an errno]
+ # Therefore we do not know if it's either Rsync or the freaking
+ # entry_ops() that failed... so we retry the _whole_ changelog
+ # again.
+ # TODO: remove entry retries when it gets fixed.
+ logging.warn('incomplete sync, retrying changelogs: %s' % \
+ ' '.join(map(os.path.basename, changes)))
+ time.sleep(0.5)
+
+ def upd_stime(self, stime, path=None):
+ if not path:
+ path = self.FLAT_DIR_HIERARCHY
if not stime == URXTIME:
- self.sendmark(self.FLAT_DIR_HIERARCHY, stime)
+ self.sendmark(path, stime)
+
+ def get_worker_status_file(self):
+ file_name = gconf.local_path+'.status'
+ file_name = file_name.replace("/", "_")
+ worker_status_file = gconf.georep_session_working_dir+file_name
+ return worker_status_file
+
+ def update_worker_status(self, key, value):
+ default_data = {"remote_node":"N/A",
+ "worker status":"Not Started",
+ "crawl status":"N/A",
+ "files_syncd": 0,
+ "files_remaining": 0,
+ "bytes_remaining": 0,
+ "purges_remaining": 0,
+ "total_files_skipped": 0}
+ worker_status_file = self.get_worker_status_file()
+ try:
+ with open(worker_status_file, 'r+') as f:
+ loaded_data = json.load(f)
+ loaded_data[key] = value
+ os.ftruncate(f.fileno(), 0)
+ os.lseek(f.fileno(), 0, os.SEEK_SET)
+ json.dump(loaded_data, f)
+ f.flush()
+ os.fsync(f.fileno())
+ except (IOError, OSError, ValueError):
+ logging.info ('Creating new %s' % worker_status_file)
+ try:
+ with open(worker_status_file, 'wb') as f:
+ default_data[key] = value
+ json.dump(default_data, f)
+ f.flush()
+ os.fsync(f.fileno())
+ except:
+ raise
+
+ def update_worker_cumilitive_status(self, files_pending):
+ default_data = {"remote_node":"N/A",
+ "worker status":"Not Started",
+ "crawl status":"N/A",
+ "files_syncd": 0,
+ "files_remaining": 0,
+ "bytes_remaining": 0,
+ "purges_remaining": 0,
+ "total_files_skipped": 0}
+ worker_status_file = self.get_worker_status_file()
+ try:
+ with open(worker_status_file, 'r+') as f:
+ loaded_data = json.load(f)
+ loaded_data['files_remaining'] = files_pending['count']
+ loaded_data['bytes_remaining'] = files_pending['bytes']
+ loaded_data['purges_remaining'] = files_pending['purge']
+ os.ftruncate(f.fileno(), 0)
+ os.lseek(f.fileno(), 0, os.SEEK_SET)
+ json.dump(loaded_data, f)
+ f.flush()
+ os.fsync(f.fileno())
+ except (IOError, OSError, ValueError):
+ logging.info ('Creating new %s' % worker_status_file)
+ try:
+ with open(worker_status_file, 'wb') as f:
+ default_data['files_remaining'] = files_pending['count']
+ default_data['bytes_remaining'] = files_pending['bytes']
+ default_data['purges_remaining'] = files_pending['purge']
+ json.dump(default_data, f)
+ f.flush()
+ os.fsync(f.fileno())
+ except:
+ raise
+
+ def update_worker_remote_node (self):
+ node = sys.argv[-1]
+ node = node.split("@")[-1]
+ remote_node_ip = node.split(":")[0]
+ remote_node_vol = node.split(":")[3]
+ remote_node = remote_node_ip + '::' + remote_node_vol
+ self.update_worker_status ('remote_node', remote_node)
+
+ def update_worker_health (self, state):
+ self.update_worker_status ('worker status', state)
+
+ def update_worker_crawl_status (self, state):
+ self.update_worker_status ('crawl status', state)
+
+ def update_worker_files_syncd (self):
+ default_data = {"remote_node":"N/A",
+ "worker status":"Not Started",
+ "crawl status":"N/A",
+ "files_syncd": 0,
+ "files_remaining": 0,
+ "bytes_remaining": 0,
+ "purges_remaining": 0,
+ "total_files_skipped": 0}
+ worker_status_file = self.get_worker_status_file()
+ try:
+ with open(worker_status_file, 'r+') as f:
+ loaded_data = json.load(f)
+ loaded_data['files_syncd'] += loaded_data['files_remaining']
+ loaded_data['files_remaining'] = 0
+ loaded_data['bytes_remaining'] = 0
+ loaded_data['purges_remaining'] = 0
+ os.ftruncate(f.fileno(), 0)
+ os.lseek(f.fileno(), 0, os.SEEK_SET)
+ json.dump(loaded_data, f)
+ f.flush()
+ os.fsync(f.fileno())
+ except (IOError, OSError, ValueError):
+ logging.info ('Creating new %s' % worker_status_file)
+ try:
+ with open(worker_status_file, 'wb') as f:
+ json.dump(default_data, f)
+ f.flush()
+ os.fsync(f.fileno())
+ except:
+ raise
+
+ def update_worker_files_remaining (self, state):
+ self.update_worker_status ('files_remaining', state)
+
+ def update_worker_bytes_remaining (self, state):
+ self.update_worker_status ('bytes_remaining', state)
+
+ def update_worker_purges_remaining (self, state):
+ self.update_worker_status ('purges_remaining', state)
+
+ def update_worker_total_files_skipped (self, value):
+ default_data = {"remote_node":"N/A",
+ "worker status":"Not Started",
+ "crawl status":"N/A",
+ "files_syncd": 0,
+ "files_remaining": 0,
+ "bytes_remaining": 0,
+ "purges_remaining": 0,
+ "total_files_skipped": 0}
+ worker_status_file = self.get_worker_status_file()
+ try:
+ with open(worker_status_file, 'r+') as f:
+ loaded_data = json.load(f)
+ loaded_data['total_files_skipped'] = value
+ loaded_data['files_remaining'] -= value
+ os.ftruncate(f.fileno(), 0)
+ os.lseek(f.fileno(), 0, os.SEEK_SET)
+ json.dump(loaded_data, f)
+ f.flush()
+ os.fsync(f.fileno())
+ except (IOError, OSError, ValueError):
+ logging.info ('Creating new %s' % worker_status_file)
+ try:
+ with open(worker_status_file, 'wb') as f:
+ default_data['total_files_skipped'] = value
+ json.dump(default_data, f)
+ f.flush()
+ os.fsync(f.fileno())
+ except:
+ raise
def crawl(self):
+ self.update_worker_crawl_status("Changelog Crawl")
changes = []
+ # get stime (from the brick) and purge changelogs
+ # that are _historical_ to that time.
+ purge_time = self.xtime('.', self.slave)
+ if isinstance(purge_time, int):
+ purge_time = None
try:
self.master.server.changelog_scan()
self.crawls += 1
except OSError:
self.fallback_xsync()
+ self.update_worker_crawl_status("Hybrid Crawl")
changes = self.master.server.changelog_getchanges()
if changes:
- xtl = self.xtime(self.FLAT_DIR_HIERARCHY)
- if isinstance(xtl, int):
- raise GsyncdError('master is corrupt')
+ if purge_time:
+ logging.info("slave's time: %s" % repr(purge_time))
+ processed = [x for x in changes if int(x.split('.')[-1]) < purge_time[0]]
+ for pr in processed:
+ logging.info('skipping already processed change: %s...' % os.path.basename(pr))
+ self.master.server.changelog_done(pr)
+ changes.remove(pr)
logging.debug('processing changes %s' % repr(changes))
self.process(changes)
- self.upd_stime(xtl)
def register(self):
(workdir, logfile) = self.setup_working_dir()
@@ -799,17 +1077,20 @@ class GMasterChangelogMixin(GMasterCommon):
class GMasterXsyncMixin(GMasterChangelogMixin):
"""
-
This crawl needs to be xtime based (as of now
it's not. this is beacuse we generate CHANGELOG
file during each crawl which is then processed
by process_change()).
For now it's used as a one-shot initial sync
mechanism and only syncs directories, regular
- files and symlinks.
+ files, hardlinks and symlinks.
"""
+ XSYNC_MAX_ENTRIES = 1<<13
+
def register(self):
+ self.counter = 0
+ self.comlist = []
self.sleep_interval = 60
self.tempdir = self.setup_working_dir()[0]
self.tempdir = os.path.join(self.tempdir, 'xsync')
@@ -823,6 +1104,36 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
else:
raise
+ def crawl(self):
+ """
+ event dispatcher thread
+
+ this thread dispatches either changelog or synchronizes stime.
+ additionally terminates itself on receiving a 'finale' event
+ """
+ def Xsyncer():
+ self.Xcrawl()
+ t = Thread(target=Xsyncer)
+ t.start()
+ logging.info('starting hybrid crawl...')
+ self.update_worker_crawl_status("Hybrid Crawl")
+ while True:
+ try:
+ item = self.comlist.pop(0)
+ if item[0] == 'finale':
+ logging.info('finished hybrid crawl syncing')
+ break
+ elif item[0] == 'xsync':
+ logging.info('processing xsync changelog %s' % (item[1]))
+ self.process([item[1]], 0)
+ elif item[0] == 'stime':
+ logging.debug('setting slave time: %s' % repr(item[1]))
+ self.upd_stime(item[1][1], item[1][0])
+ else:
+ logging.warn('unknown tuple in comlist (%s)' % repr(item))
+ except IndexError:
+ time.sleep(1)
+
def write_entry_change(self, prefix, data=[]):
self.fh.write("%s %s\n" % (prefix, ' '.join(data)))
@@ -839,24 +1150,61 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
def fname(self):
return self.xsync_change
- def crawl(self, path='.', xtr=None, done=0):
- """ generate a CHANGELOG file consumable by process_change """
+ def put(self, mark, item):
+ self.comlist.append((mark, item))
+
+ def sync_xsync(self, last):
+ """schedule a processing of changelog"""
+ self.close()
+ self.put('xsync', self.fname())
+ self.counter = 0
+ if not last:
+ time.sleep(1) # make sure changelogs are 1 second apart
+ self.open()
+
+ def sync_stime(self, stime=None, last=False):
+ """schedule a stime synchronization"""
+ if stime:
+ self.put('stime', stime)
+ if last:
+ self.put('finale', None)
+
+ def sync_done(self, stime=None, last=False):
+ self.sync_xsync(last)
+ if stime:
+ self.sync_stime(stime, last)
+
+ def Xcrawl(self, path='.', xtr_root=None):
+ """
+ generate a CHANGELOG file consumable by process_change.
+
+ slave's xtime (stime) is _cached_ for comparisons across
+ the filesystem tree, but set after directory synchronization.
+ """
if path == '.':
self.open()
self.crawls += 1
- if not xtr:
+ if not xtr_root:
# get the root stime and use it for all comparisons
- xtr = self.xtime('.', self.slave)
- if isinstance(xtr, int):
- if xtr != ENOENT:
- raise GsyncdError('slave is corrupt')
- xtr = self.minus_infinity
+ xtr_root = self.xtime('.', self.slave)
+ if isinstance(xtr_root, int):
+ if xtr_root != ENOENT:
+ logging.warn("slave cluster not returning the " \
+ "correct xtime for root (%d)" % xtr_root)
+ xtr_root = self.minus_infinity
xtl = self.xtime(path)
if isinstance(xtl, int):
- raise GsyncdError('master is corrupt')
- if xtr == xtl:
+ logging.warn("master cluster's xtime not found")
+ xtr = self.xtime(path, self.slave)
+ if isinstance(xtr, int):
+ if xtr != ENOENT:
+ logging.warn("slave cluster not returning the " \
+ "correct xtime for %s (%d)" % (path, xtr))
+ xtr = self.minus_infinity
+ xtr = max(xtr, xtr_root)
+ if not self.need_sync(path, xtl, xtr):
if path == '.':
- self.close()
+ self.sync_done((path, xtl), True)
return
self.xtime_reversion_hook(path, xtl, xtr)
logging.debug("entering " + path)
@@ -867,43 +1215,42 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
for e in dem:
bname = e
e = os.path.join(path, e)
- st = lstat(e)
+ xte = self.xtime(e)
+ if isinstance(xte, int):
+ logging.warn("irregular xtime for %s: %s" % (e, errno.errorcode[xte]))
+ continue
+ if not self.need_sync(e, xte, xtr):
+ continue
+ st = self.master.server.lstat(e)
if isinstance(st, int):
- logging.warn('%s got purged in the interim..' % e)
+ logging.warn('%s got purged in the interim ...' % e)
continue
gfid = self.master.server.gfid(e)
if isinstance(gfid, int):
- logging.warn('skipping entry %s..' % (e))
- continue
- xte = self.xtime(e)
- if isinstance(xte, int):
- raise GsyncdError('master is corrupt')
- if not self.need_sync(e, xte, xtr):
+ logging.warn('skipping entry %s..' % e)
continue
mo = st.st_mode
+ self.counter += 1
+ if self.counter == self.XSYNC_MAX_ENTRIES:
+ self.sync_done()
if stat.S_ISDIR(mo):
- self.write_entry_change("E", [gfid, 'MKDIR', escape(os.path.join(pargfid, bname))])
- self.crawl(e, xtr)
+ self.write_entry_change("E", [gfid, 'MKDIR', str(mo), str(st.st_uid), str(st.st_gid), escape(os.path.join(pargfid, bname))])
+ self.Xcrawl(e, xtr_root)
+ self.sync_done((e, xte), False)
elif stat.S_ISLNK(mo):
- rl = errno_wrap(os.readlink, [en], [ENOENT])
- if isinstance(rl, int):
- continue
- self.write_entry_change("E", [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname)), rl])
- else:
+ self.write_entry_change("E", [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname))])
+ elif stat.S_ISREG(mo):
+ nlink = st.st_nlink
+ nlink -= 1 # fixup backend stat link count
# if a file has a hardlink, create a Changelog entry as 'LINK' so the slave
# side will decide whether to create a new entry or to create a link.
- if st.st_nlink == 1:
- self.write_entry_change("E", [gfid, 'MKNOD', escape(os.path.join(pargfid, bname))])
+ if nlink == 1:
+ self.write_entry_change("E", [gfid, 'MKNOD', str(mo), str(st.st_uid), str(st.st_gid), escape(os.path.join(pargfid, bname))])
else:
self.write_entry_change("E", [gfid, 'LINK', escape(os.path.join(pargfid, bname))])
- if stat.S_ISREG(mo):
- self.write_entry_change("D", [gfid])
-
+ self.write_entry_change("D", [gfid])
if path == '.':
- logging.info('processing xsync changelog %s' % self.fname())
- self.close()
- self.process([self.fname()], done)
- self.upd_stime(xtl)
+ self.sync_done((path, xtl), True)
class BoxClosedErr(Exception):
pass
@@ -979,12 +1326,13 @@ class Syncer(object):
each completed syncjob.
"""
- def __init__(self, slave):
+ def __init__(self, slave, sync_engine, resilient_errnos=[]):
"""spawn worker threads"""
self.slave = slave
self.lock = Lock()
self.pb = PostBox()
- self.bytes_synced = 0
+ self.sync_engine = sync_engine
+ self.errnos_ok = resilient_errnos
for i in range(int(gconf.sync_jobs)):
t = Thread(target=self.syncjob)
t.start()
@@ -1002,11 +1350,10 @@ class Syncer(object):
break
time.sleep(0.5)
pb.close()
- po = self.slave.rsync(pb)
+ po = self.sync_engine(pb)
if po.returncode == 0:
ret = (True, 0)
- elif po.returncode in (23, 24):
- # partial transfer (cf. rsync(1)), that's normal
+ elif po.returncode in self.errnos_ok:
ret = (False, po.returncode)
else:
po.errfail()
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index faf62f868..8deb5114b 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -265,6 +265,9 @@ class Server(object):
FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
+ GFID_XATTR = 'trusted.gfid' # for backend gfid fetch, do not use GX_NSPACE_PFX
+ GFID_FMTSTR = "!" + "B"*16
+
local_path = ''
@classmethod
@@ -305,6 +308,38 @@ class Server(object):
raise OSError(ENOTDIR, os.strerror(ENOTDIR))
return os.listdir(path)
+
+ @classmethod
+ @_pathguard
+ def lstat(cls, path):
+ try:
+ return os.lstat(path)
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return ex.errno
+ else:
+ raise
+
+
+ @classmethod
+ @_pathguard
+ def gfid(cls, path):
+ try:
+ buf = Xattr.lgetxattr(path, cls.GFID_XATTR, 16)
+ m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in struct.unpack(cls.GFID_FMTSTR, buf)]))
+ return '-'.join(m.groups())
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ def gfid_mnt(cls, gfidpath):
+ return errno_wrap(Xattr.lgetxattr, [gfidpath, 'glusterfs.gfid.string', cls.GX_GFID_CANONICAL_LEN], [ENOENT])
+
@classmethod
@_pathguard
def purge(cls, path, entries=None):
@@ -397,8 +432,42 @@ class Server(object):
raise
@classmethod
- def gfid(cls, gfidpath):
- return errno_wrap(Xattr.lgetxattr, [gfidpath, 'glusterfs.gfid.string', cls.GX_GFID_CANONICAL_LEN], [ENOENT])
+ @_pathguard
+ def stime_mnt(cls, path, uuid):
+ """query stime extended attribute
+
+ Return stime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'stime']), 8))
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def stime(cls, path, uuid):
+ """query stime extended attribute
+
+ Return stime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'stime']), 8))
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
@classmethod
def node_uuid(cls, path='.'):
@@ -409,21 +478,10 @@ class Server(object):
raise
@classmethod
- def xtime_vec(cls, path, *uuids):
- """vectored version of @xtime
-
- accepts a list of uuids and returns a dictionary
- with uuid as key(s) and xtime as value(s)
- """
- xt = {}
- for uuid in uuids:
- xtu = cls.xtime(path, uuid)
- if xtu == ENODATA:
- xtu = None
- if isinstance(xtu, int):
- return xtu
- xt[uuid] = xtu
- return xt
+ @_pathguard
+ def set_stime(cls, path, uuid, mark):
+ """set @mark as stime for @uuid on @path"""
+ Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'stime']), struct.pack('!II', *mark))
@classmethod
@_pathguard
@@ -444,20 +502,16 @@ class Server(object):
Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark))
@classmethod
- def set_xtime_vec(cls, path, mark_dct):
- """vectored (or dictered) version of set_xtime
-
- ignore values that match @ignore
- """
- for u,t in mark_dct.items():
- cls.set_xtime(path, u, t)
-
- @classmethod
def entry_ops(cls, entries):
pfx = gauxpfx()
logging.debug('entries: %s' % repr(entries))
# regular file
- def entry_pack_reg(gf, bn, st):
+ def entry_pack_reg(gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ def entry_pack_reg_stat(gf, bn, st):
blen = len(bn)
mo = st['mode']
return struct.pack(cls._fmt_mknod(blen),
@@ -465,12 +519,10 @@ class Server(object):
gf, mo, bn,
stat.S_IMODE(mo), 0, umask())
# mkdir
- def entry_pack_mkdir(gf, bn, st):
+ def entry_pack_mkdir(gf, bn, mo, uid, gid):
blen = len(bn)
- mo = st['mode']
return struct.pack(cls._fmt_mkdir(blen),
- st['uid'], st['gid'],
- gf, mo, bn,
+ uid, gid, gf, mo, bn,
stat.S_IMODE(mo), umask())
#symlink
def entry_pack_symlink(gf, bn, lnk, st):
@@ -485,7 +537,7 @@ class Server(object):
# to be purged is the GFID gotten from the changelog.
# (a stat(changelog_gfid) would also be valid here)
# The race here is between the GFID check and the purge.
- disk_gfid = cls.gfid(entry)
+ disk_gfid = cls.gfid_mnt(entry)
if isinstance(disk_gfid, int):
return
if not gfid == disk_gfid:
@@ -510,15 +562,15 @@ class Server(object):
else:
break
elif op in ['CREATE', 'MKNOD']:
- blob = entry_pack_reg(gfid, bname, e['stat'])
+ blob = entry_pack_reg(gfid, bname, e['mode'], e['uid'], e['uid'])
elif op == 'MKDIR':
- blob = entry_pack_mkdir(gfid, bname, e['stat'])
+ blob = entry_pack_mkdir(gfid, bname, e['mode'], e['uid'], e['uid'])
elif op == 'LINK':
slink = os.path.join(pfx, gfid)
st = lstat(slink)
if isinstance(st, int):
(pg, bname) = entry2pb(entry)
- blob = entry_pack_reg(gfid, bname, e['stat'])
+ blob = entry_pack_reg_stat(gfid, bname, e['stat'])
else:
errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
elif op == 'SYMLINK':
@@ -528,13 +580,24 @@ class Server(object):
st = lstat(entry)
if isinstance(st, int):
(pg, bname) = entry2pb(en)
- blob = entry_pack_reg(gfid, bname, e['stat'])
+ blob = entry_pack_reg_stat(gfid, bname, e['stat'])
else:
errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
if blob:
errno_wrap(Xattr.lsetxattr_l, [pg, 'glusterfs.gfid.newfile', blob], [EEXIST], [ENOENT, ESTALE, EINVAL])
@classmethod
+ def meta_ops(cls, meta_entries):
+ logging.debug('Meta-entries: %s' % repr(meta_entries))
+ for e in meta_entries:
+ mode = e['stat']['mode']
+ uid = e['stat']['uid']
+ gid = e['stat']['gid']
+ go = e['go']
+ errno_wrap(os.chmod, [go, mode], [ENOENT], [ESTALE, EINVAL])
+ errno_wrap(os.chown, [go, uid, gid], [ENOENT], [ESTALE, EINVAL])
+
+ @classmethod
def changelog_register(cls, cl_brick, cl_dir, cl_log, cl_level, retries = 0):
Changes.cl_register(cl_brick, cl_dir, cl_log, cl_level, retries)
@@ -699,6 +762,29 @@ class SlaveRemote(object):
return po
+ def tarssh(self, files, slaveurl):
+ """invoke tar+ssh
+ -z (compress) can be used if needed, but omitting it now
+ as it results in a weird error (tar+ssh errors out (errcode: 2))
+ """
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+ (host, rdir) = slaveurl.split(':')
+ tar_cmd = ["tar", "-cf", "-", "--files-from", "-"]
+ ssh_cmd = gconf.ssh_command_tar.split() + [host, "tar", "--overwrite", "-xf", "-", "-C", rdir]
+ p0 = Popen(tar_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+ p1 = Popen(ssh_cmd, stdin=p0.stdout, stderr=subprocess.PIPE)
+ for f in files:
+ p0.stdin.write(f)
+ p0.stdin.write('\n')
+ p0.stdin.close()
+ p0.wait()
+
+ p1.wait()
+ p1.terminate_geterr(fail_on_err = False)
+
+ return p1
class AbstractUrl(object):
"""abstract base class for url scheme classes"""
@@ -1041,12 +1127,20 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
except ValueError:
pass
return e
+ @classmethod
+ def lstat(cls, e):
+ """ path based backend stat """
+ return super(brickserver, cls).lstat(e)
+ @classmethod
+ def gfid(cls, e):
+ """ path based backend gfid fetch """
+ return super(brickserver, cls).gfid(e)
if gconf.slave_id:
# define {,set_}stime in slave, thus preempting
# the call to remote, so that it takes data from
# the local brick
- slave.server.xtime = types.MethodType(lambda _self, path, uuid: brickserver.xtime(path, uuid + '.' + gconf.slave_id), slave.server)
- slave.server.set_xtime = types.MethodType(lambda _self, path, uuid, mark: brickserver.set_xtime(path, uuid + '.' + gconf.slave_id, mark), slave.server)
+ slave.server.stime = types.MethodType(lambda _self, path, uuid: brickserver.stime(path, uuid + '.' + gconf.slave_id), slave.server)
+ slave.server.set_stime = types.MethodType(lambda _self, path, uuid, mark: brickserver.set_stime(path, uuid + '.' + gconf.slave_id, mark), slave.server)
(g1, g2) = self.gmaster_instantiate_tuple(slave)
g1.master.server = brickserver
g2.master.server = brickserver
@@ -1067,6 +1161,9 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
def rsync(self, files):
return sup(self, files, self.slavedir)
+ def tarssh(self, files):
+ return sup(self, files, self.slavedir)
+
class SSH(AbstractUrl, SlaveRemote):
"""scheme class for ssh:// urls
@@ -1170,3 +1267,6 @@ class SSH(AbstractUrl, SlaveRemote):
def rsync(self, files):
return sup(self, files, '-e', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args),
*(gconf.rsync_ssh_options.split() + [self.slaveurl]))
+
+ def tarssh(self, files):
+ return sup(self, files, self.slaveurl)
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 348eb38c1..1b5684c6d 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -227,7 +227,7 @@ def log_raise_exception(excont):
logging.warn("!!!!!!!!!!!!!")
logging.warn('!!! getting "No such file or directory" errors '
"is most likely due to MISCONFIGURATION, please consult "
- "http://access.redhat.com/knowledge/docs/en-US/Red_Hat_Storage/2.0/html/Administration_Guide/chap-User_Guide-Geo_Rep-Preparation-Settingup_Environment.html")
+ "https://access.redhat.com/site/documentation/en-US/Red_Hat_Storage/2.1/html/Administration_Guide/chap-User_Guide-Geo_Rep-Preparation-Settingup_Environment.html")
logging.warn("!!!!!!!!!!!!!")
gconf.transport.terminate_geterr()
elif isinstance(exc, OSError) and exc.errno in (ENOTCONN, ECONNABORTED):
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index e6dbd5f35..43817a4cf 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -3,7 +3,7 @@
%global _for_fedora_koji_builds 0
# uncomment and add '%' to use the prereltag for pre-releases
-# global prereltag beta4
+# %%global prereltag qa3
# if you wish to compile an rpm without rdma support, compile like this...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without rdma
@@ -46,7 +46,7 @@
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without bd
%{?_without_bd:%global _without_bd --disable-bd-xlator}
-%if ( 0%{?rhel} && 0%{?rhel} < 6 )
+%if ( 0%{?rhel} && 0%{?rhel} < 6 || 0%{?sles_version} )
%define _without_bd --disable-bd-xlator
%endif
@@ -65,15 +65,15 @@
# From https://fedoraproject.org/wiki/Packaging:Python#Macros
%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
-%{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
-%{!?python_sitearch: %global python_sitearch %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")}
+%{!?python_sitelib: %global python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
+%{!?python_sitearch: %global python_sitearch %(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")}
%endif
Summary: Cluster File System
%if ( 0%{_for_fedora_koji_builds} )
Name: glusterfs
-Version: 3.4.1
-Release: 3%{?prereltag:.%{prereltag}}%{?dist}
+Version: 3.5.0
+Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
Vendor: Fedora Project
%else
Name: @PACKAGE_NAME@
@@ -94,9 +94,6 @@ Source5: glusterfsd.logrotate
Source6: rhel5-load-fuse-modules
Source11: glusterfsd.service
Source13: glusterfsd.init
-Patch0: %{name}-3.2.5.configure.ac.patch
-Patch1: %{name}-3.3.0.libglusterfs.Makefile.patch
-Patch2: %{name}-3.3.1.rpc.rpcxprt.rdma.name.c.patch
%else
Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
%endif
@@ -118,7 +115,7 @@ Requires(postun): systemd-units
%define _init_disable() /bin/systemctl disable %1.service ;
%define _init_restart() /bin/systemctl try-restart %1.service ;
%define _init_stop() /bin/systemctl stop %1.service ;
-%define _init_install() %{__install} -D -p -m 0644 %1 %{buildroot}%{_unitdir}/%2.service ;
+%define _init_install() install -D -p -m 0644 %1 %{buildroot}%{_unitdir}/%2.service ;
# can't seem to make a generic macro that works
%define _init_glusterd %{_unitdir}/glusterd.service
%define _init_glusterfsd %{_unitdir}/glusterfsd.service
@@ -134,7 +131,7 @@ Requires(postun): /sbin/service
%define _init_disable() /sbin/chkconfig --del %1 ;
%define _init_restart() /sbin/service %1 condrestart &>/dev/null ;
%define _init_stop() /sbin/service %1 stop &>/dev/null ;
-%define _init_install() %{__install} -D -p -m 0755 %1 %{buildroot}%{_sysconfdir}/init.d/%2 ;
+%define _init_install() install -D -p -m 0755 %1 %{buildroot}%{_sysconfdir}/init.d/%2 ;
# can't seem to make a generic macro that works
%define _init_glusterd %{_sysconfdir}/init.d/glusterd
%define _init_glusterfsd %{_sysconfdir}/init.d/glusterfsd
@@ -158,11 +155,10 @@ BuildRequires: lvm2-devel
BuildRequires: glib2-devel
%endif
-Obsoletes: hekafs <= 0.7
-Obsoletes: %{name}-libs <= 2.0.0
+Obsoletes: hekafs
Obsoletes: %{name}-common < %{version}-%{release}
Obsoletes: %{name}-core < %{version}-%{release}
-Provides: %{name}-libs = %{version}-%{release}
+Obsoletes: %{name}-ufo
Provides: %{name}-common = %{version}-%{release}
Provides: %{name}-core = %{version}-%{release}
@@ -173,14 +169,16 @@ Provides: %{name}-core = %{version}-%{release}
%if ( 0%{?rhel} == 6 )
# filter_setup exists in RHEL6 only
%filter_provides_in %{_libdir}/glusterfs/%{version}/
- %global __filter_from_req %{?__filter_from_req} | %{__grep} -v -P '^(?!lib).*\.so.*$'
+ %global __filter_from_req %{?__filter_from_req} | grep -v -P '^(?!lib).*\.so.*$'
%filter_setup
%else
- # modern rpm and current Fedora do not generate requires if the provides
- # are filtered
+ # modern rpm and current Fedora do not generate requires when the
+ # provides are filtered
%global __provides_exclude_from ^%{_libdir}/glusterfs/%{version}/.*$
%endif
+%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}}
+
%if ( 0%{?rhel} && 0%{?rhel} < 6 )
# _sharedstatedir is not provided by RHEL5
%define _sharedstatedir /var/lib
@@ -346,7 +344,7 @@ This package provides the glusterfs libgfapi library.
%package resource-agents
Summary: OCF Resource Agents for GlusterFS
License: GPLv3+
-%if ( ! ( 0%{?rhel} && 0%{?rhel} < 6 ) )
+%if ( ! ( 0%{?rhel} && 0%{?rhel} < 6 || 0%{?sles_version} ) )
# EL5 does not support noarch sub-packages
BuildArch: noarch
%endif
@@ -423,13 +421,6 @@ regression testing of Gluster.
%prep
%setup -q -n %{name}-%{version}%{?prereltag}
-%if ( 0%{_for_fedora_koji_builds} )
-#%patch0 -p0
-%patch1 -p0 -F4
-%if ( "%{version}" == "3.3.1" )
-%patch2 -p1
-%endif
-%endif
%build
./autogen.sh
@@ -446,67 +437,74 @@ regression testing of Gluster.
# fix hardening and remove rpath in shlibs
%if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
-%{__sed} -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool
+sed -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool
%endif
-%{__sed} -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool
-%{__sed} -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool
+sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool
+sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool
-%{__make} %{?_smp_mflags}
+make %{?_smp_mflags}
pushd api/examples
-FLAGS="$RPM_OPT_FLAGS" %{__python} setup.py build
+FLAGS="$RPM_OPT_FLAGS" python setup.py build
popd
%install
-%{__rm} -rf %{buildroot}
-%{__make} install DESTDIR=%{buildroot}
+rm -rf %{buildroot}
+make install DESTDIR=%{buildroot}
# install the gfapi Python library in /usr/lib/python*/site-packages
pushd api/examples
-%{__python} setup.py install --skip-build --verbose --root %{buildroot}
+python setup.py install --skip-build --verbose --root %{buildroot}
popd
# Install include directory
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs
-%{__install} -p -m 0644 libglusterfs/src/*.h \
+mkdir -p %{buildroot}%{_includedir}/glusterfs
+install -p -m 0644 libglusterfs/src/*.h \
%{buildroot}%{_includedir}/glusterfs/
-%{__install} -p -m 0644 contrib/uuid/*.h \
+install -p -m 0644 contrib/uuid/*.h \
%{buildroot}%{_includedir}/glusterfs/
# Following needed by hekafs multi-tenant translator
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/rpc
-%{__install} -p -m 0644 rpc/rpc-lib/src/*.h \
+mkdir -p %{buildroot}%{_includedir}/glusterfs/rpc
+install -p -m 0644 rpc/rpc-lib/src/*.h \
%{buildroot}%{_includedir}/glusterfs/rpc/
-%{__install} -p -m 0644 rpc/xdr/src/*.h \
+install -p -m 0644 rpc/xdr/src/*.h \
%{buildroot}%{_includedir}/glusterfs/rpc/
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/server
-%{__install} -p -m 0644 xlators/protocol/server/src/*.h \
+mkdir -p %{buildroot}%{_includedir}/glusterfs/server
+install -p -m 0644 xlators/protocol/server/src/*.h \
%{buildroot}%{_includedir}/glusterfs/server/
%if ( 0%{_for_fedora_koji_builds} )
-%{__install} -D -p -m 0644 %{SOURCE1} \
+install -D -p -m 0644 %{SOURCE1} \
%{buildroot}%{_sysconfdir}/sysconfig/glusterd
-%{__install} -D -p -m 0644 %{SOURCE2} \
+install -D -p -m 0644 %{SOURCE2} \
%{buildroot}%{_sysconfdir}/sysconfig/glusterfsd
%else
-%{__install} -D -p -m 0644 extras/glusterd-sysconfig \
+install -D -p -m 0644 extras/glusterd-sysconfig \
%{buildroot}%{_sysconfdir}/sysconfig/glusterd
%endif
%if ( 0%{_for_fedora_koji_builds} )
%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
-%{__install} -D -p -m 0755 %{SOURCE6} \
+install -D -p -m 0755 %{SOURCE6} \
%{buildroot}%{_sysconfdir}/sysconfig/modules/glusterfs-fuse.modules
%endif
%endif
-%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterd
-%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterfs
-%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterfsd
-%{__mkdir_p} %{buildroot}%{_localstatedir}/run/gluster
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterd
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfs
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfsd
+mkdir -p %{buildroot}%{_localstatedir}/run/gluster
# Remove unwanted files from all the shared libraries
find %{buildroot}%{_libdir} -name '*.a' -delete
find %{buildroot}%{_libdir} -name '*.la' -delete
-# Remove installed docs, they're included by %%doc
-%{__rm} -rf %{buildroot}%{_datadir}/doc/glusterfs/
+# Remove installed docs, the ones we want are included by %%doc, in
+# /usr/share/doc/glusterfs or /usr/share/doc/glusterfs-x.y.z depending
+# on the distribution
+%if ( 0%{?fedora} && 0%{?fedora} > 19 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+rm -rf %{buildroot}%{_pkgdocdir}/*
+%else
+rm -rf %{buildroot}%{_defaultdocdir}/%{name}
+mkdir -p %{buildroot}%{_pkgdocdir}
+%endif
head -50 ChangeLog > ChangeLog.head && mv ChangeLog.head ChangeLog
cat << EOM >> ChangeLog
@@ -515,12 +513,12 @@ https://forge.gluster.org/glusterfs-core/glusterfs/commits/v%{version}%{?prerelt
EOM
# Remove benchmarking and other unpackaged files
-%{__rm} -rf %{buildroot}/benchmarking
-%{__rm} -f %{buildroot}/glusterfs-mode.el
-%{__rm} -f %{buildroot}/glusterfs.vim
+rm -rf %{buildroot}/benchmarking
+rm -f %{buildroot}/glusterfs-mode.el
+rm -f %{buildroot}/glusterfs.vim
# Create working directory
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd
# Update configuration file to /var/lib working directory
sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sharedstatedir}/glusterd|g' \
@@ -533,41 +531,41 @@ sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sha
%if ( 0%{_for_fedora_koji_builds} )
# Client logrotate entry
-%{__install} -D -p -m 0644 %{SOURCE3} \
+install -D -p -m 0644 %{SOURCE3} \
%{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-fuse
# Server logrotate entry
-%{__install} -D -p -m 0644 %{SOURCE4} \
+install -D -p -m 0644 %{SOURCE4} \
%{buildroot}%{_sysconfdir}/logrotate.d/glusterd
# Legacy server logrotate entry
-%{__install} -D -p -m 0644 %{SOURCE5} \
+install -D -p -m 0644 %{SOURCE5} \
%{buildroot}%{_sysconfdir}/logrotate.d/glusterfsd
%else
-%{__install} -D -p -m 0644 extras/glusterfs-logrotate \
+install -D -p -m 0644 extras/glusterfs-logrotate \
%{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
%endif
%if ( 0%{!?_without_georeplication:1} )
# geo-rep ghosts
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
-%{__install} -D -p -m 0644 extras/glusterfs-georep-logrotate \
+install -D -p -m 0644 extras/glusterfs-georep-logrotate \
%{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
%endif
%if ( 0%{!?_without_syslog:1} )
%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
-%{__install} -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \
+install -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \
%{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
%endif
%if ( 0%{?rhel} && 0%{?rhel} == 6 )
-%{__install} -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \
+install -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \
%{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
%endif
%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
-%{__install} -D -p -m 0644 extras/logger.conf.example \
+install -D -p -m 0644 extras/logger.conf.example \
%{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example
%endif
%endif
@@ -575,47 +573,47 @@ touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.co
# the rest of the ghosts
touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
touch %{buildroot}%{_sharedstatedir}/glusterd/options
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/post
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/glustershd
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/peers
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/vols
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/groups
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/nfs/run
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/post
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/glustershd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/peers
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/vols
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/groups
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/nfs/run
touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol
touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid
find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs
%clean
-%{__rm} -rf %{buildroot}
+rm -rf %{buildroot}
%post
/sbin/ldconfig
@@ -634,8 +632,7 @@ find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glus
%endif
%files
-%defattr(-,root,root,-)
-%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README THANKS
+%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README THANKS extras/clear_xattrs.sh
%config(noreplace) %{_sysconfdir}/logrotate.d/*
%config(noreplace) %{_sysconfdir}/sysconfig/*
%if ( 0%{!?_without_syslog:1} )
@@ -684,7 +681,6 @@ find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glus
%if ( 0%{!?_without_rdma:1} )
%files rdma
-%defattr(-,root,root,-)
%{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
%endif
@@ -696,7 +692,6 @@ if [ $1 -ge 1 ]; then
fi
%files geo-replication
-%defattr(-,root,root)
%{_libexecdir}/glusterfs/gsyncd
%{_libexecdir}/glusterfs/python/syncdaemon/*
%{_libexecdir}/glusterfs/gverify.sh
@@ -707,7 +702,6 @@ fi
%endif
%files fuse
-%defattr(-,root,root,-)
%if ( 0%{_for_fedora_koji_builds} )
%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-fuse
%endif
@@ -723,8 +717,6 @@ fi
%endif
%files server
-%defattr(-,root,root,-)
-%doc extras/clear_xattrs.sh
%if ( 0%{_for_fedora_koji_builds} )
%config(noreplace) %{_sysconfdir}/logrotate.d/glusterd
%endif
@@ -751,9 +743,9 @@ fi
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/nsr_recon.so
%ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info
%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options
-# This is really ugly, but I have no idea how to mark these directories in an
-# other way. They should belong to the glusterfs-server package, but don't
-# exist after installation. They are generated on the first start...
+# This is really ugly, but I have no idea how to mark these directories in
+# any other way. They should belong to the glusterfs-server package, but
+# don't exist after installation. They are generated on the first start...
%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks
%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1
%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
@@ -800,13 +792,11 @@ fi
%if ( 0%{!?_without_ocf:1} )
%files resource-agents
-%defattr(-,root,root)
# /usr/lib is the standard for OCF, also on x86_64
%{_prefix}/lib/ocf/resource.d/glusterfs
%endif
%files devel
-%defattr(-,root,root,-)
%{_includedir}/glusterfs
%exclude %{_includedir}/glusterfs/y.tab.h
%exclude %{_includedir}/glusterfs/api
@@ -820,7 +810,6 @@ fi
%{_includedir}/glusterfs/api/*
%files regression-tests
-%defattr(-,root,root,-)
%{_prefix}/share/glusterfs/*
%exclude %{_prefix}/share/glusterfs/tests/basic/rpm.t
@@ -835,14 +824,14 @@ fi
# in gluster.org RPMs.) Be careful to copy them on the off chance that
# /etc and /var/lib are on separate file systems
if [ -d /etc/glusterd -a ! -h %{_sharedstatedir}/glusterd ]; then
- %{__mkdir_p} %{_sharedstatedir}/glusterd
+ mkdir -p %{_sharedstatedir}/glusterd
cp -a /etc/glusterd %{_sharedstatedir}/glusterd
rm -rf /etc/glusterd
ln -sf %{_sharedstatedir}/glusterd /etc/glusterd
fi
# Rename old volfiles in an RPM-standard way. These aren't actually
-# considered package config files, so %config doesn't work for them.
+# considered package config files, so %%config doesn't work for them.
if [ -d %{_sharedstatedir}/glusterd/vols ]; then
for file in $(find %{_sharedstatedir}/glusterd/vols -name '*.vol'); do
newfile=${file}.rpmsave
@@ -887,7 +876,13 @@ if [ $1 -ge 1 ]; then
fi
%changelog
-* Wed Oct 11 2013 Harshavardhana <fharshav@redhat.com>
+* Sat Jan 4 2014 Niels de Vos <ndevos@redhat.com>
+- The main glusterfs package should not provide glusterfs-libs (#1048489)
+
+* Tue Dec 10 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec 3.5.0-0.1.qa3
+
+* Fri Oct 11 2013 Harshavardhana <fharshav@redhat.com>
- Add '_sharedstatedir' macro to `/var/lib` on <= RHEL5 (#1003184)
* Wed Oct 9 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
@@ -916,16 +911,16 @@ fi
- Sync with Fedora glusterfs.spec, add glusterfs-libs RPM for oVirt/qemu-kvm
* Thu Jul 25 2013 Csaba Henk <csaba@redhat.com>
-- Added peer_add_secret_pub and peer_gsec_create to %{_libexecdir}/glusterfs
+- Added peer_add_secret_pub and peer_gsec_create to %%{_libexecdir}/glusterfs
* Thu Jul 25 2013 Aravinda VK <avishwan@redhat.com>
-- Added gverify.sh to %{_libexecdir}/glusterfs directory.
+- Added gverify.sh to %%{_libexecdir}/glusterfs directory.
* Thu Jul 25 2013 Harshavardhana <fharshav@redhat.com>
- Allow to build with '--without bd' to disable 'bd' xlator
* Thu Jun 27 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
-- fix the hardening fix for shlibs, use %%{__sed} macro, shorter ChangeLog
+- fix the hardening fix for shlibs, use %%sed macro, shorter ChangeLog
* Wed Jun 26 2013 Niels de Vos <ndevos@redhat.com>
- move the mount/api xlator to glusterfs-api
diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/client_t.h
index f7812f8f0..548081896 100644
--- a/libglusterfs/src/client_t.h
+++ b/libglusterfs/src/client_t.h
@@ -60,6 +60,7 @@ struct clienttable {
gf_lock_t lock;
cliententry_t *cliententries;
int first_free;
+ client_t *local;
};
typedef struct clienttable clienttable_t;
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index f2df5a6d4..e9fc1222d 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -1121,8 +1121,8 @@ dict_foreach (dict_t *dict,
while (pairs) {
next = pairs->next;
ret = fn (dict, pairs->key, pairs->value, data);
- if (ret == -1)
- return -1;
+ if (ret < 0)
+ return ret;
pairs = next;
}
diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c
index 82baa521a..e89df09c9 100644
--- a/libglusterfs/src/event-history.c
+++ b/libglusterfs/src/event-history.c
@@ -29,6 +29,7 @@ eh_new (size_t buffer_size, gf_boolean_t use_buffer_once,
gf_log ("", GF_LOG_ERROR, "allocating circular buffer failed");
GF_FREE (history);
history = NULL;
+ goto out;
}
history->buffer = buffer;
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 33d2087fc..8f3a2e6fa 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -84,15 +84,18 @@
#define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid"
#define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id"
#define GF_XATTR_LOCKINFO_KEY "trusted.glusterfs.lockinfo"
-#define GF_XATTR_GET_REAL_FILENAME_KEY "user.glusterfs.get_real_filename:"
+#define GF_XATTR_GET_REAL_FILENAME_KEY "glusterfs.get_real_filename:"
+#define GF_XATTR_USER_PATHINFO_KEY "glusterfs.pathinfo"
#define QUOTA_LIMIT_KEY "trusted.glusterfs.quota.limit-set"
#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
#define BD_XATTR_KEY "user.glusterfs"
-#define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \
- strlen (GF_XATTR_PATHINFO_KEY)) == 0)
+#define XATTR_IS_PATHINFO(x) ((strncmp (x, GF_XATTR_PATHINFO_KEY, \
+ strlen (x)) == 0) || \
+ (strncmp (x, GF_XATTR_USER_PATHINFO_KEY, \
+ strlen (x)) == 0))
#define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \
strlen (GF_XATTR_NODE_UUID_KEY)) == 0)
#define XATTR_IS_LOCKINFO(x) (strncmp (x, GF_XATTR_LOCKINFO_KEY, \
@@ -452,7 +455,6 @@ struct _glusterfs_ctx {
int daemon_pipe[2];
- struct client_disconnect *client_disconnect;
struct clienttable *clienttable;
};
typedef struct _glusterfs_ctx glusterfs_ctx_t;
diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c
index e3a4a9fde..0058233a7 100644
--- a/libglusterfs/src/logging.c
+++ b/libglusterfs/src/logging.c
@@ -108,11 +108,44 @@ gf_log_set_xl_loglevel (void *this, gf_loglevel_t level)
}
void
-gf_log_fini (void)
+gf_log_globals_fini (void)
{
pthread_mutex_destroy (&THIS->ctx->log.logfile_mutex);
}
+/** gf_log_fini - function to perform the cleanup of the log information
+ * @data - glusterfs context
+ * @return: success: 0
+ * failure: -1
+ */
+int
+gf_log_fini (void *data)
+{
+ glusterfs_ctx_t *ctx = data;
+ int ret = 0;
+
+ if (ctx == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ if (fclose (ctx->log.logfile) != 0)
+ ret = -1;
+ /* Logfile needs to be set to NULL, so that any
+ call to gf_log after calling gf_log_fini, will
+ log the message to stderr.
+ */
+ ctx->log.logfile = NULL;
+ }
+ }
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
+
+ out:
+ return ret;
+}
#ifdef GF_USE_SYSLOG
/**
diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/logging.h
index cc806a767..e2b7e664d 100644
--- a/libglusterfs/src/logging.h
+++ b/libglusterfs/src/logging.h
@@ -153,6 +153,8 @@ int gf_cmd_log_init (const char *filename);
void set_sys_log_level (gf_loglevel_t level);
+int gf_log_fini(void *data);
+
#define GF_DEBUG(xl, format, args...) \
gf_log ((xl)->name, GF_LOG_DEBUG, format, ##args)
#define GF_INFO(xl, format, args...) \
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
index fc0aa9018..726d38eb6 100644
--- a/libglusterfs/src/mem-types.h
+++ b/libglusterfs/src/mem-types.h
@@ -118,6 +118,7 @@ enum gf_common_mem_types_ {
gf_common_mt_auxgids = 102,
gf_common_mt_syncopctx = 103,
gf_common_mt_iobrefs = 104,
- gf_common_mt_end = 105
+ gf_common_mt_gsync_status_t = 105,
+ gf_common_mt_end = 106
};
#endif
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
index 48c79ee02..5af23592b 100644
--- a/libglusterfs/src/store.c
+++ b/libglusterfs/src/store.c
@@ -62,8 +62,8 @@ gf_store_mkstemp (gf_store_handle_t *shandle)
int fd = -1;
char tmppath[PATH_MAX] = {0,};
- GF_ASSERT (shandle);
- GF_ASSERT (shandle->path);
+ GF_VALIDATE_OR_GOTO ("store", shandle, out);
+ GF_VALIDATE_OR_GOTO ("store", shandle->path, out);
snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
fd = open (tmppath, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0600);
@@ -71,7 +71,7 @@ gf_store_mkstemp (gf_store_handle_t *shandle)
gf_log ("", GF_LOG_ERROR, "Failed to open %s, error: %s",
tmppath, strerror (errno));
}
-
+out:
return fd;
}
@@ -127,8 +127,8 @@ gf_store_rename_tmppath (gf_store_handle_t *shandle)
int32_t ret = -1;
char tmppath[PATH_MAX] = {0,};
- GF_ASSERT (shandle);
- GF_ASSERT (shandle->path);
+ GF_VALIDATE_OR_GOTO ("store", shandle, out);
+ GF_VALIDATE_OR_GOTO ("store", shandle->path, out);
snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
ret = rename (tmppath, shandle->path);
@@ -149,8 +149,8 @@ gf_store_unlink_tmppath (gf_store_handle_t *shandle)
int32_t ret = -1;
char tmppath[PATH_MAX] = {0,};
- GF_ASSERT (shandle);
- GF_ASSERT (shandle->path);
+ GF_VALIDATE_OR_GOTO ("store", shandle, out);
+ GF_VALIDATE_OR_GOTO ("store", shandle->path, out);
snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
ret = unlink (tmppath);
@@ -160,7 +160,7 @@ gf_store_unlink_tmppath (gf_store_handle_t *shandle)
} else {
ret = 0;
}
-
+out:
return ret;
}
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index e8954cc23..d1b9ef84c 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -120,7 +120,18 @@ sys_rename (const char *oldpath, const char *newpath)
int
sys_link (const char *oldpath, const char *newpath)
{
+#ifdef HAVE_LINKAT
+ /*
+ * On most systems (Linux being the notable exception), link(2)
+ * first resolves symlinks. If the target is a directory or
+ * is nonexistent, it will fail. linkat(2) operates on the
+ * symlink instead of its target when the AT_SYMLINK_FOLLOW
+ * flag is not supplied.
+ */
+ return linkat (AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
+#else
return link (oldpath, newpath);
+#endif
}
diff --git a/rpc/rpc-lib/src/auth-glusterfs.c b/rpc/rpc-lib/src/auth-glusterfs.c
index db488434c..48871ffb3 100644
--- a/rpc/rpc-lib/src/auth-glusterfs.c
+++ b/rpc/rpc-lib/src/auth-glusterfs.c
@@ -50,12 +50,6 @@ ret:
int
auth_glusterfs_request_init (rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
return 0;
}
@@ -172,12 +166,6 @@ ret:
int
auth_glusterfs_v2_request_init (rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
return 0;
}
diff --git a/rpc/rpc-lib/src/auth-null.c b/rpc/rpc-lib/src/auth-null.c
index ebdcc8ff8..b030341ab 100644
--- a/rpc/rpc-lib/src/auth-null.c
+++ b/rpc/rpc-lib/src/auth-null.c
@@ -22,15 +22,6 @@
int
auth_null_request_init (rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
-
- memset (req->cred.authdata, 0, GF_MAX_AUTH_BYTES);
- req->cred.datalen = 0;
-
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
-
return 0;
}
diff --git a/rpc/rpc-lib/src/auth-unix.c b/rpc/rpc-lib/src/auth-unix.c
index fa5f0576e..27351f669 100644
--- a/rpc/rpc-lib/src/auth-unix.c
+++ b/rpc/rpc-lib/src/auth-unix.c
@@ -24,12 +24,6 @@
int
auth_unix_request_init (rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
return 0;
}
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index d64f280cf..5876a500b 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -221,6 +221,23 @@ typedef enum {
GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA,
} gf_xl_afr_op_t ;
+struct gf_gsync_detailed_status_ {
+ char node[NAME_MAX];
+ char master[NAME_MAX];
+ char brick[NAME_MAX];
+ char slave_node[NAME_MAX];
+ char worker_status[NAME_MAX];
+ char checkpoint_status[NAME_MAX];
+ char crawl_status[NAME_MAX];
+ char files_syncd[NAME_MAX];
+ char files_remaining[NAME_MAX];
+ char bytes_remaining[NAME_MAX];
+ char purges_remaining[NAME_MAX];
+ char total_files_skipped[NAME_MAX];
+};
+
+typedef struct gf_gsync_detailed_status_ gf_gsync_status_t;
+
#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
#define GLUSTER_HNDSK_VERSION 2 /* 0.0.2 */
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index ac98a5c91..1e9f307be 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -416,7 +416,7 @@ rpc_clnt_reconnect (void *trans_ptr)
conn->reconnect);
conn->reconnect = 0;
- if (conn->connected == 0) {
+ if ((conn->connected == 0) && !clnt->disabled) {
ts.tv_sec = 3;
ts.tv_nsec = 0;
@@ -834,6 +834,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
rpc_request_info_t *req_info = NULL;
rpc_transport_pollin_t *pollin = NULL;
struct timespec ts = {0, };
+ void *clnt_mydata = NULL;
conn = mydata;
if (conn == NULL) {
@@ -870,6 +871,12 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
}
case RPC_TRANSPORT_CLEANUP:
+ if (clnt->notifyfn) {
+ clnt_mydata = clnt->mydata;
+ clnt->mydata = NULL;
+ ret = clnt->notifyfn (clnt, clnt_mydata,
+ RPC_CLNT_DESTROY, NULL);
+ }
rpc_clnt_destroy (clnt);
ret = 0;
break;
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index 584963ad0..2596c3508 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -19,7 +19,8 @@
typedef enum {
RPC_CLNT_CONNECT,
RPC_CLNT_DISCONNECT,
- RPC_CLNT_MSG
+ RPC_CLNT_MSG,
+ RPC_CLNT_DESTROY
} rpc_clnt_event_t;
diff --git a/rpc/rpc-lib/src/rpc-drc.c b/rpc/rpc-lib/src/rpc-drc.c
index e7ba114dd..7e77e038e 100644
--- a/rpc/rpc-lib/src/rpc-drc.c
+++ b/rpc/rpc-lib/src/rpc-drc.c
@@ -712,38 +712,30 @@ rpcsvc_drc_init (rpcsvc_t *svc, dict_t *options)
GF_ASSERT (svc);
GF_ASSERT (options);
- if (!svc->drc) {
- drc = GF_CALLOC (1, sizeof (rpcsvc_drc_globals_t),
- gf_common_mt_drc_globals_t);
- if (!drc)
- return -1;
-
- svc->drc = drc;
- LOCK_INIT (&drc->lock);
- } else {
- drc = svc->drc;
- }
-
- LOCK (&drc->lock);
- if (drc->type != DRC_TYPE_NONE) {
- ret = 0;
- goto out;
- }
-
/* Toggle DRC on/off, when more drc types(persistent/cluster)
are added, we shouldn't treat this as boolean */
ret = dict_get_str_boolean (options, "nfs.drc", _gf_true);
if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_INFO, "drc user options need second look");
+ gf_log (GF_RPCSVC, GF_LOG_INFO,
+ "drc user options need second look");
ret = _gf_true;
}
- if (ret == _gf_false) {
- /* drc off */
- gf_log (GF_RPCSVC, GF_LOG_INFO, "DRC is manually turned OFF");
- ret = 0;
- goto out;
- }
+ gf_log (GF_RPCSVC, GF_LOG_INFO, "DRC is turned %s", (ret?"ON":"OFF"));
+
+ /*DRC off, nothing to do */
+ if (ret == _gf_false)
+ return (0);
+
+ drc = GF_CALLOC (1, sizeof (rpcsvc_drc_globals_t),
+ gf_common_mt_drc_globals_t);
+ if (!drc)
+ return (-1);
+
+ LOCK_INIT (&drc->lock);
+ svc->drc = drc;
+
+ LOCK (&drc->lock);
/* Specify type of DRC to be used */
ret = dict_get_uint32 (options, "nfs.drc-type", &drc_type);
diff --git a/rpc/rpc-lib/src/rpcsvc-auth.c b/rpc/rpc-lib/src/rpcsvc-auth.c
index 4cb86a758..384e4a75d 100644
--- a/rpc/rpc-lib/src/rpcsvc-auth.c
+++ b/rpc/rpc-lib/src/rpcsvc-auth.c
@@ -230,6 +230,8 @@ int
rpcsvc_set_root_squash (rpcsvc_t *svc, dict_t *options)
{
int ret = -1;
+ uid_t anonuid = -1;
+ gid_t anongid = -1;
GF_ASSERT (svc);
GF_ASSERT (options);
@@ -240,8 +242,21 @@ rpcsvc_set_root_squash (rpcsvc_t *svc, dict_t *options)
else
svc->root_squash = _gf_false;
+ ret = dict_get_uint32 (options, "anonuid", &anonuid);
+ if (!ret)
+ svc->anonuid = anonuid;
+ else
+ svc->anonuid = RPC_NOBODY_UID;
+
+ ret = dict_get_uint32 (options, "anongid", &anongid);
+ if (!ret)
+ svc->anongid = anongid;
+ else
+ svc->anongid = RPC_NOBODY_GID;
+
if (svc->root_squash)
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "root squashing enabled ");
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "root squashing enabled "
+ "(uid=%d, gid=%d)", svc->anonuid, svc->anongid);
return 0;
}
@@ -354,25 +369,36 @@ ret:
int
-rpcsvc_auth_request_init (rpcsvc_request_t *req)
+rpcsvc_auth_request_init (rpcsvc_request_t *req, struct rpc_msg *callmsg)
{
- int ret = -1;
+ int32_t ret = 0;
rpcsvc_auth_t *auth = NULL;
- if (!req)
- return -1;
+ if (!req || !callmsg) {
+ ret = -1;
+ goto err;
+ }
+
+ req->cred.flavour = rpc_call_cred_flavour (callmsg);
+ req->cred.datalen = rpc_call_cred_len (callmsg);
+ req->verf.flavour = rpc_call_verf_flavour (callmsg);
+ req->verf.datalen = rpc_call_verf_len (callmsg);
auth = rpcsvc_auth_get_handler (req);
- if (!auth)
+ if (!auth) {
+ ret = -1;
goto err;
- ret = 0;
+ }
+
gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname);
- if (!auth->authops->request_init)
- ret = auth->authops->request_init (req, auth->authprivate);
- req->auxgids = req->auxgidsmall; /* reset to auxgidlarge during
- unsersialize if necessary */
- req->auxgidlarge = NULL;
+ if (auth->authops->request_init)
+ ret = auth->authops->request_init (req, auth->authprivate);
+
+ /* reset to auxgidlarge during
+ unserialize if necessary */
+ req->auxgids = req->auxgidsmall;
+ req->auxgidlarge = NULL;
err:
return ret;
}
diff --git a/rpc/rpc-lib/src/rpcsvc-common.h b/rpc/rpc-lib/src/rpcsvc-common.h
index aed55e039..3c16abeb7 100644
--- a/rpc/rpc-lib/src/rpcsvc-common.h
+++ b/rpc/rpc-lib/src/rpcsvc-common.h
@@ -55,6 +55,8 @@ typedef struct rpcsvc_state {
gf_boolean_t allow_insecure;
gf_boolean_t register_portmap;
gf_boolean_t root_squash;
+ uid_t anonuid;
+ gid_t anongid;
glusterfs_ctx_t *ctx;
/* list of connections which will listen for incoming connections */
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index 037c157f2..69db8b70b 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -367,13 +367,7 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans,
* been copied into the required sections of the req structure,
* we just need to fill in the meta-data about it now.
*/
- req->cred.flavour = rpc_call_cred_flavour (callmsg);
- req->cred.datalen = rpc_call_cred_len (callmsg);
- req->verf.flavour = rpc_call_verf_flavour (callmsg);
- req->verf.datalen = rpc_call_verf_len (callmsg);
-
- /* AUTH */
- rpcsvc_auth_request_init (req);
+ rpcsvc_auth_request_init (req, callmsg);
return req;
}
@@ -1183,7 +1177,7 @@ rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
iobref_add (iobref, replyiob);
/* cache the request in the duplicate request cache for appropriate ops */
- if (req->reply) {
+ if ((req->reply) && (rpcsvc_need_drc (req))) {
drc = req->svc->drc;
LOCK (&drc->lock);
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
index cbc1f4226..30a969b11 100644
--- a/rpc/rpc-lib/src/rpcsvc.h
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -282,14 +282,14 @@ struct rpcsvc_request {
int gidcount = 0; \
if (req->svc->root_squash) { \
if (req->uid == RPC_ROOT_UID) \
- req->uid = RPC_NOBODY_UID; \
+ req->uid = req->svc->anonuid; \
if (req->gid == RPC_ROOT_GID) \
- req->gid = RPC_NOBODY_GID; \
+ req->gid = req->svc->anongid; \
for (gidcount = 0; gidcount < req->auxgidcount; \
++gidcount) { \
if (!req->auxgids[gidcount]) \
req->auxgids[gidcount] = \
- RPC_NOBODY_GID; \
+ req->svc->anongid; \
} \
} \
} while (0);
@@ -553,7 +553,7 @@ struct rpcsvc_auth_list {
};
extern int
-rpcsvc_auth_request_init (rpcsvc_request_t *req);
+rpcsvc_auth_request_init (rpcsvc_request_t *req, struct rpc_msg *callmsg);
extern int
rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options);
diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
index 1647d5b6b..c6eae9739 100644
--- a/rpc/rpc-transport/socket/src/name.c
+++ b/rpc/rpc-transport/socket/src/name.c
@@ -285,7 +285,7 @@ af_unix_client_get_remote_sockaddr (rpc_transport_t *this,
goto err;
}
- if (strlen (connect_path) > UNIX_PATH_MAX) {
+ if ((strlen (connect_path) + 1) > UNIX_PATH_MAX) {
gf_log (this->name, GF_LOG_ERROR,
"connect-path value length %"GF_PRI_SIZET" > %d octets",
strlen (connect_path), UNIX_PATH_MAX);
@@ -329,7 +329,7 @@ af_unix_server_get_local_sockaddr (rpc_transport_t *this,
#define UNIX_PATH_MAX 108
#endif
- if (strlen (listen_path) > UNIX_PATH_MAX) {
+ if ((strlen (listen_path) + 1) > UNIX_PATH_MAX) {
gf_log (this->name, GF_LOG_ERROR,
"option transport.unix.listen-path has value length "
"%"GF_PRI_SIZET" > %d",
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index f9df4ac1d..c6b293be4 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -2195,7 +2195,7 @@ unlock:
rpc_transport_notify (this, event, this);
}
out:
- return 0;
+ return ret;
}
diff --git a/tests/bugs/brick-uid-reset-on-volume-restart.t b/tests/bugs/brick-uid-reset-on-volume-restart.t
new file mode 100755
index 000000000..99629733f
--- /dev/null
+++ b/tests/bugs/brick-uid-reset-on-volume-restart.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function get_uid() {
+ stat -c '%u' $1;
+}
+
+function get_gid() {
+ stat -c '%g' $1;
+}
+
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '8' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+EXPECT 0 get_uid $M0;
+EXPECT 0 get_gid $M0;
+
+TEST chown 100:101 $M0;
+
+EXPECT 100 get_uid $M0;
+EXPECT 101 get_gid $M0;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume start $V0;
+
+sleep 10;
+
+EXPECT 100 get_uid $M0;
+EXPECT 101 get_gid $M0;
+
+cleanup;
diff --git a/tests/bugs/bug-1037501.t b/tests/bugs/bug-1037501.t
new file mode 100755
index 000000000..d11c788a0
--- /dev/null
+++ b/tests/bugs/bug-1037501.t
@@ -0,0 +1,253 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function write_file()
+{
+ path="$1"; shift
+ echo "$*" > "$path"
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure data self-heal doesn't interfere.
+TEST $CLI volume set $V0 data-self-heal off;
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST $CLI volume set $V0 background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+TEST `echo "TEST-FILE" > $M0/File`
+TEST `mkdir $M0/Dir`
+TEST `ln $M0/File $M0/Link`
+TEST `mknod $M0/FIFO p`
+
+TEST $CLI volume add-brick $V0 replica 4 $H0:$B0/$V0-3 force
+TEST $CLI volume add-brick $V0 replica 5 $H0:$B0/$V0-4 force
+TEST $CLI volume add-brick $V0 replica 6 $H0:$B0/$V0-5 force
+
+sleep 10
+
+TEST ls $M0/
+
+
+function compare()
+{
+ var=-1;
+ if [ $1 == $2 ]; then
+ var=0;
+ else
+ var=-1;
+ fi
+
+ echo $var
+}
+
+var2="000000000000000000000000"
+
+var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1| cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+
+var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+
+
+var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-3"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-4"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-5"`
+EXPECT "0" echo $?
+var3=`echo $var1 | cut -d x -f 2`
+EXPECT_NOT $var2 echo $var3
+
+cleanup;
diff --git a/tests/bugs/bug-1040408.t b/tests/bugs/bug-1040408.t
new file mode 100644
index 000000000..2982d6a81
--- /dev/null
+++ b/tests/bugs/bug-1040408.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+#Test case: Create a distributed replicate volume, and reduce
+#replica count
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a 2X3 distributed-replicate volume
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..6};
+TEST $CLI volume start $V0
+
+# Reduce to 2x2 volume by specifying bricks in reverse order
+function remove_brick_status {
+ $CLI volume remove-brick $V0 replica 2 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}3 force 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_status;
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-1043886.t b/tests/bugs/bug-1043886.t
new file mode 100755
index 000000000..fb7ecb194
--- /dev/null
+++ b/tests/bugs/bug-1043886.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0
+
+sleep 2;
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+EXPECT_WITHIN 20 "1" is_nfs_export_available;
+
+## Mount volume as NFS export
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0;
+
+# just a random uid/gid
+uid=22162
+gid=5845
+
+mkdir $N0/other;
+chown $uid:$gid $N0/other;
+
+TEST $CLI volume set $V0 server.root-squash on;
+TEST $CLI volume set $V0 server.anonuid $uid;
+TEST $CLI volume set $V0 server.anongid $gid;
+
+sleep 2;
+
+EXPECT_WITHIN 20 "1" is_nfs_export_available;
+
+# create files and directories in the root of the glusterfs and nfs mount
+# which is owned by root and hence the right behavior is getting EACCES
+# as the fops are executed as nfsnobody.
+touch $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+# Here files and directories should be getting created as other directory is owned
+# by tmp_user as server.anonuid and server.anongid have the value of tmp_user uid and gid
+TEST touch $M0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/file)" = "$uid:$gid" ];
+TEST mkdir $M0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/dir)" = "$uid:$gid" ];
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-1046308.t b/tests/bugs/bug-1046308.t
new file mode 100644
index 000000000..cfec3a35d
--- /dev/null
+++ b/tests/bugs/bug-1046308.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+volname="StartMigrationDuringRebalanceTest"
+TEST glusterd
+TEST pidof glusterd;
+
+TEST $CLI volume info;
+TEST $CLI volume create $volname $H0:$B0/${volname}{1,2};
+TEST $CLI volume start $volname;
+TEST $CLI volume rebalance $volname start;
+
+cleanup;
+
+
+
diff --git a/tests/bugs/bug-1046624.t b/tests/bugs/bug-1046624.t
new file mode 100755
index 000000000..bd46b5eaf
--- /dev/null
+++ b/tests/bugs/bug-1046624.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST $CLI volume set $V0 background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+TEST `echo "TEST-FILE" > $M0/File`
+TEST `mkdir $M0/Dir`
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+
+TEST `ln -s $M0/File $M0/Link1`
+TEST `ln -s $M0/Dir $M0/Link2`
+
+TEST $CLI volume start $V0 force
+
+TEST `find $M0/ | xargs stat 2>/dev/null 1>/dev/null`
+
+sleep 60
+
+TEST stat $B0/${V0}-0/Link1
+TEST stat $B0/${V0}-0/Link2
+
+cleanup;
diff --git a/tests/bugs/bug-1047955.t b/tests/bugs/bug-1047955.t
new file mode 100644
index 000000000..e15f3ceef
--- /dev/null
+++ b/tests/bugs/bug-1047955.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers { # prints how many 'Peer in Cluster (Connected)' lines "$CLI_1 peer status" reports
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+# Create a 2x2 dist-rep volume; peer probe a new node.
+# Performing remove-brick from this new node must succeed
+# without crashing its glusterd
+
+TEST launch_cluster 2;
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/${V0}{1,2,3,4}
+TEST $CLI_1 volume start $V0;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN 20 1 check_peers;
+TEST $CLI_2 volume remove-brick $V0 $H1:$B1/${V0}{3,4} start;
+TEST $CLI_2 volume info
+cleanup;
diff --git a/tests/bugs/bug-1051896.c b/tests/bugs/bug-1051896.c
new file mode 100644
index 000000000..0ffd81986
--- /dev/null
+++ b/tests/bugs/bug-1051896.c
@@ -0,0 +1,94 @@
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <utime.h>
+#include <sys/acl.h>
+/*
+ * do_setfacl - small setfacl-like helper built on the POSIX ACL library.
+ *
+ * path:    file whose ACLs are modified.
+ * options: string of option letters, each looked up with strchr():
+ *            'b' - delete the default ACL, then set the access ACL to the
+ *                  u::/g::/o:: entries derived from the file's mode bits;
+ *            'k' - delete the default ACL only;
+ *            'm' - apply textacl, to the default ACL when 'd' is also given
+ *                  and to the access ACL otherwise;
+ *            'd' - rejected unless 'm' is present.
+ * textacl: ACL in text form; must be non-NULL with 'm' and NULL without it.
+ *
+ * Without 'm', at least one of 'b' or 'k' has to be given. An inconsistent
+ * combination sets errno to EBADRQC. Returns 0 on success, otherwise the
+ * negated value errno holds when the function returns.
+ */
+int do_setfacl(const char *path, const char *options, const char *textacl)
+{
+ int r;
+ int type;
+ acl_t acl;
+ int dob; /* non-zero when options contains 'b' */
+ int dok; /* non-zero when options contains 'k' */
+ int dom; /* non-zero when options contains 'm' */
+ struct stat st;
+ char textmode[30]; /* receives "u::rwx,g::rwx,o::rwx" (20 chars + NUL) */
+
+ r = 0;
+ dob = strchr(options,'b') != (char*)NULL;
+ dok = strchr(options,'k') != (char*)NULL;
+ dom = strchr(options,'m') != (char*)NULL;
+ if ((dom && !textacl)
+ || (!dom && (textacl || (!dok && !dob) ||
+ strchr(options,'d')))) {
+ errno = EBADRQC; /* "bad request" */
+ r = -1;
+ } else {
+ if (dob || dok) {
+ r = acl_delete_def_file(path);
+ }
+ if (dob && !r) { /* 'b' only: rebuild the access ACL from the mode bits */
+ if (!stat(path,&st)) {
+ sprintf(textmode,
+ "u::%c%c%c,g::%c%c%c,o::%c%c%c",
+ (st.st_mode & 0400 ? 'r' : '-'),
+ (st.st_mode & 0200 ? 'w' : '-'),
+ (st.st_mode & 0100 ? 'x' : '-'),
+ (st.st_mode & 0040 ? 'r' : '-'),
+ (st.st_mode & 0020 ? 'w' : '-'),
+ (st.st_mode & 0010 ? 'x' : '-'),
+ (st.st_mode & 004 ? 'r' : '-'),
+ (st.st_mode & 002 ? 'w' : '-'),
+ (st.st_mode & 001 ? 'x' : '-'));
+ acl = acl_from_text(textmode);
+ if (acl) {
+ r = acl_set_file(path,
+ ACL_TYPE_ACCESS,acl);
+ acl_free(acl);
+ } else
+ r = -1;
+ } else
+ r = -1;
+ }
+ if (!r && dom) { /* 'm': apply the caller-supplied ACL text */
+ if (strchr(options,'d'))
+ type = ACL_TYPE_DEFAULT;
+ else
+ type = ACL_TYPE_ACCESS;
+ acl = acl_from_text(textacl);
+ if (acl) {
+ r = acl_set_file(path,type,acl);
+ acl_free(acl);
+ } else
+ r = -1;
+ }
+ }
+ if (r)
+ r = -errno; /* any failure is reported as a negated errno */
+ return (r);
+}
+
+
+/*
+ * Command-line wrapper around do_setfacl().
+ *
+ * Expects exactly three arguments: <path> <options> <textacl>.
+ * Exits non-zero on wrong usage as well as when do_setfacl() fails, so a
+ * calling script cannot mistake a mis-invocation for a successful run.
+ * On do_setfacl() failure its return value is used as the exit status.
+ */
+int main(int argc, char *argv[]){
+        int rc = 0;
+        if (argc != 4) {
+                fprintf(stderr,
+                        "usage: ./setfacl_test <path> <options> <textacl>\n");
+                /* a usage error must not look like success */
+                return EXIT_FAILURE;
+        }
+        if ((rc = do_setfacl(argv[1], argv[2], argv[3])) != 0){
+                fprintf(stderr, "do_setfacl failed: %s\n", strerror(errno));
+                return rc;
+        }
+        return 0;
+}
diff --git a/tests/bugs/bug-1051896.t b/tests/bugs/bug-1051896.t
new file mode 100644
index 000000000..75859cbef
--- /dev/null
+++ b/tests/bugs/bug-1051896.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --acl -s $H0 --volfile-id $V0 $M0;
+
+TEST touch $M0/file1;
+
+gcc -lacl $(dirname $0)/bug-1051896.c -o $(dirname $0)/bug-1051896
+TEST ! $(dirname $0)/bug-1051896 $M0/file1 m 'u::r,u::w,g::r--,o::r--'
+rm -f $(dirname $0)/bug-1051896
+
+cleanup
diff --git a/tests/bugs/bug-765564.t b/tests/bugs/bug-765564.t
index 0b8b8cd4f..6e4087f80 100644
--- a/tests/bugs/bug-765564.t
+++ b/tests/bugs/bug-765564.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc
+cleanup;
+
TEST glusterd
TEST pidof glusterd
diff --git a/tests/bugs/bug-921072.t b/tests/bugs/bug-921072.t
index e101d5b46..37f8fde52 100755
--- a/tests/bugs/bug-921072.t
+++ b/tests/bugs/bug-921072.t
@@ -89,12 +89,13 @@ TEST $CLI volume set $V0 nfs.rpc-auth-allow 127.0.0.1
EXPECT_WITHIN 20 1 is_nfs_export_available
TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST mkdir -p $N0/subdir
TEST umount $N0
# case 10: allow a non-localhost ip
TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.1.1
EXPECT_WITHIN 20 1 is_nfs_export_available
-#40
+#41
TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
# case 11: reject only localhost ip
@@ -104,6 +105,7 @@ TEST $CLI volume set $V0 nfs.rpc-auth-reject 127.0.0.1
EXPECT_WITHIN 20 1 is_nfs_export_available
TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0/subdir $N0
# case 12: reject only non-localhost ip
TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.1.1
@@ -112,7 +114,10 @@ EXPECT_WITHIN 20 1 is_nfs_export_available
TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
TEST umount $N0
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0/subdir $N0
+TEST umount $N0
+
TEST $CLI volume stop --mode=script $V0
-#49
+#52
TEST $CLI volume delete --mode=script $V0
cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index af01f2ef2..224d30546 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1833,12 +1833,18 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
afr_lookup_set_self_heal_params (local, this);
if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
if (afr_is_transaction_running (local) &&
- (!local->allow_sh_for_running_transaction))
+ /*Forcefully call afr_launch_self_heal (which will go on to
+ fail) for SB files.This prevents stale data being served
+ due to race in afr_is_transaction_running() when
+ multiple clients access the same SB file*/
+ !local->cont.lookup.possible_spb &&
+ (!local->attempt_self_heal))
goto out;
reason = "lookup detected pending operations";
afr_launch_self_heal (frame, this, local->cont.lookup.inode,
- _gf_true, local->cont.lookup.buf.ia_type,
+ !local->foreground_self_heal,
+ local->cont.lookup.buf.ia_type,
reason, afr_post_gfid_sh_success,
afr_self_heal_lookup_unwind);
*sh_launched = _gf_true;
@@ -2420,16 +2426,15 @@ int
afr_lookup (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *xattr_req)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- void *gfid_req = NULL;
- int ret = -1;
- int i = 0;
- int call_count = 0;
- uint64_t ctx = 0;
- int32_t op_errno = 0;
- int allow_sh = 0;
- priv = this->private;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ void *gfid_req = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_count = 0;
+ uint64_t ctx = 0;
+ int32_t op_errno = 0;
+ priv = this->private;
AFR_LOCAL_ALLOC_OR_GOTO (local, out);
@@ -2500,10 +2505,13 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
/* By default assume ENOTCONN. On success it will be set to 0. */
local->op_errno = ENOTCONN;
- ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
- &allow_sh);
- dict_del (xattr_req, "allow-sh-for-running-transaction");
- local->allow_sh_for_running_transaction = allow_sh;
+ ret = dict_get_int32 (xattr_req, "attempt-self-heal",
+ &local->attempt_self_heal);
+ dict_del (xattr_req, "attempt-self-heal");
+
+ ret = dict_get_int32 (xattr_req, "foreground-self-heal",
+ &local->foreground_self_heal);
+ dict_del (xattr_req, "foreground-self-heal");
ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
&gfid_req);
@@ -3977,6 +3985,10 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_TRANSLATOR_OP:
input = data;
output = data2;
+ if (!had_heard_from_all) {
+ ret = -1;
+ goto out;
+ }
ret = afr_xl_op (this, input, output);
goto out;
break;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 8a2853319..0cfebcb9d 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1348,7 +1348,7 @@ afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
int ret = 0;
if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
- ret = gf_get_min_stime (THIS, data, key, value);
+ ret = gf_get_max_stime (THIS, data, key, value);
return ret;
}
@@ -1410,7 +1410,8 @@ afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
goto out;
}
- if (!strcmp (name, GF_XATTR_PATHINFO_KEY)) {
+ if (!strcmp (name, GF_XATTR_PATHINFO_KEY) ||
+ !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) {
if (is_fgetxattr) {
*cbk = afr_fgetxattr_pathinfo_cbk;
} else {
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 53491a1d7..00f1a9cb9 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1002,6 +1002,7 @@ afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
int child_index = 0;
+ int call_count = -1;
priv = this->private;
impunge_local = impunge_frame->local;
@@ -1012,16 +1013,26 @@ afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
gf_log (this->name, GF_LOG_INFO,
"%s: failed to perform xattrop on %s (%s)",
impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
+ priv->children[child_index]->name, strerror (op_errno));
+
+ LOCK (&impunge_frame->lock);
+ {
+ impunge_local->op_ret = -1;
+ impunge_local->op_errno = op_errno;
+ }
+ UNLOCK (&impunge_frame->lock);
}
- afr_sh_entry_impunge_setattr (impunge_frame, this);
- return 0;
-out:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
+ call_count = afr_frame_return (impunge_frame);
+
+ if (call_count == 0) {
+ if (impunge_local->op_ret == 0) {
+ afr_sh_entry_impunge_setattr (impunge_frame, this);
+ } else {
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, impunge_local->op_errno);
+ }
+ }
return 0;
}
@@ -1035,11 +1046,15 @@ afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
int32_t op_errno = 0;
+ int32_t call_count = 0;
+ int32_t i = 0;
+
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
active_src = impunge_sh->active_source;
+ impunge_local->op_ret = 0;
afr_prepare_new_entry_pending_matrix (impunge_local->pending,
afr_is_errno_unset,
@@ -1055,11 +1070,32 @@ afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src,
LOCAL_LAST);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((impunge_sh->child_errno[i] == EEXIST) &&
+ (impunge_local->child_up[i] == 1))
+
+ call_count++;
+ }
+
+ impunge_local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+
+ if ((impunge_sh->child_errno[i] == EEXIST)
+ && (impunge_local->child_up[i] == 1)) {
+
+
+ STACK_WIND_COOKIE (impunge_frame,
+ afr_sh_entry_impunge_xattrop_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &impunge_local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ if (!--call_count)
+ break;
+ }
+ }
if (xattr)
dict_unref (xattr);
@@ -1494,7 +1530,7 @@ afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cooki
child_index = (long) cookie;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
+ if ((op_ret == -1) && (!afr_inode_missing(op_errno))) {
gf_log (this->name, GF_LOG_INFO,
"readlink of %s on %s failed (%s)",
impunge_local->loc.path,
@@ -1505,7 +1541,7 @@ afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cooki
/* symlink doesn't exist on the sink */
- if ((op_ret == -1) && (op_errno == ENOENT)) {
+ if ((op_ret == -1) && (afr_inode_missing(op_errno))) {
afr_sh_entry_impunge_symlink (impunge_frame, this,
child_index, impunge_sh->linkname);
return 0;
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 8dbb9c69e..5f85c3047 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -20,7 +20,8 @@
#include "event-history.h"
typedef enum {
- STOP_CRAWL_ON_SINGLE_SUBVOL = 1
+ STOP_CRAWL_ON_SINGLE_SUBVOL = 1,
+ STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL = 2
} afr_crawl_flags_t;
typedef enum {
@@ -641,7 +642,7 @@ _self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *ent
goto out;
}
- ret = dict_set_int32 (xattr_req, "allow-sh-for-running-transaction", 1);
+ ret = dict_set_int32 (xattr_req, "attempt-self-heal", 1);
gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
@@ -668,11 +669,26 @@ afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
return 0;
}
+/*
+ * Build the crawl flag mask for a self-heal daemon operation.
+ *
+ * Any operation other than HEAL gets no flags. HEAL always gets
+ * STOP_CRAWL_ON_SINGLE_SUBVOL, and an INDEX crawl additionally gets
+ * STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL.
+ */
+int
+_get_heal_op_flags (shd_crawl_op op, afr_crawl_type_t crawl)
+{
+        if (HEAL != op)
+                return 0;
+
+        if (crawl != INDEX)
+                return STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+        return STOP_CRAWL_ON_SINGLE_SUBVOL |
+               STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL;
+}
+
void
_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
{
afr_start_crawl (this, child, crawl, _self_heal_entry,
- NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL,
+ NULL, _gf_true, _get_heal_op_flags (HEAL, crawl),
afr_crawl_done);
}
@@ -691,6 +707,7 @@ _crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
gf_log (this->name, GF_LOG_DEBUG, "%s", msg);
goto out;
}
+
if (!priv->child_up[child]) {
gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , "
"subvol went down", priv->children[child]->name);
@@ -707,6 +724,17 @@ _crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
goto out;
}
}
+
+ if (crawl_flags & STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL) {
+ if (shd->pending[child] == FULL) {
+ gf_log (this->name, GF_LOG_INFO, "Stopping index "
+ "self-heal as Full self-heal is pending on %s",
+ priv->children[child]->name);
+ msg = "Full crawl is pending";
+ goto out;
+ }
+ }
+
proceed = _gf_true;
out:
if (reason)
@@ -730,8 +758,7 @@ _do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
int crawl_flags = 0;
priv = this->private;
- if (op == HEAL)
- crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
+ crawl_flags = _get_heal_op_flags (op, crawl);
if (output) {
ret = dict_get_int32 (output, this->name, &xl_id);
@@ -1684,7 +1711,10 @@ afr_dir_exclusive_crawl (void *data)
if (!crawl) {
gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
- "for %s", priv->children[child]->name);
+ "for %s while attempting %s heal on %s",
+ priv->children[child]->name,
+ get_crawl_type_in_string (crawl_data->crawl),
+ priv->children[child]->name);
goto out;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 34417a158..b43fde47d 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -584,8 +584,8 @@ struct volume_options options[] = {
{ .key = {"choose-local" },
.type = GF_OPTION_TYPE_BOOL,
.default_value = "true",
- .description = "Choose a local subvolume(i.e. Brick) to read from if "
- "read-subvolume is not explicitly set.",
+ .description = "Choose a local subvolume (i.e. Brick) to read from"
+ " if read-subvolume is not explicitly set.",
},
{ .key = {"favorite-child"},
.type = GF_OPTION_TYPE_XLATOR,
@@ -695,7 +695,7 @@ struct volume_options options[] = {
.description = "Lock phase of a transaction has two sub-phases. "
"First is an attempt to acquire locks in parallel by "
"broadcasting non-blocking lock requests. If lock "
- "aquistion fails on any server, then the held locks "
+ "acquisition fails on any server, then the held locks "
"are unlocked and revert to a blocking locked mode "
"sequentially on one server after another. If this "
"option is enabled the initial broadcasting lock "
@@ -711,16 +711,15 @@ struct volume_options options[] = {
"arrives before the unlock phase of the \"optimized\" "
"transaction, that in turn \"takes over\" the lock as "
"well. The actual unlock now happens at the end of "
- "the last \"optimzed\" transaction."
+ "the last \"optimized\" transaction."
},
{ .key = {"self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
.description = "This option applies to only self-heal-daemon. "
- "Index directory crawl and automatic healing of files"
- " will not be performed if this option is turned"
- " off."
+ "Index directory crawl and automatic healing of files "
+ "will not be performed if this option is turned off."
},
{ .key = {"iam-self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index dcf0f8d0c..9196a1f27 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -33,6 +33,8 @@
#define AFR_LOCKEE_COUNT_MAX 3
#define AFR_DOM_COUNT_MAX 3
+/* True when op_errno is ENOENT or ESTALE, the two errors treated here as
+ * "inode missing". The argument is parenthesized so any expression can be
+ * passed; it is still evaluated twice, so it must be free of side effects. */
+#define afr_inode_missing(op_errno) \
+        (((op_errno) == ENOENT) || ((op_errno) == ESTALE))
+
struct _pump_private;
typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
@@ -509,7 +511,8 @@ typedef struct _afr_local {
*/
gf_boolean_t append_write;
- int allow_sh_for_running_transaction;
+ int attempt_self_heal;
+ int foreground_self_heal;
/* This struct contains the arguments for the "continuation"
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 898f41f0e..f59bc9667 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -2015,13 +2015,7 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->xattr) {
local->xattr = dict_copy_with_ref (xattr, NULL);
} else {
- /* first aggregate everything into xattr and then copy into
- * local->xattr. This is required as we want to have
- * 'local->xattr' as the proper final dictionary passed above
- * distribute xlator.
- */
- dht_aggregate_xattr (xattr, local->xattr);
- local->xattr = dict_copy (xattr, local->xattr);
+ dht_aggregate_xattr (local->xattr, xattr);
}
out:
if (is_last_call (this_call_cnt)) {
@@ -2167,7 +2161,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
* (until inode_link() happens)
*/
if (key && DHT_IS_DIR(layout) &&
- ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)
+ (XATTR_IS_PATHINFO (key)
|| (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) {
(void) strncpy (local->xsel, key, 256);
cnt = local->call_cnt = layout->cnt;
@@ -2182,7 +2176,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
/* node-uuid or pathinfo for files */
if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)
- || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) {
+ || XATTR_IS_PATHINFO (key))) {
cached_subvol = local->cached_subvol;
(void) strncpy (local->xsel, key, 256);
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 12a551505..e8a9a7196 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -531,7 +531,7 @@ dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&local->loc, local->rebalance.flags, NULL);
return 0;
}
- if ((op_ret == -1) && (op_errno == ENOENT)) {
+ if ((op_ret == -1) && dht_inode_missing(op_errno)) {
/* File would be migrated to other node */
local->op_errno = op_errno;
local->rebalance.target_op_fn = dht_access2;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 3e471edca..9446dbe03 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1105,6 +1105,10 @@ gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size)
* have been fixed
*/
+#ifdef GF_LINUX_HOST_OS
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+#endif
int
gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *migrate_data)
@@ -1371,6 +1375,9 @@ out:
return ret;
}
+#ifdef GF_LINUX_HOST_OS
+#pragma GCC pop_options
+#endif
int
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 674297c71..324b30626 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -690,7 +690,8 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_INT,
.min = 1,
.validate = GF_OPT_VALIDATE_MIN,
- .description = "Specifies the directory layout spread."
+ .description = "Specifies the directory layout spread. Takes number "
+ "of subvolumes as default value."
},
{ .key = {"decommissioned-bricks"},
.type = GF_OPTION_TYPE_ANY,
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index 8ac14cb3a..c98126225 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -5548,9 +5548,7 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
- if (name &&
- ((strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0))) {
+ if (name && (XATTR_IS_PATHINFO (name))) {
if (IA_ISREG (loc->inode->ia_type)) {
ret = inode_ctx_get (loc->inode, this,
(uint64_t *) &local->fctx);
@@ -5637,8 +5635,7 @@ stripe_is_special_xattr (const char *name)
if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
strlen (GF_XATTR_LOCKINFO_KEY))
- || !strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)))
+ || XATTR_IS_PATHINFO (name))
is_spl = _gf_true;
out:
return is_spl;
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
index db2e6d83c..becff3e47 100644
--- a/xlators/encryption/crypt/src/crypt.c
+++ b/xlators/encryption/crypt/src/crypt.c
@@ -1421,8 +1421,11 @@ static int32_t prune_write(call_frame_t *frame,
gf_crypt_mt_data);
if (local->vec.iov_base == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to calloc head block for prune");
local->op_ret = -1;
local->op_errno = ENOMEM;
+ goto put_one_call;
}
for (i = 0; i < count; i++) {
to_copy = vec[i].iov_len;
@@ -3274,15 +3277,15 @@ static int32_t linkop_grab_local(call_frame_t *frame,
if (newloc) {
local->newloc = GF_CALLOC(1, sizeof(*newloc), gf_crypt_mt_loc);
if (!local->newloc) {
- GF_FREE(local->loc);
loc_wipe(local->loc);
+ GF_FREE(local->loc);
goto error;
}
memset(local->newloc, 0, sizeof(*local->newloc));
ret = loc_copy(local->newloc, newloc);
if (ret) {
- GF_FREE(local->loc);
loc_wipe(local->loc);
+ GF_FREE(local->loc);
GF_FREE(local->newloc);
goto error;
}
@@ -3294,19 +3297,21 @@ static int32_t linkop_grab_local(call_frame_t *frame,
goto error;
}
return 0;
- error:
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
- local->fd = 0;
- local->loc = NULL;
- local->newloc = NULL;
- local->op_ret = -1;
- local->op_errno = ret;
+error:
+ if (local) {
+ if (local->xdata)
+ dict_unref(local->xdata);
+ if (local->fd)
+ fd_unref(local->fd);
+ local->fd = 0;
+ local->loc = NULL;
+ local->newloc = NULL;
+ local->op_ret = -1;
+ local->op_errno = ret;
+ }
- return ret;
+ return ret;
}
/*
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index 2923e9c91..db592719b 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -16,6 +16,7 @@
#include "options.h"
#include "glusterfs3-xdr.h"
#include "syncop.h"
+#include "syscall.h"
#define XATTROP_SUBDIR "xattrop"
#define BASE_INDICES_HOLDER_SUBDIR "base_indices_holder"
@@ -264,7 +265,7 @@ check_delete_stale_index_file (xlator_t *this, char *filename)
ret = stat (filepath_under_base_indices_holder, &base_index_st);
if (ret) {
gf_log (THIS->name, GF_LOG_ERROR, "Base index is not created"
- "under index/base_indices_holder");
+ " under index/base_indices_holder");
return;
}
@@ -407,7 +408,8 @@ sync_base_indices (void *index_priv)
snprintf (base_index_path, PATH_MAX, "%s/%s",
base_indices_holder, entry->d_name);
- ret = link (xattrop_index_path, base_index_path);
+ ret = sys_link (xattrop_index_path, base_index_path);
+
if (ret && errno != EEXIST)
goto out;
@@ -543,7 +545,8 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir)
index_get_index (priv, index);
make_index_path (priv->index_basepath, subdir,
index, index_path, sizeof (index_path));
- ret = link (index_path, gfid_path);
+
+ ret = sys_link (index_path, gfid_path);
if (!ret || (errno == EEXIST)) {
ret = 0;
index_created = 1;
@@ -576,7 +579,7 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir)
if (fd >= 0)
close (fd);
- ret = link (index_path, gfid_path);
+ ret = sys_link (index_path, gfid_path);
if (ret && (errno != EEXIST)) {
gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
"add to index (%s)", uuid_utoa (gfid),
@@ -590,7 +593,7 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir)
make_index_path (priv->index_basepath,
GF_BASE_INDICES_HOLDER_GFID,
index, base_path, sizeof (base_path));
- ret = link (index_path, base_path);
+ ret = sys_link (index_path, base_path);
if (ret)
goto out;
}
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 124b9ad0f..75593b898 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -338,9 +338,8 @@ blkd:
elock->basename, ENTRYLK_LOCK, elock->type,
-1, EAGAIN);
STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL);
- GF_FREE ((char *) elock->basename);
- GF_FREE (elock->connection_id);
- GF_FREE (elock);
+
+ __pl_entrylk_unref (elock);
}
if (!(args->kind & CLRLK_GRANTED)) {
@@ -363,13 +362,13 @@ granted:
gcount++;
list_del_init (&elock->domain_list);
list_add_tail (&elock->domain_list, &removed);
+
+ __pl_entrylk_unref (elock);
}
}
pthread_mutex_unlock (&pl_inode->mutex);
- list_for_each_entry_safe (elock, tmp, &removed, domain_list) {
- grant_blocked_entry_locks (this, pl_inode, elock, dom);
- }
+ grant_blocked_entry_locks (this, pl_inode, dom);
ret = 0;
out:
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index b3309580d..f6c71c1cf 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1099,123 +1099,3 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
return conf;
}
-
-struct _lock_table *
-pl_lock_table_new (void)
-{
- struct _lock_table *new = NULL;
-
- new = GF_CALLOC (1, sizeof (struct _lock_table), gf_common_mt_lock_table);
- if (new == NULL) {
- goto out;
- }
- INIT_LIST_HEAD (&new->entrylk_lockers);
- INIT_LIST_HEAD (&new->inodelk_lockers);
- LOCK_INIT (&new->lock);
-out:
- return new;
-}
-
-
-int
-pl_add_locker (struct _lock_table *table, const char *volume,
- loc_t *loc, fd_t *fd, pid_t pid, gf_lkowner_t *owner,
- glusterfs_fop_t type)
-{
- int32_t ret = -1;
- struct _locker *new = NULL;
-
- GF_VALIDATE_OR_GOTO ("lock-table", table, out);
- GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
-
- new = GF_CALLOC (1, sizeof (struct _locker), gf_common_mt_locker);
- if (new == NULL) {
- goto out;
- }
- INIT_LIST_HEAD (&new->lockers);
-
- new->volume = gf_strdup (volume);
-
- if (fd == NULL) {
- loc_copy (&new->loc, loc);
- } else {
- new->fd = fd_ref (fd);
- }
-
- new->pid = pid;
- new->owner = *owner;
-
- LOCK (&table->lock);
- {
- if (type == GF_FOP_ENTRYLK)
- list_add_tail (&new->lockers, &table->entrylk_lockers);
- else
- list_add_tail (&new->lockers, &table->inodelk_lockers);
- }
- UNLOCK (&table->lock);
-out:
- return ret;
-}
-
-int
-pl_del_locker (struct _lock_table *table, const char *volume,
- loc_t *loc, fd_t *fd, gf_lkowner_t *owner, glusterfs_fop_t type)
-{
- struct _locker *locker = NULL;
- struct _locker *tmp = NULL;
- int32_t ret = -1;
- struct list_head *head = NULL;
- struct list_head del;
-
- GF_VALIDATE_OR_GOTO ("lock-table", table, out);
- GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
-
- INIT_LIST_HEAD (&del);
-
- LOCK (&table->lock);
- {
- if (type == GF_FOP_ENTRYLK) {
- head = &table->entrylk_lockers;
- } else {
- head = &table->inodelk_lockers;
- }
-
- list_for_each_entry_safe (locker, tmp, head, lockers) {
- if (!is_same_lkowner (&locker->owner, owner) ||
- strcmp (locker->volume, volume))
- continue;
-
- /*
- * It is possible for inodelk lock to come on anon-fd
- * and inodelk unlock to come on normal fd in case of
- * client re-opens. So don't check for fds to be equal.
- */
- if (locker->fd && fd)
- list_move_tail (&locker->lockers, &del);
- else if (locker->loc.inode && loc &&
- (locker->loc.inode == loc->inode))
- list_move_tail (&locker->lockers, &del);
- }
- }
- UNLOCK (&table->lock);
-
- tmp = NULL;
- locker = NULL;
-
- list_for_each_entry_safe (locker, tmp, &del, lockers) {
- list_del_init (&locker->lockers);
- if (locker->fd)
- fd_unref (locker->fd);
- else
- loc_wipe (&locker->loc);
-
- GF_FREE (locker->volume);
- GF_FREE (locker);
- }
-
- ret = 0;
-out:
- return ret;
-
-}
-
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index db19ec978..5ec630ee8 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -32,20 +32,6 @@
#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
-struct _locker {
- struct list_head lockers;
- char *volume;
- loc_t loc;
- fd_t *fd;
- gf_lkowner_t owner;
- pid_t pid;
-};
-
-struct _lock_table {
- struct list_head inodelk_lockers;
- struct list_head entrylk_lockers;
- gf_lock_t lock;
-};
posix_lock_t *
new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
@@ -92,7 +78,7 @@ __pl_inodelk_unref (pl_inode_lock_t *lock);
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked, pl_dom_list_t *dom);
+ pl_dom_list_t *dom);
void pl_update_refkeeper (xlator_t *this, inode_t *inode);
@@ -166,22 +152,7 @@ pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
uint32_t
check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename);
-int32_t
-pl_add_locker (struct _lock_table *table, const char *volume,
- loc_t *loc,
- fd_t *fd,
- pid_t pid,
- gf_lkowner_t *owner,
- glusterfs_fop_t type);
-
-int32_t
-pl_del_locker (struct _lock_table *table, const char *volume,
- loc_t *loc,
- fd_t *fd,
- gf_lkowner_t *owner,
- glusterfs_fop_t type);
-
-struct _lock_table *
-pl_lock_table_new (void);
+void __pl_inodelk_unref (pl_inode_lock_t *lock);
+void __pl_entrylk_unref (pl_entry_lock_t *lock);
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 0785dc547..208bc140e 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -23,11 +23,29 @@
#include "locks.h"
#include "common.h"
+
+/*
+ * Drop one reference on @lock. Once the count reaches zero the lock's
+ * basename, its connection_id and the lock object itself are freed.
+ */
+void
+__pl_entrylk_unref (pl_entry_lock_t *lock)
+{
+        if (--lock->ref != 0)
+                return;
+
+        GF_FREE ((char *)lock->basename);
+        GF_FREE (lock->connection_id);
+        GF_FREE (lock);
+}
+
+
+/* Take one additional reference on @lock. */
+static void
+__pl_entrylk_ref (pl_entry_lock_t *lock)
+{
+        lock->ref += 1;
+}
+
+
static pl_entry_lock_t *
new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- client_t *client, pid_t client_pid, gf_lkowner_t *owner,
- const char *volume)
-
+ const char *domain, call_frame_t *frame, char *conn_id)
{
pl_entry_lock_t *newlock = NULL;
@@ -39,14 +57,21 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
newlock->basename = basename ? gf_strdup (basename) : NULL;
newlock->type = type;
- newlock->trans = client;
- newlock->volume = volume;
- newlock->client_pid = client_pid;
- newlock->owner = *owner;
+ newlock->client = frame->root->client;
+ newlock->client_pid = frame->root->pid;
+ newlock->volume = domain;
+ newlock->owner = frame->root->lk_owner;
+ newlock->frame = frame;
+
+ if (conn_id) {
+ newlock->connection_id = gf_strdup (conn_id);
+ }
INIT_LIST_HEAD (&newlock->domain_list);
INIT_LIST_HEAD (&newlock->blocked_locks);
+ INIT_LIST_HEAD (&newlock->client_list);
+ __pl_entrylk_ref (newlock);
out:
return newlock;
}
@@ -77,42 +102,42 @@ __same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
{
return (is_same_lkowner (&l1->owner, &l2->owner) &&
- (l1->trans == l2->trans));
+ (l1->client == l2->client));
}
/**
- * lock_grantable - is this lock grantable?
+ * entrylk_grantable - is this lock grantable?
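- * @inode: inode in which to look
- * @basename: name we're trying to lock
- * @type: type of lock
+ * @dom: domain whose granted entrylk list is searched
+ * @lock: requested lock; its basename is checked for conflicts
+ *
+ * Returns the first granted lock whose name conflicts, or NULL if none.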
*/
static pl_entry_lock_t *
-__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
if (list_empty (&dom->entrylk_list))
return NULL;
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- if (names_conflict (lock->basename, basename))
- return lock;
+ list_for_each_entry (tmp, &dom->entrylk_list, domain_list) {
+ if (names_conflict (tmp->basename, lock->basename))
+ return tmp;
}
return NULL;
}
static pl_entry_lock_t *
-__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
if (list_empty (&dom->blocked_entrylks))
return NULL;
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
- if (names_conflict (lock->basename, basename))
+ list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) {
+ if (names_conflict (tmp->basename, lock->basename))
return lock;
}
@@ -293,7 +318,7 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
}
/**
- * __lock_name - lock a name in a directory
+ * __lock_entrylk - lock a name in a directory
* @inode: inode for the directory in which to lock
* @basename: name of the entry to lock
* if null, lock the entire directory
@@ -304,89 +329,49 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
*/
int
-__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
- call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this,
- int nonblock, char *conn_id)
+__lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
+ int nonblock, pl_dom_list_t *dom)
{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *conf = NULL;
- int ret = -EINVAL;
-
- lock = new_entrylk_lock (pinode, basename, type,
- frame->root->client, frame->root->pid,
- &frame->root->lk_owner, dom->domain);
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- lock->frame = frame;
- lock->this = this;
- lock->trans = frame->root->client;
+ pl_entry_lock_t *conf = NULL;
+ int ret = -EAGAIN;
- if (conn_id) {
- lock->connection_id = gf_strdup (conn_id);
- }
-
- conf = __lock_grantable (dom, basename, type);
+ conf = __entrylk_grantable (dom, lock);
if (conf) {
ret = -EAGAIN;
- if (nonblock){
- GF_FREE (lock->connection_id);
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
+ if (nonblock)
goto out;
- }
-
gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
gf_log (this->name, GF_LOG_TRACE,
"Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
+ pinode, lock->basename);
goto out;
}
- if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) {
+ if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
ret = -EAGAIN;
- if (nonblock) {
- GF_FREE (lock->connection_id);
- GF_FREE ((char *) lock->basename);
- GF_FREE (lock);
+ if (nonblock)
goto out;
- }
- lock->frame = frame;
- lock->this = this;
-
gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"Lock is grantable, but blocking to prevent starvation");
gf_log (this->name, GF_LOG_TRACE,
"Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
+ pinode, lock->basename);
- ret = -EAGAIN;
goto out;
}
- switch (type) {
- case ENTRYLK_WRLCK:
- gettimeofday (&lock->granted_time, NULL);
- list_add_tail (&lock->domain_list, &dom->entrylk_list);
- break;
-
- default:
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Invalid type for entrylk specified: %d", type);
- ret = -EINVAL;
- goto out;
- }
+ __pl_entrylk_ref (lock);
+ gettimeofday (&lock->granted_time, NULL);
+ list_add (&lock->domain_list, &dom->entrylk_list);
+ lock->frame = NULL;
ret = 0;
out:
@@ -394,37 +379,36 @@ out:
}
/**
- * __unlock_name - unlock a name in a directory
+ * __unlock_entrylk - unlock a name in a directory
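- * @inode: inode for the directory to unlock in
- * @basename: name of the entry to unlock
- * if null, unlock the entire directory
+ * @dom: domain whose granted entrylks are searched
+ * @lock: request carrying the basename and type to unlock;
+ *        if the basename is null, unlock the entire directory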
*/
pl_entry_lock_t *
-__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__unlock_entrylk (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
pl_entry_lock_t *ret_lock = NULL;
- lock = __find_most_matching_lock (dom, basename);
+ tmp = __find_most_matching_lock (dom, lock->basename);
- if (!lock) {
- gf_log ("locks", GF_LOG_DEBUG,
+ if (!tmp) {
+ gf_log ("locks", GF_LOG_ERROR,
"unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found",
- basename);
+ lock->basename);
goto out;
}
- if (names_equal (lock->basename, basename)
- && lock->type == type) {
+ if (names_equal (tmp->basename, lock->basename)
+ && tmp->type == lock->type) {
+
+ list_del_init (&tmp->domain_list);
+ ret_lock = tmp;
- if (type == ENTRYLK_WRLCK) {
- list_del_init (&lock->domain_list);
- ret_lock = lock;
- }
} else {
- gf_log ("locks", GF_LOG_DEBUG,
- "Unlock for a non-existing lock!");
+ gf_log ("locks", GF_LOG_ERROR,
+ "Unlock on %s for a non-existing lock!", lock->basename);
goto out;
}
@@ -446,7 +430,7 @@ check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename)
pthread_mutex_lock (&pinode->mutex);
{
list_for_each_entry (dom, &pinode->dom_list, inode_list) {
- conf = __lock_grantable (dom, basename, ENTRYLK_WRLCK);
+ conf = __find_most_matching_lock (dom, basename);
if (conf && conf->basename) {
entrylk = 1;
break;
@@ -472,28 +456,14 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
INIT_LIST_HEAD (&blocked_list);
list_splice_init (&dom->blocked_entrylks, &blocked_list);
- list_for_each_entry_safe (bl, tmp, &blocked_list,
- blocked_locks) {
+ list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
list_del_init (&bl->blocked_locks);
-
- gf_log ("locks", GF_LOG_TRACE,
- "Trying to unblock: {pinode=%p, basename=%s}",
- pl_inode, bl->basename);
-
- bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
- bl->frame, dom, bl->this, 0,
- bl->connection_id);
+ bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom);
if (bl_ret == 0) {
list_add (&bl->blocked_locks, granted);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "should never happen");
- GF_FREE (bl->connection_id);
- GF_FREE ((char *)bl->basename);
- GF_FREE (bl);
}
}
return;
@@ -502,7 +472,7 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grants locks if possible which are blocked on a lock */
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked, pl_dom_list_t *dom)
+ pl_dom_list_t *dom)
{
struct list_head granted_list;
pl_entry_lock_t *tmp = NULL;
@@ -518,105 +488,26 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_unlock (&pl_inode->mutex);
list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
entrylk_trace_out (this, lock->frame, NULL, NULL, NULL,
lock->basename, ENTRYLK_LOCK, lock->type,
0, 0);
STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
+ lock->frame = NULL;
+ }
- GF_FREE (lock->connection_id);
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
- }
-
- GF_FREE ((char *)unlocked->basename);
- GF_FREE (unlocked->connection_id);
- GF_FREE (unlocked);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+ __pl_entrylk_unref (lock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
return;
}
-/**
- * release_entry_locks_for_client: release all entry locks from this
- * client for this loc_t
- */
-
-static int
-release_entry_locks_for_client (xlator_t *this, pl_inode_t *pinode,
- pl_dom_list_t *dom, client_t *client)
-{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *tmp = NULL;
- struct list_head granted;
- struct list_head released;
-
- INIT_LIST_HEAD (&granted);
- INIT_LIST_HEAD (&released);
-
- pthread_mutex_lock (&pinode->mutex);
- {
- list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks,
- blocked_locks) {
- if (lock->trans != client)
- continue;
-
- list_del_init (&lock->blocked_locks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on held by "
- "{client=%p}", client);
-
- list_add (&lock->blocked_locks, &released);
-
- }
-
- list_for_each_entry_safe (lock, tmp, &dom->entrylk_list,
- domain_list) {
- if (lock->trans != client)
- continue;
-
- list_del_init (&lock->domain_list);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on held by "
- "{client=%p}", client);
-
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock->connection_id);
- GF_FREE (lock);
- }
-
- __grant_blocked_entry_locks (this, pinode, dom, &granted);
-
- }
-
- pthread_mutex_unlock (&pinode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &released, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN, NULL);
-
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock->connection_id);
- GF_FREE (lock);
-
- }
-
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
-
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock->connection_id);
- GF_FREE (lock);
- }
-
- return 0;
-}
/* Common entrylk code called by pl_entrylk and pl_fentrylk */
int
@@ -632,10 +523,12 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
char unwind = 1;
GF_UNUSED int dict_ret = -1;
pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *reqlock = NULL;
pl_entry_lock_t *unlocked = NULL;
pl_dom_list_t *dom = NULL;
char *conn_id = NULL;
pl_ctx_t *ctx = NULL;
+ int nonblock = 0;
if (xdata)
dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
@@ -646,6 +539,15 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
goto out;
}
+ if (frame->root->client) {
+ ctx = pl_ctx_get (frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+ }
+
dom = get_domain (pinode, volume);
if (!dom){
op_errno = ENOMEM;
@@ -654,72 +556,64 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type);
- if (frame->root->lk_owner.len == 0) {
- /*
- this is a special case that means release
- all locks from this client
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for client %p", frame->root->client);
-
- release_entry_locks_for_client (this, pinode, dom,
- frame->root->client);
- op_ret = 0;
-
- goto out;
+ reqlock = new_entrylk_lock (pinode, basename, type, dom->domain, frame,
+ conn_id);
+ if (!reqlock) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
}
switch (cmd) {
- case ENTRYLK_LOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, dom, this, 0, conn_id);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- op_errno = -ret;
- if (ret < 0) {
- if (ret == -EAGAIN)
- unwind = 0;
- else
- unwind = 1;
- goto out;
- } else {
- op_ret = 0;
- op_errno = 0;
- unwind = 1;
- goto out;
- }
-
- break;
-
case ENTRYLK_LOCK_NB:
- unwind = 1;
+ nonblock = 1;
+ /* fall through */
+ case ENTRYLK_LOCK:
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pinode->mutex);
{
- ret = __lock_name (pinode, basename, type,
- frame, dom, this, 1, conn_id);
+ reqlock->pinode = pinode;
+
+ ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom);
+ if (ret == 0)
+ op_ret = 0;
+ else
+ op_errno = -ret;
+
+ if (ctx && (!ret || !nonblock))
+ list_add (&reqlock->client_list,
+ &ctx->entrylk_lockers);
+
+ if (ret == -EAGAIN && !nonblock) {
+ /* blocked */
+ unwind = 0;
+ } else {
+ __pl_entrylk_unref (reqlock);
+ }
}
pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- break;
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
+ break;
case ENTRYLK_UNLOCK:
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pinode->mutex);
{
- unlocked = __unlock_name (dom, basename, type);
+ unlocked = __unlock_entrylk (dom, reqlock);
+ if (unlocked) {
+ list_del_init (&unlocked->client_list);
+ __pl_entrylk_unref (unlocked);
+ }
+ __pl_entrylk_unref (reqlock);
}
pthread_mutex_unlock (&pinode->mutex);
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
- if (unlocked)
- grant_blocked_entry_locks (this, pinode, unlocked, dom);
+ grant_blocked_entry_locks (this, pinode, dom);
break;
@@ -733,27 +627,10 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
op_ret = 0;
out:
pl_update_refkeeper (this, inode);
+
if (unwind) {
entrylk_trace_out (this, frame, volume, fd, loc, basename,
cmd, type, op_ret, op_errno);
-
- ctx = pl_ctx_get (frame->root->client, this);
-
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
- goto unwind;
- }
-
- if (cmd == ENTRYLK_UNLOCK)
- pl_del_locker (ctx->ltable, volume, loc, fd,
- &frame->root->lk_owner,
- GF_FOP_ENTRYLK);
- else
- pl_add_locker (ctx->ltable, volume, loc, fd,
- frame->root->pid,
- &frame->root->lk_owner,
- GF_FOP_ENTRYLK);
-
unwind:
STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL);
} else {
@@ -761,7 +638,6 @@ unwind:
cmd, type);
}
-
return 0;
}
@@ -801,6 +677,88 @@ pl_fentrylk (call_frame_t *frame, xlator_t *this,
}
+/*
+ * Log, at WARNING level, that entrylk @lock is being released by
+ * pl_entrylk_client_cleanup(). The directory is named by the path of
+ * lock->pinode->refkeeper when inode_path() yields one, otherwise by
+ * the refkeeper's gfid.
+ */
+static void
+pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
+{
+        pl_inode_t *pinode = NULL;
+        char       *path   = NULL;
+        char       *file   = NULL;
+
+        pinode = lock->pinode;
+
+        inode_path (pinode->refkeeper, NULL, &path);
+
+        if (path)
+                file = path;
+        else
+                file = uuid_utoa (pinode->refkeeper->gfid);
+
+        /* client_pid is a (signed) pid_t: cast to int64_t so that the
+         * argument type matches the PRId64 conversion. */
+        gf_log (THIS->name, GF_LOG_WARNING,
+                "releasing lock on %s held by "
+                "{client=%p, pid=%"PRId64" lk-owner=%s}",
+                file, lock->client, (int64_t) lock->client_pid,
+                lkowner_utoa (&lock->owner));
+        GF_FREE (path);
+}
+
+
+/*
+ * Release all entrylks from this client.
+ *
+ * Pass 1 (under ctx->lock): move every lock on ctx->entrylk_lockers to a
+ * private list and, under the owning pinode's mutex, unlink it from the
+ * domain: a granted lock leaves dom->entrylk_list via domain_list, a
+ * still-blocked lock leaves dom->blocked_entrylks via blocked_locks so
+ * that it is not freed while still queued there.
+ *
+ * Pass 2 (no ctx->lock held): unwind any frame still attached to the
+ * lock with EAGAIN, give blocked entrylks in the same domain a chance
+ * to be granted, then drop this list's reference.
+ *
+ * Always returns 0.
+ */
+int
+pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
+{
+        pl_entry_lock_t *tmp = NULL;
+        pl_entry_lock_t *l = NULL;
+        pl_dom_list_t *dom = NULL;
+        pl_inode_t *pinode = NULL;
+
+        struct list_head released;
+
+        INIT_LIST_HEAD (&released);
+
+        pthread_mutex_lock (&ctx->lock);
+        {
+                list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
+                                          client_list) {
+                        list_del_init (&l->client_list);
+                        list_add_tail (&l->client_list, &released);
+
+                        pl_entrylk_log_cleanup (l);
+
+                        pinode = l->pinode;
+
+                        pthread_mutex_lock (&pinode->mutex);
+                        {
+                                /* A lock that was never granted has an
+                                 * empty domain_list and sits on the
+                                 * domain's blocked queue instead. */
+                                if (!list_empty (&l->domain_list))
+                                        list_del_init (&l->domain_list);
+                                else
+                                        list_del_init (&l->blocked_locks);
+                        }
+                        pthread_mutex_unlock (&pinode->mutex);
+                }
+        }
+        pthread_mutex_unlock (&ctx->lock);
+
+        list_for_each_entry_safe (l, tmp, &released, client_list) {
+                list_del_init (&l->client_list);
+
+                if (l->frame)
+                        STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
+                                             NULL);
+
+                pinode = l->pinode;
+
+                dom = get_domain (pinode, l->volume);
+
+                /* These are entry locks: wake blocked *entry* locks (the
+                 * inodelk variant would leave them queued). get_domain()
+                 * can return NULL, so guard the call. */
+                if (dom)
+                        grant_blocked_entry_locks (this, pinode, dom);
+
+                pthread_mutex_lock (&pinode->mutex);
+                {
+                        __pl_entrylk_unref (l);
+                }
+                pthread_mutex_unlock (&pinode->mutex);
+        }
+
+        return 0;
+}
+
+
int32_t
__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
{
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 508523e11..969b67a61 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -35,7 +35,7 @@ __pl_inodelk_ref (pl_inode_lock_t *lock)
lock->ref++;
}
-inline void
+void
__pl_inodelk_unref (pl_inode_lock_t *lock)
{
lock->ref--;
@@ -204,7 +204,7 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
int ret = -EINVAL;
conf = __inodelk_grantable (dom, lock);
- if (conf){
+ if (conf) {
ret = -EAGAIN;
if (can_block == 0)
goto out;
@@ -232,7 +232,7 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"Lock is grantable, but blocking to prevent starvation");
gf_log (this->name, GF_LOG_TRACE,
"%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
@@ -307,6 +307,8 @@ __inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
out:
return conf;
}
+
+
static void
__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
struct list_head *granted, pl_dom_list_t *dom)
@@ -363,6 +365,7 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
&lock->user_flock, 0, 0, lock->volume);
STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
+ lock->frame = NULL;
}
pthread_mutex_lock (&pl_inode->mutex);
@@ -375,103 +378,101 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_unlock (&pl_inode->mutex);
}
-/* Release all inodelks from this client */
-static int
-release_inode_locks_of_client (xlator_t *this, pl_dom_list_t *dom,
- inode_t *inode, client_t *client)
+
+/*
+ * Log, at WARNING level, that inodelk @lock is being released by
+ * pl_inodelk_client_cleanup(). The file is named by the path of
+ * lock->pl_inode->refkeeper when inode_path() yields one, otherwise by
+ * the refkeeper's gfid.
+ */
+static void
+pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
{
- pl_inode_lock_t *tmp = NULL;
- pl_inode_lock_t *l = NULL;
+        pl_inode_t *pl_inode = NULL;
+        char *path = NULL;
+        char *file = NULL;
- pl_inode_t * pinode = NULL;
+        pl_inode = lock->pl_inode;
- struct list_head released;
+        inode_path (pl_inode->refkeeper, NULL, &path);
- char *path = NULL;
- char *file = NULL;
+        if (path)
+                file = path;
+        else
+                file = uuid_utoa (pl_inode->refkeeper->gfid);
- INIT_LIST_HEAD (&released);
+        /* client_pid is a (signed) pid_t: cast to int64_t so that the
+         * argument type matches the PRId64 conversion. */
+        gf_log (THIS->name, GF_LOG_WARNING,
+                "releasing lock on %s held by "
+                "{client=%p, pid=%"PRId64" lk-owner=%s}",
+                file, lock->client, (int64_t) lock->client_pid,
+                lkowner_utoa (&lock->owner));
+        GF_FREE (path);
+}
- pinode = pl_inode_get (this, inode);
- pthread_mutex_lock (&pinode->mutex);
- {
+/* Release all inodelks from this client */
+int
+pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
+{
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *l = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ struct list_head released;
- list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) {
- if (l->client != client)
- continue;
+ INIT_LIST_HEAD (&released);
- list_del_init (&l->blocked_locks);
+ pthread_mutex_lock (&ctx->lock);
+ {
+ list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,
+ client_list) {
+ list_del_init (&l->client_list);
+ list_add_tail (&l->client_list, &released);
- inode_path (inode, NULL, &path);
- if (path)
- file = path;
- else
- file = uuid_utoa (inode->gfid);
+ pl_inodelk_log_cleanup (l);
- gf_log (this->name, GF_LOG_DEBUG,
- "releasing blocking lock on %s held by "
- "{client=%p, pid=%"PRId64" lk-owner=%s}",
- file, client, (uint64_t) l->client_pid,
- lkowner_utoa (&l->owner));
+ pl_inode = l->pl_inode;
- list_add (&l->blocked_locks, &released);
- if (path) {
- GF_FREE (path);
- path = NULL;
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __delete_inode_lock (l);
}
+ pthread_mutex_unlock (&pl_inode->mutex);
}
+ }
+ pthread_mutex_unlock (&ctx->lock);
- list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) {
- if (l->client != client)
- continue;
-
- inode_path (inode, NULL, &path);
- if (path)
- file = path;
- else
- file = uuid_utoa (inode->gfid);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "releasing granted lock on %s held by "
- "{client=%p, pid=%"PRId64" lk-owner=%s}",
- file, client, (uint64_t) l->client_pid,
- lkowner_utoa (&l->owner));
-
- if (path) {
- GF_FREE (path);
- path = NULL;
- }
+ list_for_each_entry_safe (l, tmp, &released, client_list) {
+ list_del_init (&l->client_list);
- __delete_inode_lock (l);
- __pl_inodelk_unref (l);
- }
- }
- GF_FREE (path);
+ if (l->frame)
+ STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
+ NULL);
+
+ pl_inode = l->pl_inode;
- pthread_mutex_unlock (&pinode->mutex);
+ dom = get_domain (pl_inode, l->volume);
- list_for_each_entry_safe (l, tmp, &released, blocked_locks) {
- list_del_init (&l->blocked_locks);
+ grant_blocked_inode_locks (this, pl_inode, dom);
- STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL);
- //No need to take lock as the locks are only in one list
- __pl_inodelk_unref (l);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __pl_inodelk_unref (l);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
}
- grant_blocked_inode_locks (this, pinode, dom);
return 0;
}
static int
-pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom)
{
int ret = -EINVAL;
pl_inode_lock_t *retlock = NULL;
gf_boolean_t unref = _gf_true;
+ lock->pl_inode = pl_inode;
+
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pl_inode->mutex);
{
if (lock->fl_type != F_UNLCK) {
@@ -495,6 +496,10 @@ pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
if (can_block)
unref = _gf_false;
}
+
+ if (ctx && (!ret || can_block))
+ list_add_tail (&lock->client_list,
+ &ctx->inodelk_lockers);
} else {
retlock = __inode_unlock_lock (this, lock, dom);
if (!retlock) {
@@ -503,16 +508,21 @@ pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
ret = -EINVAL;
goto out;
}
- __pl_inodelk_unref (retlock);
+ list_del_init (&retlock->client_list);
+ __pl_inodelk_unref (retlock);
ret = 0;
}
- }
out:
- if (unref)
- __pl_inodelk_unref (lock);
+ if (unref)
+ __pl_inodelk_unref (lock);
+ }
pthread_mutex_unlock (&pl_inode->mutex);
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
+
grant_blocked_inode_locks (this, pl_inode, dom);
+
return ret;
}
@@ -552,6 +562,7 @@ new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
INIT_LIST_HEAD (&lock->list);
INIT_LIST_HEAD (&lock->blocked_locks);
+ INIT_LIST_HEAD (&lock->client_list);
__pl_inodelk_ref (lock);
return lock;
@@ -627,6 +638,15 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
+ if (frame->root->client) {
+ ctx = pl_ctx_get (frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+ }
+
pinode = pl_inode_get (this, inode);
if (!pinode) {
op_errno = ENOMEM;
@@ -639,27 +659,6 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
- if (frame->root->lk_owner.len == 0) {
- /*
- special case: this means release all locks
- from this client
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from client %p", frame->root->client);
-
- release_inode_locks_of_client (this, dom, inode, frame->root->client);
- _pl_convert_volume (volume, &res1);
- if (res1) {
- dom = get_domain (pinode, res1);
- if (dom)
- release_inode_locks_of_client (this, dom,
- inode, frame->root->client);
- }
-
- op_ret = 0;
- goto unwind;
- }
-
reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid,
frame, this, volume, conn_id);
@@ -678,8 +677,8 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
case F_SETLK:
memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
- ret = pl_inode_setlk (this, pinode, reqlock,
- can_block, dom);
+ ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block,
+ dom);
if (ret < 0) {
if ((can_block) && (F_UNLCK != flock->l_type)) {
@@ -704,23 +703,6 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- ctx = pl_ctx_get (frame->root->client, this);
-
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
- goto unwind;
- }
-
- if (flock->l_type == F_UNLCK)
- pl_del_locker (ctx->ltable, volume, loc, fd,
- &frame->root->lk_owner,
- GF_FOP_INODELK);
- else
- pl_add_locker (ctx->ltable, volume, loc, fd,
- frame->root->pid,
- &frame->root->lk_owner,
- GF_FOP_INODELK);
-
unwind:
if ((inode != NULL) && (flock !=NULL)) {
pl_update_refkeeper (this, inode);
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 76fc941d7..8c2a6f867 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -65,7 +65,7 @@ struct __pl_inode_lock {
struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
- fd_t *fd;
+ struct __pl_inode *pl_inode;
call_frame_t *frame;
@@ -80,6 +80,8 @@ struct __pl_inode_lock {
pid_t client_pid; /* pid of client process */
char *connection_id; /* stores the client connection id */
+
+ struct list_head client_list; /* list of all locks from a client */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -103,9 +105,11 @@ typedef struct __pl_dom_list_t pl_dom_list_t;
struct __entry_lock {
struct list_head domain_list; /* list_head back to pl_dom_list_t */
struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+ int ref;
call_frame_t *frame;
xlator_t *this;
+ struct __pl_inode *pinode;
const char *volume;
@@ -115,11 +119,13 @@ struct __entry_lock {
struct timeval blkd_time; /*time at which lock was queued into blkd list*/
struct timeval granted_time; /*time at which lock was queued into active list*/
- void *trans;
+ void *client;
gf_lkowner_t owner;
pid_t client_pid; /* pid of client process */
char *connection_id; /* stores the client connection id */
+
+ struct list_head client_list; /* list of all locks from a client */
};
typedef struct __entry_lock pl_entry_lock_t;
@@ -144,12 +150,6 @@ struct __pl_inode {
typedef struct __pl_inode pl_inode_t;
-struct __pl_fd {
- gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
-};
-typedef struct __pl_fd pl_fd_t;
-
-
typedef struct {
gf_boolean_t mandatory; /* if mandatory locking is enabled */
gf_boolean_t trace; /* trace lock requests in and out */
@@ -178,15 +178,27 @@ typedef struct {
} pl_fdctx_t;
+struct _locker {
+ struct list_head lockers;
+ char *volume;
+ inode_t *inode;
+ gf_lkowner_t owner;
+};
+
typedef struct _locks_ctx {
- gf_lock_t ltable_lock; /* only for replace,
- ltable has its own internal
- lock for operations */
- struct _lock_table *ltable;
+ pthread_mutex_t lock;
+ struct list_head inodelk_lockers;
+ struct list_head entrylk_lockers;
} pl_ctx_t;
pl_ctx_t *
pl_ctx_get (client_t *client, xlator_t *xlator);
+int
+pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx);
+
+int
+pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx);
+
#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 7bfb38a51..fce0d509f 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -2243,7 +2243,7 @@ __dump_entrylks (pl_inode_t *pl_inode)
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
"ENTRYLK_WRLCK", lock->basename,
(unsigned long long) lock->client_pid,
- lkowner_utoa (&lock->owner), lock->trans,
+ lkowner_utoa (&lock->owner), lock->client,
lock->connection_id,
ctime_r (&lock->granted_time.tv_sec, granted));
} else {
@@ -2251,7 +2251,7 @@ __dump_entrylks (pl_inode_t *pl_inode)
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
"ENTRYLK_WRLCK", lock->basename,
(unsigned long long) lock->client_pid,
- lkowner_utoa (&lock->owner), lock->trans,
+ lkowner_utoa (&lock->owner), lock->client,
lock->connection_id,
ctime_r (&lock->blkd_time.tv_sec, blocked),
ctime_r (&lock->granted_time.tv_sec, granted));
@@ -2271,7 +2271,7 @@ __dump_entrylks (pl_inode_t *pl_inode)
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
"ENTRYLK_WRLCK", lock->basename,
(unsigned long long) lock->client_pid,
- lkowner_utoa (&lock->owner), lock->trans,
+ lkowner_utoa (&lock->owner), lock->client,
lock->connection_id,
ctime_r (&lock->blkd_time.tv_sec, blocked));
@@ -2524,19 +2524,12 @@ pl_ctx_get (client_t *client, xlator_t *xlator)
if (ctx == NULL)
goto out;
- ctx->ltable = pl_lock_table_new();
-
- if (ctx->ltable == NULL) {
- GF_FREE (ctx);
- ctx = NULL;
- goto out;
- }
-
- LOCK_INIT (&ctx->ltable_lock);
+ pthread_mutex_init (&ctx->lock, NULL);
+ INIT_LIST_HEAD (&ctx->inodelk_lockers);
+ INIT_LIST_HEAD (&ctx->entrylk_lockers);
if (client_ctx_set (client, xlator, ctx) != 0) {
- LOCK_DESTROY (&ctx->ltable_lock);
- GF_FREE (ctx->ltable);
+ pthread_mutex_destroy (&ctx->lock);
GF_FREE (ctx);
ctx = NULL;
}
@@ -2544,82 +2537,44 @@ out:
return ctx;
}
-static void
-ltable_delete_locks (struct _lock_table *ltable)
+
+/*
+ * client_disconnect callback of the locks translator.
+ *
+ * Looks up the per-client locks context and hands it to the inodelk and
+ * entrylk cleanup routines. Always returns 0.
+ */
+static int
+pl_client_disconnect_cbk (xlator_t *this, client_t *client)
{
- struct _locker *locker = NULL;
- struct _locker *tmp = NULL;
+        pl_ctx_t *pl_ctx = NULL;
- list_for_each_entry_safe (locker, tmp, &ltable->inodelk_lockers, lockers) {
- if (locker->fd)
- pl_del_locker (ltable, locker->volume, &locker->loc,
- locker->fd, &locker->owner,
- GF_FOP_INODELK);
- GF_FREE (locker->volume);
- GF_FREE (locker);
- }
+        /* pl_ctx_get() hands back NULL when the context can neither be
+         * found nor created; there is nothing to clean up in that case,
+         * and the cleanup routines must not be given a NULL context
+         * (NOTE(review): they are assumed to dereference it - confirm). */
+        pl_ctx = pl_ctx_get (client, this);
+        if (pl_ctx == NULL) {
+                gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+                return 0;
+        }
- list_for_each_entry_safe (locker, tmp, &ltable->entrylk_lockers, lockers) {
- if (locker->fd)
- pl_del_locker (ltable, locker->volume, &locker->loc,
- locker->fd, &locker->owner,
- GF_FOP_ENTRYLK);
- GF_FREE (locker->volume);
- GF_FREE (locker);
- }
- GF_FREE (ltable);
+
+        pl_inodelk_client_cleanup (this, pl_ctx);
+
+        pl_entrylk_client_cleanup (this, pl_ctx);
+
+        return 0;
}
-static int32_t
-destroy_cbk (xlator_t *this, client_t *client)
+static int
+pl_client_destroy_cbk (xlator_t *this, client_t *client)
{
- void *tmp = NULL;
- pl_ctx_t *locks_ctx = NULL;
+ void *tmp = NULL;
+ pl_ctx_t *pl_ctx = NULL;
+
+ pl_client_disconnect_cbk (this, client);
client_ctx_del (client, this, &tmp);
if (tmp == NULL)
- return 0
-;
- locks_ctx = tmp;
- if (locks_ctx->ltable)
- ltable_delete_locks (locks_ctx->ltable);
-
- LOCK_DESTROY (&locks_ctx->ltable_lock);
- GF_FREE (locks_ctx);
-
- return 0;
-}
+ return 0;
+ pl_ctx = tmp;
-static int32_t
-disconnect_cbk (xlator_t *this, client_t *client)
-{
- int32_t ret = 0;
- pl_ctx_t *locks_ctx = NULL;
- struct _lock_table *ltable = NULL;
+ GF_ASSERT (list_empty(&pl_ctx->inodelk_lockers));
+ GF_ASSERT (list_empty(&pl_ctx->entrylk_lockers));
- locks_ctx = pl_ctx_get (client, this);
- if (locks_ctx == NULL) {
- gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
- goto out;
- }
+ pthread_mutex_destroy (&pl_ctx->lock);
+ GF_FREE (pl_ctx);
- LOCK (&locks_ctx->ltable_lock);
- {
- if (locks_ctx->ltable) {
- ltable = locks_ctx->ltable;
- locks_ctx->ltable = pl_lock_table_new ();
- }
- }
- UNLOCK (&locks_ctx->ltable_lock);
-
- if (ltable)
- ltable_delete_locks (ltable);
-
-out:
- return ret;
+ return 0;
}
@@ -2756,8 +2711,8 @@ struct xlator_cbks cbks = {
.forget = pl_forget,
.release = pl_release,
.releasedir = pl_releasedir,
- .client_destroy = destroy_cbk,
- .client_disconnect = disconnect_cbk,
+ .client_destroy = pl_client_destroy_cbk,
+ .client_disconnect = pl_client_disconnect_cbk,
};
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 7156edcad..a531ab123 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -732,10 +732,6 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
}
UNLOCK (&ctx->lock);
- /* We log usage only if quota limit is configured on
- that inode. */
- quota_log_usage (this, ctx, _inode, delta);
-
if (need_validate) {
ret = quota_validate (frame, _inode, this,
quota_validate_cbk);
@@ -763,7 +759,16 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
= space_available;
}
+
+ if (space_available == 0) {
+ op_errno = EDQUOT;
+ goto err;
+ }
}
+
+ /* We log usage only if quota limit is configured on
+ that inode. */
+ quota_log_usage (this, ctx, _inode, delta);
}
if (__is_root_gfid (_inode->gfid)) {
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c
index 9e5357255..63e9bcf9f 100644
--- a/xlators/lib/src/libxlator.c
+++ b/xlators/lib/src/libxlator.c
@@ -452,6 +452,61 @@ gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)
/* can't use 'min()' macro here as we need to compare two fields
in the array, selectively */
+ if ((host_value_timebuf[0] < host_timebuf[0]) ||
+ ((host_value_timebuf[0] == host_timebuf[0]) &&
+ (host_value_timebuf[1] < host_timebuf[1]))) {
+ update_timebuf (value_timebuf, net_timebuf);
+ }
+
+ ret = 0;
+out:
+ return ret;
+error:
+ /* To be used only when net_timebuf is not set in the dict */
+ if (net_timebuf)
+ GF_FREE (net_timebuf);
+
+ return ret;
+}
+
+int
+gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t *value_timebuf = NULL;
+ uint32_t host_timebuf[2] = {0,};
+ uint32_t host_value_timebuf[2] = {0,};
+
+ /* stime should be maximum of all the other nodes */
+ ret = dict_get_bin (dst, key, (void **)&net_timebuf);
+ if (ret < 0) {
+ net_timebuf = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (!net_timebuf)
+ goto out;
+
+ ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: dict set failed", key);
+ goto error;
+ }
+ }
+
+ value_timebuf = data_to_bin (value);
+ if (!value_timebuf) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: getting value of stime failed", key);
+ ret = -1;
+ goto out;
+ }
+
+ get_hosttime (value_timebuf, host_value_timebuf);
+ get_hosttime (net_timebuf, host_timebuf);
+
+ /* can't use 'max()' macro here as we need to compare two fields
+ in the array, selectively */
if ((host_value_timebuf[0] > host_timebuf[0]) ||
((host_value_timebuf[0] == host_timebuf[0]) &&
(host_value_timebuf[1] > host_timebuf[1]))) {
diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h
index 08bd77b91..175d3141d 100644
--- a/xlators/lib/src/libxlator.h
+++ b/xlators/lib/src/libxlator.h
@@ -151,4 +151,7 @@ match_uuid_local (const char *name, char *uuid);
int
gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value);
+int
+gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value);
+
#endif /* !_LIBXLATOR_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 6c316af88..26d608a2f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -585,10 +585,23 @@ subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo,
static int
subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
- size_t err_len, char *vol_type)
+ size_t err_len, char *vol_type, int replica_count)
{
int i = 0;
int ret = 0;
+ int count = volinfo->replica_count-replica_count;
+
+ if (replica_count) {
+ for (i = 0; i < volinfo->subvol_count; i++) {
+ if (subvols[i] != count) {
+ ret = -1;
+ snprintf (err_str, err_len, "Remove exactly %d"
+ " brick(s) from each subvolume.", count);
+ break;
+ }
+ }
+ return ret;
+ }
do {
@@ -598,7 +611,6 @@ subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
ret = -1;
snprintf (err_str, err_len,
"Bricks not from same subvol for %s", vol_type);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
break;
}
} while (++i < volinfo->subvol_count);
@@ -626,16 +638,11 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
int *subvols = NULL;
- glusterd_brickinfo_t *tmp = NULL;
char err_str[2048] = {0};
gf_cli_rsp rsp = {0,};
void *cli_rsp = NULL;
char vol_type[256] = {0,};
int32_t replica_count = 0;
- int32_t brick_index = 0;
- int32_t tmp_brick_idx = 0;
- int found = 0;
- int diff_count = 0;
char *volname = 0;
xlator_t *this = NULL;
@@ -826,45 +833,6 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
(volinfo->brick_count <= volinfo->dist_leaf_count))
continue;
- if (replica_count) {
- /* do the validation of bricks here */
- /* -2 because i++ is already done, and i starts with 1,
- instead of 0 */
- diff_count = (volinfo->replica_count - replica_count);
- brick_index = (((i -2) / diff_count) * volinfo->replica_count);
- tmp_brick_idx = 0;
- found = 0;
- list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
- tmp_brick_idx++;
- gf_log (this->name, GF_LOG_TRACE,
- "validate brick %s:%s (%d %d %d)",
- tmp->hostname, tmp->path, tmp_brick_idx,
- brick_index, volinfo->replica_count);
- if (tmp_brick_idx <= brick_index)
- continue;
- if (tmp_brick_idx >
- (brick_index + volinfo->replica_count))
- break;
- if ((!strcmp (tmp->hostname,brickinfo->hostname)) &&
- !strcmp (tmp->path, brickinfo->path)) {
- found = 1;
- break;
- }
- }
- if (found)
- continue;
-
- snprintf (err_str, sizeof (err_str), "Bricks are from "
- "same subvol");
- gf_log (this->name, GF_LOG_INFO,
- "failed to validate brick %s:%s (%d %d %d)",
- tmp->hostname, tmp->path, tmp_brick_idx,
- brick_index, volinfo->replica_count);
- ret = -1;
- /* brick order is not valid */
- goto out;
- }
-
/* Find which subvolume the brick belongs to */
subvol_matcher_update (subvols, volinfo, brickinfo);
}
@@ -874,7 +842,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
(volinfo->subvol_count > 1)) {
ret = subvol_matcher_verify (subvols, volinfo,
err_str, sizeof(err_str),
- vol_type);
+ vol_type, replica_count);
if (ret)
goto out;
}
@@ -1825,6 +1793,11 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
}
case GF_OP_CMD_START:
+ /* Reset defrag status to 'NOT STARTED' whenever a
+ * remove-brick/rebalance command is issued to remove
+ * stale information from previous run.
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -1869,19 +1842,22 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
goto out;
}
- /* Save the list of bricks for later usage. Right now this is required
- * for displaying the task parameters with task status in volume status.
+ /* Save the list of bricks for later usage only on starting a
+ * remove-brick. Right now this is required for displaying the task
+ * parameters with task status in volume status.
*/
- bricks_dict = dict_new ();
- if (!bricks_dict) {
- ret = -1;
- goto out;
- }
- ret = dict_set_int32 (bricks_dict, "count", count);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to save remove-brick count");
- goto out;
+ if (GF_OP_CMD_START == cmd) {
+ bricks_dict = dict_new ();
+ if (!bricks_dict) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (bricks_dict, "count", count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to save remove-brick count");
+ goto out;
+ }
}
while ( i <= count) {
snprintf (key, 256, "brick%d", i);
@@ -1892,20 +1868,22 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
goto out;
}
- brick_tmpstr = gf_strdup (brick);
- if (!brick_tmpstr) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to duplicate brick name");
- goto out;
- }
- ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to add brick to dict");
- goto out;
+ if (GF_OP_CMD_START == cmd) {
+ brick_tmpstr = gf_strdup (brick);
+ if (!brick_tmpstr) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to duplicate brick name");
+ goto out;
+ }
+ ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add brick to dict");
+ goto out;
+ }
+ brick_tmpstr = NULL;
}
- brick_tmpstr = NULL;
ret = glusterd_op_perform_remove_brick (volinfo, brick, force,
&need_rebalance);
@@ -1913,6 +1891,9 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
goto out;
i++;
}
+ if (GF_OP_CMD_START == cmd)
+ volinfo->rebal.dict = dict_ref (bricks_dict);
+
ret = dict_get_int32 (dict, "replica-count", &replica_count);
if (!ret) {
gf_log (this->name, GF_LOG_INFO,
@@ -1937,8 +1918,6 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
}
}
}
- volinfo->rebal.dict = bricks_dict;
- bricks_dict = NULL;
ret = glusterd_create_volfiles_and_notify_services (volinfo);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index 5786694bd..c5c76e11a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -28,17 +28,6 @@
static int
dict_get_param (dict_t *dict, char *key, char **param);
-static int
-glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,
- char *conf_path, char **statefile);
-
-static int
-glusterd_get_slave_info (char *slave, char **slave_ip,
- char **slave_vol, char **op_errstr);
-
-static int
-glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen);
-
struct gsync_config_opt_vals_ gsync_confopt_vals[] = {
{.op_name = "change_detector",
.no_of_pos_vals = 2,
@@ -55,6 +44,11 @@ struct gsync_config_opt_vals_ gsync_confopt_vals[] = {
.case_sensitive = _gf_false,
.values = {"critical", "error", "warning", "info", "debug"}
},
+ {.op_name = "use-tarssh",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}
+ },
{.op_name = NULL,
},
};
@@ -74,6 +68,11 @@ static char *gsync_reserved_opts[] = {
NULL
};
+static char *gsync_no_restart_opts[] = {
+ "checkpoint",
+ NULL
+};
+
int
__glusterd_handle_sys_exec (rpcsvc_request_t *req)
{
@@ -899,6 +898,8 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)
}
if (op_match) {
+ if (!op_value)
+ goto out;
val_match = _gf_false;
for (i = 0; i < conf_vals->no_of_pos_vals; i++) {
if(conf_vals->case_sensitive){
@@ -912,7 +913,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)
if (!val_match) {
ret = snprintf (errmsg, sizeof(errmsg) - 1,
- "Invalid values (%s) for"
+ "Invalid value(%s) for"
" option %s", op_value,
op_name);
errmsg[ret] = '\0';
@@ -923,7 +924,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)
}
}
}
-
+out:
return 0;
}
@@ -1581,7 +1582,7 @@ out:
return ret;
}
-static int
+int
glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,
char *conf_path, char **statefile)
{
@@ -1736,7 +1737,7 @@ glusterd_verify_slave (char *volname, char *slave_ip, char *slave,
gf_log ("", GF_LOG_ERROR, "Not a valid slave");
ret = glusterd_gsync_read_frm_status (log_file_path,
buf, sizeof(buf));
- if (ret) {
+ if (ret <= 0) {
gf_log ("", GF_LOG_ERROR, "Unable to read from %s",
log_file_path);
goto out;
@@ -2391,6 +2392,8 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave,
char *slave_ip = NULL;
char *slave_vol = NULL;
struct stat stbuf = {0, };
+ gf_boolean_t restart_required = _gf_true;
+ char **resopt = NULL;
GF_ASSERT (slave);
GF_ASSERT (op_errstr);
@@ -2495,18 +2498,28 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave,
out:
if (!ret && volinfo) {
+ for (resopt = gsync_no_restart_opts; *resopt; resopt++) {
+ restart_required = _gf_true;
+ if (!strcmp ((*resopt), op_name)){
+ restart_required = _gf_false;
+ break;
+ }
+ }
+
+ if (restart_required) {
ret = glusterd_check_restart_gsync_session (volinfo, slave,
resp_dict, path_list,
conf_path, 0);
if (ret)
- *op_errstr = gf_strdup ("internal error");
+ *op_errstr = gf_strdup ("internal error");
+ }
}
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
-static int
+int
glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)
{
int ret = 0;
@@ -2530,7 +2543,6 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)
char *p = buf + len - 1;
while (isspace (*p))
*p-- = '\0';
- ret = 0;
}
} else if (ret < 0)
gf_log ("", GF_LOG_ERROR, "Status file of gsyncd is corrupt");
@@ -2540,20 +2552,146 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)
}
static int
-glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen)
+dict_get_param (dict_t *dict, char *key, char **param)
+{
+        /*
+         * Fetch the string stored under @key in @dict, accepting either
+         * separator spelling of the key ("foo-bar" vs "foo_bar").
+         *
+         * The key is tried verbatim first. If that lookup fails, a private
+         * copy of the key is made in which every '-' and '_' is replaced by
+         * the opposite of the first separator found, and the lookup is
+         * retried with that copy.
+         *
+         * Returns 0 when the verbatim lookup succeeds; -1 when the key copy
+         * cannot be allocated or the key holds no separator to swap;
+         * otherwise the result of the second dict_get_str() call.
+         */
+        char *dk  = NULL;
+        char *s   = NULL;
+        char  x   = '\0';
+        int   ret = -1;
+
+        if (dict_get_str (dict, key, param) == 0)
+                return 0;
+
+        dk = gf_strdup (key);
+        if (!dk)
+                return -1;
+
+        s = strpbrk (dk, "-_");
+        if (!s)
+                goto out;
+
+        /* Normalise every separator to the one the key did not start with. */
+        x = (*s == '-') ? '_' : '-';
+        *s++ = x;
+        while ((s = strpbrk (s, "-_")))
+                *s++ = x;
+
+        ret = dict_get_str (dict, dk, param);
+
+out:
+        /* The copy is only a lookup key; release it on every path. */
+        GF_FREE (dk);
+        return ret;
+}
+
+/*
+ * Isolate the value of one "key: value" field of a gsyncd status line.
+ * @field is modified in place.
+ *
+ * Everything up to and including the first ':' is discarded and leading
+ * blanks are skipped. Then @strip_head characters are dropped from the
+ * front and @strip_tail characters from the back of what remains
+ * (presumably the quotes around string values, plus the closing brace
+ * after the last field - confirm against gsyncd's status file format).
+ *
+ * Returns the value, or NULL when @field is NULL, holds no ':' or has no
+ * text left once the decoration is removed.
+ */
+static char *
+gsync_status_field_value (char *field, size_t strip_head, size_t strip_tail)
+{
+        char   *value = NULL;
+        size_t  len   = 0;
+
+        if (!field)
+                return NULL;
+
+        value = strchr (field, ':');
+        if (!value)
+                return NULL;
+
+        value++;
+        while (*value == ' ')
+                value++;
+
+        /* Refuse values too short to strip instead of indexing before
+         * the start of the string. */
+        len = strlen (value);
+        if (len <= strip_head + strip_tail)
+                return NULL;
+
+        value[len - strip_tail] = '\0';
+        return value + strip_head;
+}
+
+/*
+ * Parse a comma separated list of "key: value" fields from @buf into
+ * @sts_val. @buf is modified in place.
+ *
+ * Fields are assigned purely by position, the keys are ignored:
+ *   0 slave_node (decorated)   4 files_remaining
+ *   1 files_syncd              5 worker_status (decorated)
+ *   2 purges_remaining         6 bytes_remaining
+ *   3 total_files_skipped      7 crawl_status (decorated, 2 trailing chars)
+ *
+ * A field that is missing or has no usable value is stored as "N/A";
+ * fields beyond the eighth are ignored. Every destination is written
+ * NUL-terminated and truncated to its own size; the members of @sts_val
+ * are treated as fixed-size char arrays, as the sizeof-based snprintf()
+ * calls in glusterd_read_status_file() do.
+ *
+ * Returns 0 on success, -1 when @buf is NULL.
+ */
+static int
+glusterd_parse_gsync_status (char *buf, gf_gsync_status_t *sts_val)
+{
+        int     ret           = -1;
+        int     i             = 0;
+        int     num_of_fields = 8;
+        char   *field         = NULL;
+        char   *value         = NULL;
+        char   *save_ptr      = NULL;
+        char   *dst           = NULL;
+        size_t  dst_len       = 0;
+        size_t  strip_head    = 0;
+        size_t  strip_tail    = 0;
+        char    na_buf[]      = "N/A";
+
+        if (!buf) {
+                gf_log ("", GF_LOG_ERROR, "Empty buf");
+                goto out;
+        }
+
+        field = strtok_r (buf, ",", &save_ptr);
+
+        for (i = 0; i < num_of_fields; i++) {
+                strip_head = 0;
+                strip_tail = 0;
+
+                switch (i) {
+                case 0:
+                        dst        = sts_val->slave_node;
+                        dst_len    = sizeof (sts_val->slave_node);
+                        strip_head = 1;
+                        strip_tail = 1;
+                        break;
+                case 1:
+                        dst     = sts_val->files_syncd;
+                        dst_len = sizeof (sts_val->files_syncd);
+                        break;
+                case 2:
+                        dst     = sts_val->purges_remaining;
+                        dst_len = sizeof (sts_val->purges_remaining);
+                        break;
+                case 3:
+                        dst     = sts_val->total_files_skipped;
+                        dst_len = sizeof (sts_val->total_files_skipped);
+                        break;
+                case 4:
+                        dst     = sts_val->files_remaining;
+                        dst_len = sizeof (sts_val->files_remaining);
+                        break;
+                case 5:
+                        dst        = sts_val->worker_status;
+                        dst_len    = sizeof (sts_val->worker_status);
+                        strip_head = 1;
+                        strip_tail = 1;
+                        break;
+                case 6:
+                        dst     = sts_val->bytes_remaining;
+                        dst_len = sizeof (sts_val->bytes_remaining);
+                        break;
+                case 7:
+                default:
+                        /* Last field: two trailing characters are dropped. */
+                        dst        = sts_val->crawl_status;
+                        dst_len    = sizeof (sts_val->crawl_status);
+                        strip_head = 1;
+                        strip_tail = 2;
+                        break;
+                }
+
+                value = gsync_status_field_value (field, strip_head,
+                                                  strip_tail);
+                if (!value)
+                        value = na_buf;
+
+                snprintf (dst, dst_len, "%s", value);
+
+                /* Once the input is exhausted keep field at NULL so the
+                 * remaining members are filled with "N/A". */
+                if (field)
+                        field = strtok_r (NULL, ",", &save_ptr);
+        }
+
+        ret = 0;
+out:
+        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+}
+
+static int
+glusterd_gsync_fetch_status_extra (char *path, gf_gsync_status_t *sts_val)
{
char sockpath[PATH_MAX] = {0,};
struct sockaddr_un sa = {0,};
- size_t l = 0;
int s = -1;
struct pollfd pfd = {0,};
int ret = 0;
- l = strlen (buf);
- /* seek to end of data in buf */
- buf += l;
- blen -= l;
-
glusterd_set_socket_filepath (path, sockpath, sizeof (sockpath));
strncpy(sa.sun_path, sockpath, sizeof(sa.sun_path));
@@ -2581,66 +2719,40 @@ glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen)
ret = -1;
goto out;
}
- ret = read(s, buf, blen);
+ ret = read(s, sts_val->checkpoint_status,
+ sizeof(sts_val->checkpoint_status));
/* we expect a terminating 0 byte */
- if (ret == 0 || (ret > 0 && buf[ret - 1]))
+ if (ret == 0 || (ret > 0 && sts_val->checkpoint_status[ret - 1]))
ret = -1;
- if (ret > 0)
+ if (ret > 0) {
ret = 0;
+ }
- out:
+out:
close (s);
return ret;
}
-static int
-dict_get_param (dict_t *dict, char *key, char **param)
-{
- char *dk = NULL;
- char *s = NULL;
- char x = '\0';
- int ret = 0;
-
- if (dict_get_str (dict, key, param) == 0)
- return 0;
-
- dk = gf_strdup (key);
- if (!key)
- return -1;
-
- s = strpbrk (dk, "-_");
- if (!s)
- return -1;
- x = (*s == '-') ? '_' : '-';
- *s++ = x;
- while ((s = strpbrk (s, "-_")))
- *s++ = x;
-
- ret = dict_get_str (dict, dk, param);
-
- GF_FREE (dk);
- return ret;
-}
-
-static int
+int
glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,
char *conf_path, dict_t *dict, char *node)
{
- glusterd_conf_t *priv = NULL;
- int ret = 0;
- char *statefile = NULL;
- char *master = NULL;
- char buf[1024] = "defunct";
- char nds[1024] = {0, };
- char mst[1024] = {0, };
- char slv[1024] = {0, };
- char sts[1024] = {0, };
- char *bufp = NULL;
- dict_t *confd = NULL;
- int gsync_count = 0;
- int status = 0;
- char *dyn_node = NULL;
- char *path_list = NULL;
+ char brick_state_file[PATH_MAX] = "";
+ char brick_path[PATH_MAX] = "";
+ char *georep_session_wrkng_dir = NULL;
+ char *master = NULL;
+ char tmp[1024] = "";
+ char sts_val_name[1024] = "";
+ char monitor_status[NAME_MAX] = "";
+ char *statefile = NULL;
+ char *socketfile = NULL;
+ dict_t *confd = NULL;
+ int gsync_count = 0;
+ int i = 0;
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ gf_gsync_status_t *sts_val = NULL;
+ glusterd_conf_t *priv = NULL;
GF_ASSERT (THIS);
GF_ASSERT (THIS->private);
@@ -2661,7 +2773,7 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to get configuration data"
"for %s(master), %s(slave)", master, slave);
- goto done;
+ goto out;
}
@@ -2670,120 +2782,168 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,
gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name "
"for %s(master), %s(slave). Please check gsync "
"config file.", master, slave);
- goto done;
+ goto out;
}
- ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf));
- if (ret) {
+
+ ret = glusterd_gsync_read_frm_status (statefile, monitor_status,
+ sizeof (monitor_status));
+ if (ret <= 0) {
gf_log ("", GF_LOG_ERROR, "Unable to read the status"
"file for %s(master), %s(slave)", master, slave);
- strncpy (buf, "defunct", sizeof (buf));
- goto done;
- }
-
- ret = gsync_status (master, slave, conf_path, &status);
- if (ret == 0 && status == -1) {
- if ((strcmp (buf, "Not Started")) &&
- (strcmp (buf, "Stopped")))
- strncpy (buf, "defunct", sizeof (buf));
- goto done;
- } else if (ret == -1) {
- gf_log ("", GF_LOG_ERROR, "Unable to get gsync status");
- goto done;
+ strncpy (monitor_status, "defunct", sizeof (monitor_status));
}
- if (strcmp (buf, "Stable") != 0)
- goto done;
-
- ret = dict_get_param (confd, "state_socket_unencoded", &statefile);
+ ret = dict_get_param (confd, "georep_session_working_dir",
+ &georep_session_wrkng_dir);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get state_socket_unencoded"
- " filepath. Please check gsync config file.");
- goto done;
+ gf_log ("", GF_LOG_ERROR, "Unable to get geo-rep session's "
+ "working directory name for %s(master), %s(slave). "
+ "Please check gsync config file.", master, slave);
+ goto out;
}
- ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf));
+
+ ret = dict_get_param (confd, "state_socket_unencoded", &socketfile);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status"
- "for %s(master), %s(slave)", master, slave);
- /* there is a slight chance that this occurs due to race
- * -- in that case, the following options all seem bad:
- *
- * - suppress irregurlar behavior by just leaving status
- * on "OK"
- * - freak out users with a misleading "defunct"
- * - overload the meaning of the regular error signal
- * mechanism of gsyncd, that is, when status is "faulty"
- *
- * -- so we just come up with something new...
- */
- strncpy (buf, "N/A", sizeof (buf));
- goto done;
+ gf_log ("", GF_LOG_ERROR, "Unable to get socket file's name "
+ "for %s(master), %s(slave). Please check gsync "
+ "config file.", master, slave);
+ goto out;
}
- done:
- if ((!strcmp (buf, "defunct")) ||
- (!strcmp (buf, "Not Started")) ||
- (!strcmp (buf, "Stopped"))) {
- ret = glusterd_get_local_brickpaths (volinfo, &path_list);
- if (!path_list) {
- gf_log ("", GF_LOG_DEBUG, "This node not being part of"
- " volume should not be running gsyncd. Hence"
- " shouldn't display status for this node.");
- ret = 0;
+ ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ if (ret)
+ gsync_count = 0;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t),
+ gf_common_mt_gsync_status_t);
+ if (!sts_val) {
+ gf_log ("", GF_LOG_ERROR, "Out Of Memory");
goto out;
}
- }
- ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ /* Creating the brick state file's path */
+ memset(brick_state_file, '\0', PATH_MAX);
+ memcpy (brick_path, brickinfo->path, PATH_MAX - 1);
+ for (i = 0; i < strlen(brick_path) - 1; i++)
+ if (brick_path[i] == '/')
+ brick_path[i] = '_';
+ ret = snprintf(brick_state_file, PATH_MAX - 1, "%s%s.status",
+ georep_session_wrkng_dir, brick_path);
+ brick_state_file[ret] = '\0';
+
+ gf_log ("", GF_LOG_DEBUG, "brick_state_file = %s", brick_state_file);
+
+ memset (tmp, '\0', sizeof(tmp));
+
+ ret = glusterd_gsync_read_frm_status (brick_state_file,
+ tmp, sizeof (tmp));
+ if (ret <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read the status"
+ "file for %s brick for %s(master), %s(slave) "
+ "session", brickinfo->path, master, slave);
+ memcpy (sts_val->slave_node, slave, strlen(slave));
+ sts_val->slave_node[strlen(slave)] = '\0';
+ ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A");
+ sts_val->worker_status[ret] = '\0';
+ ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A");
+ sts_val->checkpoint_status[ret] = '\0';
+ ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A");
+ sts_val->crawl_status[ret] = '\0';
+ ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A");
+ sts_val->files_syncd[ret] = '\0';
+ ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A");
+ sts_val->purges_remaining[ret] = '\0';
+ ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A");
+ sts_val->total_files_skipped[ret] = '\0';
+ ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A");
+ sts_val->files_remaining[ret] = '\0';
+ ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A");
+ sts_val->bytes_remaining[ret] = '\0';
+ goto store_status;
+ }
- if (ret)
- gsync_count = 1;
- else
- gsync_count++;
+ ret = glusterd_gsync_fetch_status_extra (socketfile, sts_val);
+ if (ret || strlen(sts_val->checkpoint_status) == 0) {
+ gf_log ("", GF_LOG_DEBUG, "No checkpoint status"
+ "for %s(master), %s(slave)", master, slave);
+ ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A");
+ sts_val->checkpoint_status[ret] = '\0';
+ }
- (void) snprintf (nds, sizeof (nds), "node%d", gsync_count);
- dyn_node = gf_strdup (node);
- if (!dyn_node)
- goto out;
- ret = dict_set_dynstr (dict, nds, dyn_node);
- if (ret) {
- GF_FREE (dyn_node);
- goto out;
- }
+ ret = glusterd_parse_gsync_status (tmp, sts_val);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to parse the gsync status for %s",
+ brickinfo->path);
+ memcpy (sts_val->slave_node, slave, strlen(slave));
+ sts_val->slave_node[strlen(slave)] = '\0';
+ ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A");
+ sts_val->worker_status[ret] = '\0';
+ ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A");
+ sts_val->checkpoint_status[ret] = '\0';
+ ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A");
+ sts_val->crawl_status[ret] = '\0';
+ ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A");
+ sts_val->files_syncd[ret] = '\0';
+ ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A");
+ sts_val->purges_remaining[ret] = '\0';
+ ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A");
+ sts_val->total_files_skipped[ret] = '\0';
+ ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A");
+ sts_val->files_remaining[ret] = '\0';
+ ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A");
+ sts_val->bytes_remaining[ret] = '\0';
+ }
- snprintf (mst, sizeof (mst), "master%d", gsync_count);
- master = gf_strdup (master);
- if (!master)
- goto out;
- ret = dict_set_dynstr (dict, mst, master);
- if (ret) {
- GF_FREE (master);
- goto out;
- }
+store_status:
+ if ((strcmp (monitor_status, "Stable"))) {
+ memcpy (sts_val->worker_status, monitor_status, strlen(monitor_status));
+ sts_val->worker_status[strlen(monitor_status)] = '\0';
+ ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A");
+ sts_val->crawl_status[ret] = '\0';
+ ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A");
+ sts_val->checkpoint_status[ret] = '\0';
+ }
- snprintf (slv, sizeof (slv), "slave%d", gsync_count);
- slave = gf_strdup (slave);
- if (!slave)
- goto out;
- ret = dict_set_dynstr (dict, slv, slave);
- if (ret) {
- GF_FREE (slave);
- goto out;
- }
+ if (strcmp (sts_val->worker_status, "Active")) {
+ ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A");
+ sts_val->checkpoint_status[ret] = '\0';
+ ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A");
+ sts_val->crawl_status[ret] = '\0';
+ }
- snprintf (sts, sizeof (slv), "status%d", gsync_count);
- bufp = gf_strdup (buf);
- if (!bufp)
- goto out;
- ret = dict_set_dynstr (dict, sts, bufp);
- if (ret) {
- GF_FREE (bufp);
- goto out;
+ if (!strcmp (sts_val->slave_node, "N/A")) {
+ memcpy (sts_val->slave_node, slave, strlen(slave));
+ sts_val->slave_node[strlen(slave)] = '\0';
+ }
+
+ memcpy (sts_val->node, node, strlen(node));
+ sts_val->node[strlen(node)] = '\0';
+ memcpy (sts_val->brick, brickinfo->path, strlen(brickinfo->path));
+ sts_val->brick[strlen(brickinfo->path)] = '\0';
+ memcpy (sts_val->master, master, strlen(master));
+ sts_val->master[strlen(master)] = '\0';
+
+ snprintf (sts_val_name, sizeof (sts_val_name), "status_value%d", gsync_count);
+ ret = dict_set_bin (dict, sts_val_name, sts_val, sizeof(gf_gsync_status_t));
+ if (ret) {
+ GF_FREE (sts_val);
+ goto out;
+ }
+
+ gsync_count++;
+ sts_val = NULL;
}
+
ret = dict_set_int32 (dict, "gsync-count", gsync_count);
if (ret)
goto out;
- out:
+out:
dict_destroy (confd);
return 0;
@@ -3246,30 +3406,32 @@ glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO));
- if (ptr) {
- ret = dict_get_int32 (rsp_dict, "output_count", &output_count);
- if (ret)
- output_count = 1;
- else
- output_count++;
- memset (output_name, '\0', sizeof (output_name));
- snprintf (output_name, sizeof (output_name),
- "output_%d", output_count);
- if (buf[strlen(buf) - 1] == '\n')
- buf[strlen(buf) - 1] = '\0';
- bufp = gf_strdup (buf);
- if (!bufp)
- gf_log ("", GF_LOG_ERROR, "gf_strdup failed.");
- ret = dict_set_dynstr (rsp_dict, output_name, bufp);
- if (ret) {
- GF_FREE (bufp);
- gf_log ("", GF_LOG_ERROR, "output set failed.");
+ do {
+ ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO));
+ if (ptr) {
+ ret = dict_get_int32 (rsp_dict, "output_count", &output_count);
+ if (ret)
+ output_count = 1;
+ else
+ output_count++;
+ memset (output_name, '\0', sizeof (output_name));
+ snprintf (output_name, sizeof (output_name),
+ "output_%d", output_count);
+ if (buf[strlen(buf) - 1] == '\n')
+ buf[strlen(buf) - 1] = '\0';
+ bufp = gf_strdup (buf);
+ if (!bufp)
+ gf_log ("", GF_LOG_ERROR, "gf_strdup failed.");
+ ret = dict_set_dynstr (rsp_dict, output_name, bufp);
+ if (ret) {
+ GF_FREE (bufp);
+ gf_log ("", GF_LOG_ERROR, "output set failed.");
+ }
+ ret = dict_set_int32 (rsp_dict, "output_count", output_count);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "output_count set failed.");
}
- ret = dict_set_int32 (rsp_dict, "output_count", output_count);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "output_count set failed.");
- }
+ } while (ptr);
ret = runner_end (&runner);
if (ret) {
@@ -3708,7 +3870,7 @@ out:
}
-static int
+int
glusterd_get_slave_info (char *slave, char **slave_ip,
char **slave_vol, char **op_errstr)
{
@@ -3888,7 +4050,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path)
/* gluster-params */
runinit_gsyncd_setrx (&runner, conf_path);
runner_add_args (&runner, "gluster-params",
- "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true",
+ "aux-gfid-mount",
".", ".", NULL);
RUN_GSYNCD_CMD;
@@ -3902,6 +4064,16 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path)
runner_add_args (&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
+ /* ssh-command tar */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "ssh-command-tar");
+ runner_argprintf (&runner,
+ "ssh -oPasswordAuthentication=no "
+ "-oStrictHostKeyChecking=no "
+ "-i %s/tar_ssh.pem", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
/* pid-file */
runinit_gsyncd_setrx (&runner, conf_path);
runner_add_arg (&runner, "pid-file");
@@ -3909,6 +4081,13 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path)
runner_add_args (&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
+ /* geo-rep-working-dir */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "georep-session-working-dir");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
/* state-file */
runinit_gsyncd_setrx (&runner, conf_path);
runner_add_arg (&runner, "state-file");
@@ -3986,7 +4165,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path)
/* gluster-params */
runinit_gsyncd_setrx (&runner, conf_path);
runner_add_args (&runner, "gluster-params",
- "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true",
+ "aux-gfid-mount",
".", NULL);
RUN_GSYNCD_CMD;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index c7bf53b4e..e296509d8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -3725,10 +3725,12 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
"%s:%s", brickinfo->hostname, brickinfo->path);
glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
- if (rpc_clnt_is_disabled (rpc))
- GF_FREE (brickid);
break;
+ case RPC_CLNT_DESTROY:
+ GF_FREE (mydata);
+ mydata = NULL;
+ break;
default:
gf_log (this->name, GF_LOG_TRACE,
"got some other RPC event %d", event);
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
index 0d67d1303..4ce441da8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
@@ -231,7 +231,6 @@ parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc)
const char *georep_mnt_desc_template =
"SUP("
- "xlator-option=\\*-dht.assert-no-child-down=true "
"volfile-server=localhost "
"client-pid=%d "
"user-map-root=%s "
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 0bf7a3352..06ee849f5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -155,6 +155,40 @@ glusterd_op_sm_inject_all_acc ()
return ret;
}
+/* Reject attempts to toggle quota through 'volume set'.
+ *
+ * When @key is "quota" or "features.quota" and @value parses as a boolean,
+ * a deprecation message naming the matching 'volume quota' sub-command is
+ * written into @errstr (bounded by @size) and -1 is returned.  If @value
+ * does not parse, the result of gf_string2boolean() is returned and @errstr
+ * is left untouched.  Any other key yields 0.
+ */
+static int
+glusterd_check_quota_cmd (char *key, char *value, char *errstr, size_t size)
+{
+        int             rc     = -1;
+        gf_boolean_t    enable = _gf_false;
+
+        /* Keys other than the two quota spellings are not handled here. */
+        if (strcmp (key, "quota") && strcmp (key, "features.quota"))
+                return 0;
+
+        rc = gf_string2boolean (value, &enable);
+        if (rc)
+                return rc;
+
+        if (enable)
+                snprintf (errstr, size," 'gluster "
+                          "volume set <VOLNAME> %s %s' is "
+                          "deprecated. Use 'gluster volume "
+                          "quota <VOLNAME> enable' instead.",
+                          key, value);
+        else
+                snprintf (errstr, size, " 'gluster "
+                          "volume set <VOLNAME> %s %s' is "
+                          "deprecated. Use 'gluster volume "
+                          "quota <VOLNAME> disable' instead.",
+                          key, value);
+
+        /* A valid on/off value for a quota key is always refused. */
+        return -1;
+}
+
int
glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo,
gd1_mgmt_brick_op_req **req, dict_t *dict)
@@ -544,6 +578,10 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
}
}
+ ret = glusterd_check_quota_cmd (key, value, errstr, sizeof (errstr));
+ if (ret)
+ goto out;
+
if (is_key_glusterd_hooks_friendly (key))
continue;
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c
index f46f08787..3c8dcf8dd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-quota.c
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.c
@@ -1403,6 +1403,9 @@ glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"greater than INT64_MAX", hard_limit_str);
goto out;
}
+ /*The break statement is missing here to allow intentional fall
+ * through of code execution to the next switch case
+ */
case GF_QUOTA_OPTION_TYPE_REMOVE:
ret = glusterd_get_gfid_from_brick (dict, volinfo, rsp_dict,
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 7911c3d21..b28056135 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -152,7 +152,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
glusterd_store_perform_node_state_store (volinfo);
if (defrag->rpc) {
- rpc_clnt_unref (defrag->rpc);
+ glusterd_rpc_clnt_unref (priv, defrag->rpc);
defrag->rpc = NULL;
}
if (defrag->cbk_fn)
@@ -164,6 +164,9 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
rpc->conn.trans->name);
break;
}
+ case RPC_CLNT_DESTROY:
+ glusterd_volinfo_unref (volinfo);
+ break;
default:
gf_log ("", GF_LOG_TRACE,
"got some other RPC event %d", event);
@@ -234,7 +237,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
goto out;
}
- GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv);
+ GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo);
GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv);
snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log",
DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname);
@@ -285,7 +288,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
sleep (5);
- ret = glusterd_rebalance_rpc_create (volinfo);
+ ret = glusterd_rebalance_rpc_create (volinfo, _gf_false);
//FIXME: this cbk is passed as NULL in all occurrences. May be
//we never needed it.
@@ -299,13 +302,21 @@ out:
int
-glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo)
+glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
+ gf_boolean_t reconnect)
{
dict_t *options = NULL;
char sockfile[PATH_MAX] = {0,};
int ret = -1;
glusterd_defrag_info_t *defrag = volinfo->rebal.defrag;
- glusterd_conf_t *priv = THIS->private;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ struct stat buf = {0,};
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
//rebalance process is not started
if (!defrag)
@@ -316,7 +327,30 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo)
ret = 0;
goto out;
}
- GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv);
+ GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo);
+ /* If reconnecting, check whether the defrag sockfile exists in the new
+ * location in /var/run/; if it does not, try the old location.
+ */
+ if (reconnect) {
+ ret = sys_stat (sockfile, &buf);
+ /* TODO: Remove this once we don't need backward compatibility
+ * with the older path
+ */
+ if (ret && (errno == ENOENT)) {
+ gf_log (this->name, GF_LOG_WARNING, "Rebalance sockfile "
+ "%s does not exist. Trying old path.",
+ sockfile);
+ GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD (sockfile, volinfo,
+ priv);
+ ret =sys_stat (sockfile, &buf);
+ if (ret && (ENOENT == errno)) {
+ gf_log (this->name, GF_LOG_ERROR, "Rebalance "
+ "sockfile %s does not exist.",
+ sockfile);
+ goto out;
+ }
+ }
+ }
/* Setting frame-timeout to 10mins (600seconds).
* Unix domain sockets ensures that the connection is reliable. The
@@ -329,6 +363,7 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo)
goto out;
}
+ glusterd_volinfo_ref (volinfo);
synclock_unlock (&priv->big_lock);
ret = glusterd_rpc_create (&defrag->rpc, options,
glusterd_defrag_notify, volinfo);
@@ -634,6 +669,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
case GF_DEFRAG_CMD_START_FORCE:
+ /* Reset defrag status to 'NOT STARTED' whenever a
+ * remove-brick/rebalance command is issued to remove
+ * stale information from previous run.
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+
ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance "
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 94b0383fe..5c3fc2d82 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -2012,5 +2012,9 @@ out:
else
ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL);
- glusterd_op_sm ();
+ synclock_lock (&priv->big_lock);
+ {
+ glusterd_op_sm ();
+ }
+ synclock_unlock (&priv->big_lock);
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 5b51aabad..d0ad7dcdb 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -166,7 +166,7 @@ out:
if (brickinfo)
glusterd_brickinfo_delete (brickinfo);
if (volinfo)
- glusterd_volinfo_delete (volinfo);
+ glusterd_volinfo_unref (volinfo);
return ret;
}
@@ -883,6 +883,19 @@ out:
return ret;
}
+/* Per-entry callback used with dict_foreach() on the rebalance dict:
+ * writes one key/value pair to the store file.
+ *
+ * @data points to the int file descriptor passed to gf_store_save_value();
+ * @dict is not used.  Returns whatever gf_store_save_value() returns.
+ */
+int
+_gd_store_rebalance_dict (dict_t *dict, char *key, data_t *value, void *data)
+{
+        int     *fdp = data;
+
+        return gf_store_save_value (*fdp, key, value->data);
+}
+
int32_t
glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo)
{
@@ -907,9 +920,14 @@ glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo)
if (ret)
goto out;
- if (volinfo->rebal.defrag_cmd) {
- uuid_unparse (volinfo->rebal.rebalance_id, buf);
- ret = gf_store_save_value (fd, GF_REBALANCE_TID_KEY, buf);
+ uuid_unparse (volinfo->rebal.rebalance_id, buf);
+ ret = gf_store_save_value (fd, GF_REBALANCE_TID_KEY, buf);
+ if (ret)
+ goto out;
+
+ if (volinfo->rebal.dict) {
+ dict_foreach (volinfo->rebal.dict, _gd_store_rebalance_dict,
+ &fd);
}
out:
gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
@@ -1311,12 +1329,14 @@ glusterd_store_global_info (xlator_t *this)
ret = gf_store_rename_tmppath (handle);
out:
- if (ret && (handle->fd > 0))
- gf_store_unlink_tmppath (handle);
+ if (handle) {
+ if (ret && (handle->fd > 0))
+ gf_store_unlink_tmppath (handle);
- if (handle->fd > 0) {
- close (handle->fd);
- handle->fd = 0;
+ if (handle->fd > 0) {
+ close (handle->fd);
+ handle->fd = 0;
+ }
}
if (uuid_str)
@@ -1726,17 +1746,22 @@ out:
int32_t
glusterd_store_retrieve_node_state (char *volname)
{
- int32_t ret = -1;
- glusterd_volinfo_t *volinfo = NULL;
- gf_store_iter_t *iter = NULL;
- char *key = NULL;
- char *value = NULL;
- char volpath[PATH_MAX] = {0,};
- glusterd_conf_t *priv = NULL;
- char path[PATH_MAX] = {0,};
- gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ int32_t ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *dup_value = NULL;
+ char volpath[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+ char path[PATH_MAX] = {0,};
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ dict_t *tmp_dict = NULL;
+ xlator_t *this = NULL;
- priv = THIS->private;
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
@@ -1766,16 +1791,35 @@ glusterd_store_retrieve_node_state (char *volname)
if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG,
strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) {
volinfo->rebal.defrag_cmd = atoi (value);
- }
-
- if (volinfo->rebal.defrag_cmd) {
- if (!strncmp (key, GF_REBALANCE_TID_KEY,
- strlen (GF_REBALANCE_TID_KEY)))
- uuid_parse (value, volinfo->rebal.rebalance_id);
-
- if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP,
- strlen (GLUSTERD_STORE_KEY_DEFRAG_OP)))
- volinfo->rebal.op = atoi (value);
+ } else if (!strncmp (key, GF_REBALANCE_TID_KEY,
+ strlen (GF_REBALANCE_TID_KEY))) {
+ uuid_parse (value, volinfo->rebal.rebalance_id);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP,
+ strlen (GLUSTERD_STORE_KEY_DEFRAG_OP))) {
+ volinfo->rebal.op = atoi (value);
+ } else {
+ if (!tmp_dict) {
+ tmp_dict = dict_new ();
+ if (!tmp_dict) {
+ ret = -1;
+ goto out;
+ }
+ }
+ dup_value = gf_strdup (value);
+ if (!dup_value) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to strdup value string");
+ goto out;
+ }
+ ret = dict_set_str (tmp_dict, key, dup_value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting data in rebal "
+ "dict.");
+ goto out;
+ }
+ dup_value = NULL;
}
GF_FREE (key);
@@ -1785,9 +1829,13 @@ glusterd_store_retrieve_node_state (char *volname)
ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
}
+ if (tmp_dict)
+ volinfo->rebal.dict = dict_ref (tmp_dict);
- if (op_errno != GD_STORE_EOF)
+ if (op_errno != GD_STORE_EOF) {
+ ret = -1;
goto out;
+ }
ret = gf_store_iter_destroy (iter);
@@ -1795,6 +1843,12 @@ glusterd_store_retrieve_node_state (char *volname)
goto out;
out:
+ if (dup_value)
+ GF_FREE (dup_value);
+ if (ret && volinfo->rebal.dict)
+ dict_unref (volinfo->rebal.dict);
+ if (tmp_dict)
+ dict_unref (tmp_dict);
gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index bcb2dc703..fc4018190 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -445,6 +445,37 @@ glusterd_check_volume_exists (char *volname)
return _gf_true;
}
+/* Drop one reference on @volinfo.
+ *
+ * The counter is decremented under volinfo->reflock.  When it reaches zero
+ * the volinfo is handed to glusterd_volinfo_delete() and NULL is returned;
+ * otherwise @volinfo itself is returned.
+ */
+glusterd_volinfo_t *
+glusterd_volinfo_unref (glusterd_volinfo_t *volinfo)
+{
+        int     remaining = -1;
+
+        pthread_mutex_lock (&volinfo->reflock);
+        remaining = --volinfo->refcnt;
+        pthread_mutex_unlock (&volinfo->reflock);
+
+        /* Someone else still holds a reference: nothing more to do. */
+        if (remaining)
+                return volinfo;
+
+        glusterd_volinfo_delete (volinfo);
+        return NULL;
+}
+
+/* Take one reference on @volinfo: the counter is incremented under
+ * volinfo->reflock.  Returns @volinfo so the call can be used inline.
+ */
+glusterd_volinfo_t *
+glusterd_volinfo_ref (glusterd_volinfo_t *volinfo)
+{
+        pthread_mutex_lock (&volinfo->reflock);
+        volinfo->refcnt++;
+        pthread_mutex_unlock (&volinfo->reflock);
+
+        return volinfo;
+}
+
int32_t
glusterd_volinfo_new (glusterd_volinfo_t **volinfo)
{
@@ -478,7 +509,8 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo)
new_volinfo->xl = THIS;
- *volinfo = new_volinfo;
+ pthread_mutex_init (&new_volinfo->reflock, NULL);
+ *volinfo = glusterd_volinfo_ref (new_volinfo);
ret = 0;
@@ -571,6 +603,14 @@ out:
return ret;
}
+/* Unlink @volinfo from the list it is on (via its vol_list member) and
+ * drop one reference to it.  Always returns 0.
+ */
+int
+glusterd_volinfo_remove (glusterd_volinfo_t *volinfo)
+{
+        list_del_init (&volinfo->vol_list);
+
+        /* The unref result (volinfo or NULL) is of no use to callers here. */
+        (void) glusterd_volinfo_unref (volinfo);
+
+        return 0;
+}
+
int32_t
glusterd_volinfo_delete (glusterd_volinfo_t *volinfo)
{
@@ -595,6 +635,7 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo)
glusterd_auth_cleanup (volinfo);
+ pthread_mutex_destroy (&volinfo->reflock);
GF_FREE (volinfo);
ret = 0;
@@ -1054,7 +1095,7 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)
peerctx = peerinfo->rpc->mydata;
peerinfo->rpc->mydata = NULL;
- peerinfo->rpc = rpc_clnt_unref (peerinfo->rpc);
+ peerinfo->rpc = glusterd_rpc_clnt_unref (priv, peerinfo->rpc);
peerinfo->rpc = NULL;
if (peerctx) {
GF_FREE (peerctx->errstr);
@@ -1142,6 +1183,18 @@ glusterd_service_stop (const char *service, char *pidfile, int sig,
"%d", service, pid);
ret = kill (pid, sig);
+ if (ret) {
+ switch (errno) {
+ case ESRCH:
+ gf_log (this->name, GF_LOG_DEBUG, "%s is already stopped",
+ service);
+ ret = 0;
+ goto out;
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Failed to kill %s: %s",
+ service, strerror (errno));
+ }
+ }
if (!force_kill)
goto out;
@@ -1466,9 +1519,7 @@ glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo)
brickinfo->rpc = NULL;
if (rpc) {
- synclock_unlock (&priv->big_lock);
- rpc_clnt_unref (rpc);
- synclock_lock (&priv->big_lock);
+ glusterd_rpc_clnt_unref (priv, rpc);
}
return 0;
@@ -1959,20 +2010,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
- if (volinfo->rebal.defrag_cmd) {
- rebalance_id_str = gf_strdup (uuid_utoa
- (volinfo->rebal.rebalance_id));
- if (!rebalance_id_str) {
- ret = -1;
- goto out;
- }
- memset (key, 0, sizeof (key));
- snprintf (key, 256, "volume%d.rebalance-id", count);
- ret = dict_set_dynstr (dict, key, rebalance_id_str);
- if (ret)
- goto out;
- rebalance_id_str = NULL;
+ rebalance_id_str = gf_strdup (uuid_utoa
+ (volinfo->rebal.rebalance_id));
+ if (!rebalance_id_str) {
+ ret = -1;
+ goto out;
}
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d.rebalance-id", count);
+ ret = dict_set_dynstr (dict, key, rebalance_id_str);
+ if (ret)
+ goto out;
+ rebalance_id_str = NULL;
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.rebalance-op", count);
@@ -1980,6 +2029,23 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ if (volinfo->rebal.dict) {
+ snprintf (prefix, sizeof (prefix), "volume%d", count);
+ ctx.dict = dict;
+ ctx.prefix = prefix;
+ ctx.opt_count = 1;
+ ctx.key_name = "rebal-dict-key";
+ ctx.val_name = "rebal-dict-value";
+
+ dict_foreach (volinfo->rebal.dict, _add_dict_to_prdict, &ctx);
+ ctx.opt_count--;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rebal-dict-count", count);
+ ret = dict_set_int32 (dict, key, ctx.opt_count);
+ if (ret)
+ goto out;
+ }
+
memset (key, 0, sizeof (key));
snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count);
ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status);
@@ -2069,6 +2135,13 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.decommissioned",
+ count, i);
+ ret = dict_set_int32 (dict, key, brickinfo->decommissioned);
+ if (ret)
+ goto out;
+
i++;
}
@@ -2736,6 +2809,7 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count,
int ret = -1;
char *hostname = NULL;
char *path = NULL;
+ int decommissioned = 0;
glusterd_brickinfo_t *new_brickinfo = NULL;
char msg[2048] = {0};
@@ -2761,12 +2835,22 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count,
goto out;
}
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.decommissioned",
+ vol_count, brick_count);
+ ret = dict_get_int32 (vols, key, &decommissioned);
+ if (ret) {
+ /* For backward compatibility */
+ ret = 0;
+ }
+
ret = glusterd_brickinfo_new (&new_brickinfo);
if (ret)
goto out;
strcpy (new_brickinfo->path, path);
strcpy (new_brickinfo->hostname, hostname);
+ new_brickinfo->decommissioned = decommissioned;
//peerinfo might not be added yet
(void) glusterd_resolve_brick (new_brickinfo);
ret = 0;
@@ -2911,6 +2995,43 @@ out:
return ret;
}
+/* Import the rebalance dict of volume number @count from the peer-supplied
+ * @dict into @volinfo->rebal.dict.
+ *
+ * The number of entries is read from "volume<count>.rebal-dict-count"; when
+ * that key is absent (older peers) nothing is imported and 0 is returned.
+ * Otherwise a new dict is created and filled through import_prdict_dict().
+ *
+ * Returns 0 on success, non-zero on failure.  On failure the partially
+ * built dict is released and @volinfo->rebal.dict is reset to NULL so the
+ * caller is never left holding a pointer to an unreferenced dict.
+ */
+int
+gd_import_friend_volume_rebal_dict (dict_t *dict, int count,
+                                    glusterd_volinfo_t *volinfo)
+{
+        int  ret        = -1;
+        char key[256]   = {0,};
+        int  dict_count = 0;
+        char prefix[64] = {0};
+
+        GF_ASSERT (dict);
+        GF_ASSERT (volinfo);
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "volume%d.rebal-dict-count", count);
+        ret = dict_get_int32 (dict, key, &dict_count);
+        if (ret) {
+                /* Older peers will not have this dict */
+                ret = 0;
+                goto out;
+        }
+
+        volinfo->rebal.dict = dict_new ();
+        if (!volinfo->rebal.dict) {
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (prefix, sizeof (prefix), "volume%d", count);
+        ret = import_prdict_dict (dict, volinfo->rebal.dict, "rebal-dict-key",
+                                  "rebal-dict-value", dict_count, prefix);
+out:
+        if (ret && volinfo->rebal.dict) {
+                dict_unref (volinfo->rebal.dict);
+                /* Do not leave a dangling pointer behind for later
+                 * users or for the volinfo cleanup path. */
+                volinfo->rebal.dict = NULL;
+        }
+        gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
+        return ret;
+}
+
int32_t
glusterd_import_volinfo (dict_t *vols, int count,
glusterd_volinfo_t **volinfo)
@@ -3017,7 +3138,8 @@ glusterd_import_volinfo (dict_t *vols, int count,
if (ret)
gf_log (THIS->name, GF_LOG_INFO,
"peer is possibly old version");
-
+ new_volinfo->subvol_count = new_volinfo->brick_count/
+ glusterd_get_dist_leaf_count (new_volinfo);
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.ckusm", count);
ret = dict_get_uint32 (vols, key, &new_volinfo->cksum);
@@ -3074,19 +3196,16 @@ glusterd_import_volinfo (dict_t *vols, int count,
goto out;
}
- if (new_volinfo->rebal.defrag_cmd) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.rebalance-id", count);
- ret = dict_get_str (vols, key, &rebalance_id_str);
- if (ret) {
- /* This is not present in older glusterfs versions,
- * so don't error out
- */
- ret = 0;
- } else {
- uuid_parse (rebalance_id_str,
- new_volinfo->rebal.rebalance_id);
- }
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rebalance-id", count);
+ ret = dict_get_str (vols, key, &rebalance_id_str);
+ if (ret) {
+ /* This is not present in older glusterfs versions,
+ * so don't error out
+ */
+ ret = 0;
+ } else {
+ uuid_parse (rebalance_id_str, new_volinfo->rebal.rebalance_id);
}
memset (key, 0, sizeof (key));
@@ -3098,6 +3217,12 @@ glusterd_import_volinfo (dict_t *vols, int count,
*/
ret = 0;
}
+ ret = gd_import_friend_volume_rebal_dict (vols, count, new_volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to import rebalance dict "
+ "for volume.");
+ goto out;
+ }
memset (key, 0, sizeof (key));
snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count);
@@ -3324,7 +3449,7 @@ glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo,
(void) gf_store_handle_destroy (stale_volinfo->shandle);
stale_volinfo->shandle = NULL;
}
- (void) glusterd_volinfo_delete (stale_volinfo);
+ (void) glusterd_volinfo_remove (stale_volinfo);
return 0;
}
@@ -3344,10 +3469,14 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo,
old = &(old_volinfo->rebal);
new = &(new_volinfo->rebal);
- /* If the task-id's don't match, the old volinfo task is stale and
- * should be cleaned up
- */
- if (uuid_compare (old->rebalance_id, new->rebalance_id)) {
+
+ //Disconnect from rebalance process
+ if (old->defrag && old->defrag->rpc) {
+ rpc_transport_disconnect (old->defrag->rpc->conn.trans);
+ }
+
+ if (!uuid_is_null (old->rebalance_id) &&
+ uuid_compare (old->rebalance_id, new->rebalance_id)) {
(void)gd_stop_rebalance_process (old_volinfo);
goto out;
}
@@ -3362,11 +3491,11 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo,
new->skipped_files = old->skipped_files;
new->rebalance_failures = old->rebalance_failures;
new->rebalance_time = old->rebalance_time;
- new->defrag = old->defrag;
new->dict = (old->dict ? dict_ref (old->dict) : NULL);
/* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume
* import
+ * a new defrag object should come to life with rebalance being restarted
*/
out:
return ret;
@@ -3824,12 +3953,13 @@ int32_t
glusterd_nodesvc_disconnect (char *server)
{
struct rpc_clnt *rpc = NULL;
+ glusterd_conf_t *priv = THIS->private;
rpc = glusterd_nodesvc_get_rpc (server);
(void)glusterd_nodesvc_set_rpc (server, NULL);
if (rpc)
- rpc_clnt_unref (rpc);
+ glusterd_rpc_clnt_unref (priv, rpc);
return 0;
}
@@ -4594,14 +4724,24 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)
{
char *path_list = NULL;
char *slave = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *statefile = NULL;
+ char buf[1024] = "faulty";
int uuid_len = 0;
int ret = 0;
char uuid_str[64] = {0};
- glusterd_volinfo_t *volinfo = NULL;
- char *conf_path = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char confpath[PATH_MAX] = "";
+ char *op_errstr = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (THIS);
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (data);
volinfo = data;
- GF_ASSERT (volinfo);
slave = strchr(value->data, ':');
if (slave)
slave ++;
@@ -4611,22 +4751,63 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)
strncpy (uuid_str, (char*)value->data, uuid_len);
+ /* Getting Local Brickpaths */
ret = glusterd_get_local_brickpaths (volinfo, &path_list);
- ret = dict_get_str (this, "conf_path", &conf_path);
+ /*Generating the conf file path needed by gsyncd */
+ ret = glusterd_get_slave_info (slave, &slave_ip,
+ &slave_vol, &op_errstr);
if (ret) {
gf_log ("", GF_LOG_ERROR,
- "Unable to fetch conf file path.");
+ "Unable to fetch slave details.");
+ ret = -1;
goto out;
}
- glusterd_start_gsync (volinfo, slave, path_list, conf_path,
- uuid_str, NULL);
+ ret = snprintf (confpath, sizeof(confpath) - 1,
+ "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+ priv->workdir, volinfo->volname,
+ slave_ip, slave_vol);
+ confpath[ret] = '\0';
+
+ /* Fetching the last status of the node */
+ ret = glusterd_get_statefile_name (volinfo, slave,
+ confpath, &statefile);
+ if (ret) {
+ if (!strstr(slave, "::"))
+ gf_log ("", GF_LOG_INFO,
+ "%s is not a valid slave url.", slave);
+ else
+ gf_log ("", GF_LOG_INFO, "Unable to get"
+ " statefile's name");
+ goto out;
+ }
- GF_FREE (path_list);
- path_list = NULL;
+ ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf));
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read the status");
+ goto out;
+ }
+
+ /* Looks for the last status, to find if the session was running
+ * when the node went down. If the session was not started or
+ * was stopped, do not restart the geo-rep session */
+ if ((!strcmp (buf, "Not Started")) ||
+ (!strcmp (buf, "Stopped"))) {
+ gf_log ("", GF_LOG_INFO,
+ "Geo-Rep Session was not started between "
+ "%s and %s::%s. Not Restarting", volinfo->volname,
+ slave_ip, slave_vol);
+ goto out;
+ }
+
+ glusterd_start_gsync (volinfo, slave, path_list, confpath,
+ uuid_str, NULL);
out:
+ if (path_list)
+ GF_FREE (path_list);
+
return ret;
}
@@ -6045,7 +6226,7 @@ glusterd_delete_volume (glusterd_volinfo_t *volinfo)
if (ret)
goto out;
- ret = glusterd_volinfo_delete (volinfo);
+ glusterd_volinfo_remove (volinfo);
out:
gf_log (THIS->name, GF_LOG_DEBUG, "returning %d", ret);
return ret;
@@ -6771,7 +6952,7 @@ glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr,
case GF_DEFRAG_STATUS_STARTED:
GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
if (gf_is_service_running (pidfile, &pid)) {
- glusterd_rebalance_rpc_create (volinfo);
+ glusterd_rebalance_rpc_create (volinfo, _gf_true);
break;
}
case GF_DEFRAG_STATUS_NOT_STARTED:
@@ -7208,21 +7389,16 @@ glusterd_append_gsync_status (dict_t *dst, dict_t *src)
}
-static int32_t
+int32_t
glusterd_append_status_dicts (dict_t *dst, dict_t *src)
{
- int dst_count = 0;
- int src_count = 0;
- int i = 0;
- int ret = 0;
- char mst[PATH_MAX] = {0,};
- char slv[PATH_MAX] = {0, };
- char sts[PATH_MAX] = {0, };
- char nds[PATH_MAX] = {0, };
- char *mst_val = NULL;
- char *slv_val = NULL;
- char *sts_val = NULL;
- char *nds_val = NULL;
+ char sts_val_name[PATH_MAX] = {0, };
+ int dst_count = 0;
+ int src_count = 0;
+ int i = 0;
+ int ret = 0;
+ gf_gsync_status_t *sts_val = NULL;
+ gf_gsync_status_t *dst_sts_val = NULL;
GF_ASSERT (dst);
@@ -7240,49 +7416,29 @@ glusterd_append_status_dicts (dict_t *dst, dict_t *src)
goto out;
}
- for (i = 1; i <= src_count; i++) {
- snprintf (nds, sizeof(nds), "node%d", i);
- snprintf (mst, sizeof(mst), "master%d", i);
- snprintf (slv, sizeof(slv), "slave%d", i);
- snprintf (sts, sizeof(sts), "status%d", i);
-
- ret = dict_get_str (src, nds, &nds_val);
- if (ret)
- goto out;
-
- ret = dict_get_str (src, mst, &mst_val);
- if (ret)
- goto out;
-
- ret = dict_get_str (src, slv, &slv_val);
- if (ret)
- goto out;
+ for (i = 0; i < src_count; i++) {
+ memset (sts_val_name, '\0', sizeof(sts_val_name));
+ snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i);
- ret = dict_get_str (src, sts, &sts_val);
+ ret = dict_get_bin (src, sts_val_name, (void **) &sts_val);
if (ret)
goto out;
- snprintf (nds, sizeof(nds), "node%d", i+dst_count);
- snprintf (mst, sizeof(mst), "master%d", i+dst_count);
- snprintf (slv, sizeof(slv), "slave%d", i+dst_count);
- snprintf (sts, sizeof(sts), "status%d", i+dst_count);
-
- ret = dict_set_dynstr (dst, nds, gf_strdup (nds_val));
- if (ret)
+ dst_sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t),
+ gf_common_mt_gsync_status_t);
+ if (!dst_sts_val) {
+ gf_log ("", GF_LOG_ERROR, "Out Of Memory");
goto out;
+ }
- ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val));
- if (ret)
- goto out;
+ memcpy (dst_sts_val, sts_val, sizeof(gf_gsync_status_t));
- ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val));
- if (ret)
- goto out;
+ memset (sts_val_name, '\0', sizeof(sts_val_name));
+ snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i + dst_count);
- ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val));
+ ret = dict_set_bin (dst, sts_val_name, dst_sts_val, sizeof(gf_gsync_status_t));
if (ret)
goto out;
-
}
ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count);
@@ -9146,72 +9302,15 @@ glusterd_remove_auxiliary_mount (char *volname)
return ret;
}
-/* Just a minimal callback function to which logs if the request was successfull
- * or not
- */
-int
-_gd_stop_rebalance_process_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *call_frame)
-{
- xlator_t *this = NULL;
- struct syncargs *args = NULL;
- gd1_mgmt_brick_op_rsp rsp = {0,};
- int ret = -1;
- call_frame_t *frame = NULL;
-
- this = THIS;
- GF_ASSERT (this);
-
- frame = call_frame;
- args = frame->local;
- frame->local = NULL;
-
- if (-1 == req->rpc_status) {
- gf_log (this->name, GF_LOG_WARNING, "Failed to stop rebalance "
- "process.");
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "Failed to decode stop "
- "rebalance process response.");
- goto out;
- }
-
- gf_log (this->name, GF_LOG_INFO, "Stopping rebalance process was %s.",
- (rsp.op_ret ? "unsuccessful" : "successful"));
-
-out:
- if ((rsp.op_errstr) && (strcmp (rsp.op_errstr, "") != 0))
- free (rsp.op_errstr);
- free (rsp.output.output_val);
-
- STACK_DESTROY (frame->root);
- __wake (args);
-
- return 0;
-}
-
-int
-gd_stop_rebalance_process_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *call_frame)
-{
- return glusterd_big_locked_cbk (req, iov, count, call_frame,
- _gd_stop_rebalance_process_cbk);
-}
-/* Stops the rebalance process of the given volume, gracefully
+/* Stops the rebalance process of the given volume
*/
int
gd_stop_rebalance_process (glusterd_volinfo_t *volinfo)
{
- int ret = -1;
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- gd1_mgmt_brick_op_req *req = NULL;
- dict_t *req_dict = NULL;
- char *name = NULL;
- struct syncargs args = {0,};
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char pidfile[PATH_MAX] = {0,};
GF_ASSERT (volinfo);
@@ -9221,43 +9320,23 @@ gd_stop_rebalance_process (glusterd_volinfo_t *volinfo)
conf = this->private;
GF_ASSERT (conf);
- req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_brick_req_t);
- if (!req) {
- ret = -1;
- goto out;
- }
-
- req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
-
- ret = gf_asprintf(&name, "%s-dht", volinfo->volname);
- if (ret < 0)
- goto out;
- req->name = name;
-
- req_dict = dict_new();
- if (!req_dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_int32 (req_dict, "rebalance-command",
- GF_DEFRAG_CMD_STOP);
- if (ret)
- goto out;
+ GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, conf);
+ ret = glusterd_service_stop ("rebalance", pidfile, SIGTERM, _gf_true);
- ret = dict_allocate_and_serialize (req_dict, &req->input.input_val,
- &req->input.input_len);
- if (ret)
- goto out;
+ return ret;
+}
- GD_SYNCOP (volinfo->rebal.defrag->rpc, (&args), NULL,
- gd_stop_rebalance_process_cbk, req, conf->gfs_mgmt, req->op,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
-out:
+rpc_clnt_t *
+glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc)
+{
+ rpc_clnt_t *ret = NULL;
- GF_FREE (name);
- GF_FREE (req);
- dict_unref (req_dict);
+ GF_ASSERT (conf);
+ GF_ASSERT (rpc);
+ synclock_unlock (&conf->big_lock);
+ ret = rpc_clnt_unref (rpc);
+ synclock_lock (&conf->big_lock);
return ret;
}
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 970b1f8a6..05d5c7172 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -137,6 +137,12 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
gf_boolean_t del_brick);
+glusterd_volinfo_t *
+glusterd_volinfo_ref (glusterd_volinfo_t *volinfo);
+
+glusterd_volinfo_t *
+glusterd_volinfo_unref (glusterd_volinfo_t *volinfo);
+
int32_t
glusterd_volinfo_delete (glusterd_volinfo_t *volinfo);
@@ -583,6 +589,17 @@ glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict,
char **conf_path, char **op_errstr);
int
+glusterd_get_slave_info (char *slave, char **slave_ip,
+ char **slave_vol, char **op_errstr);
+
+int
+glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, char **statefile);
+
+int
+glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen);
+
+int
glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,
dict_t *resp_dict, char *path_list,
char *conf_path, gf_boolean_t is_force);
@@ -624,4 +641,7 @@ glusterd_status_has_tasks (int cmd);
int
gd_stop_rebalance_process (glusterd_volinfo_t *volinfo);
+
+rpc_clnt_t *
+glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index fe33c8d7d..a94a47af3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -3505,7 +3505,7 @@ out:
if (brickinfo)
glusterd_brickinfo_delete (brickinfo);
if (volinfo)
- glusterd_volinfo_delete (volinfo);
+ glusterd_volinfo_unref (volinfo);
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 41555230e..df2562ba6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -1696,7 +1696,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
out:
GF_FREE(free_ptr);
if (!vol_added && volinfo)
- glusterd_volinfo_delete (volinfo);
+ glusterd_volinfo_unref (volinfo);
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index f209d1ad9..520b0f774 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -831,6 +831,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.option = "root-squash",
.op_version = 2
},
+ { .key = "server.anonuid",
+ .voltype = "protocol/server",
+ .option = "anonuid",
+ .op_version = 3
+ },
+ { .key = "server.anongid",
+ .voltype = "protocol/server",
+ .option = "anongid",
+ .op_version = 3
+ },
{ .key = "server.statedump-path",
.voltype = "protocol/server",
.option = "statedump-path",
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index c3fccf8e1..834a39968 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -594,7 +594,7 @@ configure_syncdaemon (glusterd_conf_t *conf)
/* gluster-params */
runinit_gsyncd_setrx (&runner, conf);
runner_add_args (&runner, "gluster-params",
- "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true",
+ "aux-gfid-mount",
".", ".", NULL);
RUN_GSYNCD_CMD;
@@ -608,6 +608,16 @@ configure_syncdaemon (glusterd_conf_t *conf)
runner_add_args (&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
+ /* ssh-command tar */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg (&runner, "ssh-command-tar");
+ runner_argprintf (&runner,
+ "ssh -oPasswordAuthentication=no "
+ "-oStrictHostKeyChecking=no "
+ "-i %s/tar_ssh.pem", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
/* pid-file */
runinit_gsyncd_setrx (&runner, conf);
runner_add_arg (&runner, "pid-file");
@@ -615,6 +625,13 @@ configure_syncdaemon (glusterd_conf_t *conf)
runner_add_args (&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
+ /* geo-rep working dir */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg (&runner, "georep-session-working-dir");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
/* state-file */
runinit_gsyncd_setrx (&runner, conf);
runner_add_arg (&runner, "state-file");
@@ -701,7 +718,7 @@ configure_syncdaemon (glusterd_conf_t *conf)
/* gluster-params */
runinit_gsyncd_setrx (&runner, conf);
runner_add_args (&runner, "gluster-params",
- "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true",
+ "aux-gfid-mount",
".", NULL);
RUN_GSYNCD_CMD;
@@ -1515,8 +1532,13 @@ struct volume_options options[] = {
{ .key = {"server-quorum-type"},
.type = GF_OPTION_TYPE_STR,
.value = { "none", "server"},
- .description = "If set to server, enables the specified "
- "volume to participate in quorum."
+ .description = "This feature is on the server-side i.e. in glusterd."
+ " Whenever the glusterd on a machine observes that "
+ "the quorum is not met, it brings down the bricks to "
+ "prevent data split-brains. When the network "
+ "connections are brought back up and the quorum is "
+ "restored the bricks in the volume are brought back "
+ "up."
},
{ .key = {"server-quorum-ratio"},
.type = GF_OPTION_TYPE_PERCENT,
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index d2c88609e..e704de44b 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -48,7 +48,7 @@
#define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio"
#define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version"
#define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub"
-#define GEO_CONF_MAX_OPT_VALS 5
+#define GEO_CONF_MAX_OPT_VALS 6
#define GLUSTERD_CREATE_HOOK_SCRIPT "/hooks/1/gsync-create/post/" \
"S56glusterd-geo-rep-create-post.sh"
@@ -329,6 +329,8 @@ struct glusterd_volinfo_ {
int op_version;
int client_op_version;
+ pthread_mutex_t reflock;
+ int refcnt;
};
typedef enum gd_node_type_ {
@@ -457,13 +459,19 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args);
snprintf (path, PATH_MAX, "%s/rebalance",vol_path); \
} while (0)
-#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo, priv) do { \
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(path, volinfo, priv) do { \
char defrag_path[PATH_MAX]; \
GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
snprintf (path, PATH_MAX, "%s/%s.sock", defrag_path, \
uuid_utoa(MY_UUID)); \
} while (0)
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) do { \
+ snprintf (path, UNIX_PATH_MAX, DEFAULT_VAR_RUN_DIRECTORY \
+ "/gluster-rebalance-%s.sock", \
+ uuid_utoa(volinfo->volume_id)); \
+ } while (0)
+
#define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) do { \
char defrag_path[PATH_MAX]; \
GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
@@ -745,7 +753,8 @@ int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
size_t len, int cmd, defrag_cbk_fn_t cbk,
glusterd_op_t op);
int
-glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo);
+glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
+ gf_boolean_t reconnect);
int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req);
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index ecfe86d5d..ee12d869c 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -3374,8 +3374,6 @@ fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
}
}
- GET_STATE (this, finh, state);
-
fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
rv = fuse_flip_xattr_ns (priv, name, &newkey);
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index bf89e9d52..2799ec847 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -1,20 +1,12 @@
-#!/bin/sh
-# (C) 2006, 2007, 2008 Gluster Inc. <http://www.gluster.com>
+#!/bin/bash
#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of
-# the License, or (at your option) any later version.
+# Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public
-# License along with this program; if not, write to the Free
-# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-# Boston, MA 02110-1301 USA
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
_init ()
{
@@ -33,28 +25,44 @@ _init ()
exec_prefix=@exec_prefix@;
cmd_line=$(echo "@sbindir@/glusterfs");
- case `uname -s` in
- NetBSD)
- getinode="stat -f %i"
- getdev="stat -f %d"
- lgetinode="${getinode} -L"
- lgetdev="${getdev} -L"
-
- mounttab=/proc/mounts
- ;;
- Linux)
- getinode="stat -c %i $i"
- getdev="stat -c %d $d"
- lgetinode="${getinode} -L"
- lgetdev="${getdev} -L"
-
- mounttab=/etc/mtab
- ;;
+ # check whether getfattr exists
+ export PATH
+ getfattr=$(which getfattr 2>/dev/null);
+ if [ $? -ne 0 ]; then
+ echo "WARNING: getfattr not found, certain checks will be skipped.."
+ fi
+
+ alias lsL='ls -L'
+ mounttab=/proc/mounts
+ uname_s=`uname -s`
+ case ${uname_s} in
+ NetBSD)
+ getinode="stat -f %i"
+ getdev="stat -f %d"
+ lgetinode="${getinode} -L"
+ lgetdev="${getdev} -L"
+ ;;
+ Linux)
+ getinode="stat -c %i"
+ getdev="stat -c %d"
+ lgetinode="${getinode} -L"
+ lgetdev="${getdev} -L"
+ ;;
esac
UPDATEDBCONF=/etc/updatedb.conf
}
+is_valid_hostname ()
+{
+ local server=$1
+
+ length=$(echo $server | wc -c)
+ if [ ${length} -gt ${HOST_NAME_MAX} ]; then
+ return 1
+ fi
+}
+
parse_backup_volfile_servers ()
{
local server_list=$1
@@ -63,40 +71,43 @@ parse_backup_volfile_servers ()
servers=$(echo ${server_list} | sed 's/\:/ /g')
for server in ${servers}; do
- length=$(echo $server | wc -c)
- if [ ${length} -gt ${HOST_NAME_MAX} ]; then
- echo "Hostname:${server} provided is too long.. skipping"
+ is_valid_hostname ${server}
+ if [ $? -eq 1 ]; then
continue
fi
- new_servers=$(echo "$new_servers $server")
+ new_servers=$(echo "${new_servers} ${server}")
done
+
echo ${new_servers}
}
-parse_backupvolfile_server ()
+parse_volfile_servers ()
{
- local server=$1
+ local server_list=$1
+ local servers=""
+ local new_servers=""
- length=$(echo $server | wc -c)
- if [ ${length} -gt ${HOST_NAME_MAX} ]; then
- echo "Hostname:${server} provided is too long.. exiting"
- exit 1
- fi
+ servers=$(echo ${server_list} | sed 's/,/ /g')
+ for server in ${servers}; do
+ is_valid_hostname ${server}
+ if [ $? -eq 1 ]; then
+ continue
+ fi
+ new_servers=$(echo "${new_servers} ${server}")
+ done
- echo ${server}
+ echo ${new_servers}
}
start_glusterfs ()
{
- # lets the comparsion be case insensitive for all strings
-
if [ -n "$log_level_str" ]; then
- case "$( echo $log_level_str | tr '[a-z]' '[A-Z]')" in
+ case "$( echo $log_level_str | awk '{print toupper($0)}')" in
"ERROR")
log_level=$LOG_ERROR;
;;
"INFO")
- log_level=$LOG_INFO
+ log_level=$LOG_INFO;
;;
"DEBUG")
log_level=$LOG_DEBUG;
@@ -120,7 +131,7 @@ start_glusterfs ()
esac
fi
-#options without values start here
+ # options without values start here
if [ -n "$read_only" ]; then
cmd_line=$(echo "$cmd_line --read-only");
fi
@@ -130,7 +141,7 @@ start_glusterfs ()
fi
if [ -n "$selinux" ]; then
- cmd_line=$(echo "$cmd_line --selinux");
+ cmd_line=$(echo "$cmd_line --selinux");
fi
if [ -n "$enable_ino32" ]; then
@@ -157,7 +168,7 @@ start_glusterfs ()
cmd_line=$(echo "$cmd_line --aux-gfid-mount");
fi
-#options with values start here
+ # options with values start here
if [ -n "$log_level" ]; then
cmd_line=$(echo "$cmd_line --log-level=$log_level");
fi
@@ -207,8 +218,7 @@ start_glusterfs ()
fi
if [ -n "$xlator_option" ]; then
- xlator_option=$(echo $xlator_option | sed s/"xlator-option="/"--xlator-option "/g)
- cmd_line=$(echo "$cmd_line $xlator_option");
+ cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option");
fi
# for rdma volume, we have to fetch volfile with '.rdma' added
@@ -218,18 +228,31 @@ start_glusterfs ()
if [ -z "$volfile_loc" ]; then
if [ -n "$server_ip" ]; then
- cmd_line=$(echo "$cmd_line --volfile-server=$server_ip");
+ servers=$(parse_volfile_servers ${server_ip});
+ if [ -n "$servers" ]; then
+ for i in $(echo ${servers}); do
+ cmd_line=$(echo "$cmd_line --volfile-server=$i");
+ done
+ else
+ echo "ERROR: No valid servers found on command line.. exiting"
+ print_usage
+ exit 1
+ fi
if [ -n "$backupvolfile_server" ]; then
if [ -z "$backup_volfile_servers" ]; then
- server=$(parse_backupvolfile_server ${backupvolfile_server})
- cmd_line=$(echo "$cmd_line --volfile-server=$server");
+ is_valid_hostname ${backupvolfile_server};
+ if [ $? -eq 1 ]; then
+ echo "ERROR: Invalid backup server specified.. exiting"
+ exit 1
+ fi
+ cmd_line=$(echo "$cmd_line --volfile-server=$backupvolfile_server");
fi
fi
if [ -n "$backup_volfile_servers" ]; then
- servers=$(parse_backup_volfile_servers ${backup_volfile_servers})
- for i in $(echo ${servers}); do
+ backup_servers=$(parse_backup_volfile_servers ${backup_volfile_servers})
+ for i in $(echo ${backup_servers}); do
cmd_line=$(echo "$cmd_line --volfile-server=$i");
done
fi
@@ -261,205 +284,307 @@ start_glusterfs ()
fi
cmd_line=$(echo "$cmd_line $mount_point");
- err=0;
- $cmd_line;
+ $cmd_line;
inode=$( ${getinode} $mount_point 2>/dev/null);
-
# this is required if the stat returns error
- if [ -z "$inode" ]; then
- inode="0";
- fi
-
- if [ $inode -ne 1 ]; then
- err=1;
- fi
-
- if [ $err -eq "1" ]; then
- echo "Mount failed. Please check the log file for more details."
- umount $mount_point > /dev/null 2>&1;
- exit 1;
+ if [ $? -ne 0 ]; then
+ echo "Mount failed. Please check the log file for more details."
+ umount $mount_point > /dev/null 2>&1;
+ exit 1;
fi
}
-usage ()
+print_usage ()
{
-
-echo "Usage: mount.glusterfs <volumeserver>:<volumeid/volumeport> -o <options> <mountpoint>
+cat << EOF
+Usage: $0 <volumeserver>:<volumeid/volumeport> -o<options> <mountpoint>
Options:
-man 8 mount.glusterfs
-
-To display the version number of the mount helper:
-mount.glusterfs --version"
-
+man 8 $0
+To display the version number of the mount helper: $0 -V
+EOF
}
# check for recursive mounts. i.e, mounting over an existing brick
check_recursive_mount ()
{
if [ $1 = "/" ]; then
- echo Cannot mount over root;
+ echo "Cannot mount over root";
exit 2;
fi
+
# GFID check first
# remove trailing / from mount point
mnt_dir=${1%/};
- export PATH;
- # check whether getfattr exists
- which getfattr > /dev/null 2>&1;
- if [ $? -ne 0 ]; then
- return;
- fi
-
- getfattr -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid=";
- if [ $? -eq 0 ]; then
- echo "ERROR: $mnt_dir is in use as a brick of a gluster volume";
- exit 2;
+ if [ -n ${getfattr} ]; then
+ ${getfattr} -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid=";
+ if [ $? -eq 0 ]; then
+ echo "ERROR: $mnt_dir is in use as a brick of a gluster volume";
+ exit 2;
+ fi
fi
# check if the mount point is a brick's parent directory
GLUSTERD_WORKDIR="/var/lib/glusterd";
- ls -L "$GLUSTERD_WORKDIR"/vols/*/bricks/* > /dev/null 2>&1;
+ lsL "$GLUSTERD_WORKDIR"/vols/*/bricks/* > /dev/null 2>&1;
if [ $? -ne 0 ]; then
return;
fi
- brick_path=`grep ^path "$GLUSTERD_WORKDIR"/vols/*/bricks/* | cut -d "=" -f 2`;
+ brick_path=`grep ^path "$GLUSTERD_WORKDIR"/vols/*/bricks/* 2>/dev/null | cut -d "=" -f 2`;
root_inode=`${lgetinode} /`;
root_dev=`${lgetdev} /`;
mnt_inode=`${lgetinode} $mnt_dir`;
mnt_dev=`${lgetdev} $mnt_dir`;
- for brick in "$brick_path";
- do
+ for brick in "$brick_path"; do
# evaluate brick path to see if this is local, if non-local, skip iteration
ls $brick > /dev/null 2>&1;
if [ $? -ne 0 ]; then
continue;
fi
- getfattr -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid=";
- if [ $? -ne 0 ]; then
- continue;
+
+ if [ -n ${getfattr} ]; then
+ ${getfattr} -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid=";
+ if [ $? -eq 0 ]; then
+ # brick is local
+ while [ 1 ]; do
+ tmp_brick="$brick";
+ brick="$brick"/..;
+ brick_dev=`${lgetdev} $brick`;
+ brick_inode=`${lgetinode} $brick`;
+ if [ "$mnt_inode" -eq "$brick_inode" \
+ -a "$mnt_dev" -eq "$brick_dev" ]; then
+ echo "ERROR: ${mnt_dir} is a parent of the brick ${tmp_brick}";
+ exit 2;
+ fi
+ [ "$root_inode" -ne "$brick_inode" \
+ -o "$root_dev" -ne "$brick_dev" ] || break;
+ done;
+ else
+ continue;
+ fi
else
- # brick is local
- while [ 1 ];
- do
- tmp_brick="$brick";
- brick="$brick"/..;
- brick_dev=`${lgetdev} $brick`;
- brick_inode=`${lgetinode} $brick`;
- if [ "$mnt_inode" -eq "$brick_inode" -a "$mnt_dev" -eq "$brick_dev" ]; then
- echo ERROR: $mnt_dir is a parent of the brick $tmp_brick;
- exit 2;
- fi
- [ "$root_inode" -ne "$brick_inode" -o "$root_dev" -ne "$brick_dev" ] || break;
- done;
+ continue;
fi
done;
}
-main ()
+with_options()
{
- helper=$(echo "$@" | sed -n 's/.*\--[ ]*\([^ ]*\).*/\1/p');
- in_opt="no"
- pos_args=0
- for opt in "$@"; do
- if [ "$in_opt" = "yes" ]; then
- for pair in $(echo "$opt" | tr "," " "); do
- # Handle options without values.
- case "$pair" in
- "ro") read_only=1 ;;
- "acl") acl=1 ;;
- "selinux") selinux=1 ;;
- "worm") worm=1 ;;
- "fopen-keep-cache") fopen_keep_cache=1 ;;
- "enable-ino32") enable_ino32=1 ;;
- "mem-accounting") mem_accounting=1;;
- "aux-gfid-mount")
- if [ `uname -s` = "Linux" ]; then
- aux_gfid_mount=1
- fi
- ;;
- # "mount -t glusterfs" sends this, but it's useless.
- "rw") ;;
- # these ones are interpreted during system initialization
- "noauto") ;;
- "_netdev") ;;
- *)
- key=$(echo "$pair" | cut -f1 -d'=');
- value=$(echo "$pair" | cut -f2- -d'=');
-
- # Handle options with values.
- case "$key" in
- "log-level") log_level_str=$value ;;
- "log-file") log_file=$value ;;
- "transport") transport=$value ;;
- "direct-io-mode") direct_io_mode=$value ;;
- "volume-name") volume_name=$value ;;
- "volume-id") volume_id=$value ;;
- "volfile-check") volfile_check=$value ;;
- "server-port") server_port=$value ;;
- "attribute-timeout")
- attribute_timeout=$value ;;
- "entry-timeout") entry_timeout=$value ;;
- "negative-timeout") negative_timeout=$value ;;
- "gid-timeout") gid_timeout=$value ;;
- "background-qlen") bg_qlen=$value ;;
- "backup-volfile-servers") backup_volfile_servers=$value ;;
- ## Place-holder backward compatibility
- "backupvolfile-server") backupvolfile_server=$value ;;
- "congestion-threshold") cong_threshold=$value ;;
- "xlator-option") xlator_option=$xlator_option" "$pair ;;
- "fuse-mountopts") fuse_mountopts=$value ;;
- "use-readdirp") use_readdirp=$value ;;
- *)
- # Passthru
- [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts,"
- fuse_mountopts="$fuse_mountopts$pair"
- ;;
- esac
- esac
- done
- in_opt="no"
- elif [ "$opt" = "-o" ]; then
- in_opt="yes"
+ local key=$1
+ local value=$2
+
+ # Handle options with values.
+ case "$key" in
+ "log-level")
+ log_level_str=$value
+ ;;
+ "log-file")
+ log_file=$value
+ ;;
+ "transport")
+ transport=$value
+ ;;
+ "direct-io-mode")
+ direct_io_mode=$value
+ ;;
+ "volume-name")
+ volume_name=$value
+ ;;
+ "volume-id")
+ volume_id=$value
+ ;;
+ "volfile-check")
+ volfile_check=$value
+ ;;
+ "server-port")
+ server_port=$value
+ ;;
+ "attribute-timeout")
+ attribute_timeout=$value
+ ;;
+ "entry-timeout")
+ entry_timeout=$value
+ ;;
+ "negative-timeout")
+ negative_timeout=$value
+ ;;
+ "gid-timeout")
+ gid_timeout=$value
+ ;;
+ "background-qlen")
+ bg_qlen=$value
+ ;;
+ "backup-volfile-servers")
+ backup_volfile_servers=$value
+ ;;
+ "backupvolfile-server")
+ backupvolfile_server=$value
+ ;;
+ "congestion-threshold")
+ cong_threshold=$value
+ ;;
+ "xlator-option")
+ xlator_option=$value
+ ;;
+ "fuse-mountopts")
+ fuse_mountopts=$value
+ ;;
+ "use-readdirp")
+ use_readdirp=$value
+ ;;
+ *)
+ echo "Invalid option: $key"
+ exit 0
+ ;;
+ esac
+}
+
+without_options()
+{
+ local option=$1
+ # Handle options without values.
+ case "$option" in
+ "ro")
+ read_only=1
+ ;;
+ "acl")
+ acl=1
+ ;;
+ "selinux")
+ selinux=1
+ ;;
+ "worm")
+ worm=1
+ ;;
+ "fopen-keep-cache")
+ fopen_keep_cache=1
+ ;;
+ "enable-ino32")
+ enable_ino32=1
+ ;;
+ "mem-accounting")
+ mem_accounting=1
+ ;;
+ "aux-gfid-mount")
+ if [ ${uname_s} = "Linux" ]; then
+ aux_gfid_mount=1
+ fi
+ ;;
+ # "mount -t glusterfs" sends this, but it's useless.
+ "rw")
+ ;;
+ # these ones are interpreted during system initialization
+ "noauto")
+ ;;
+ "_netdev")
+ ;;
+ *)
+ echo "Invalid option $option";
+ exit 0
+ ;;
+ esac
+}
+
+parse_options()
+{
+ local optarg=${1}
+ for pair in $(echo ${optarg//,/ }); do
+ key=$(echo "$pair" | cut -f1 -d'=');
+ value=$(echo "$pair" | cut -f2- -d'=');
+ if [ "$key" = "$value" ]; then
+ without_options $pair;
else
- case $pos_args in
- 0) volfile_loc=$opt ;;
- 1) mount_point=$opt ;;
- *) echo "extra arguments at end (ignored)" ;;
- esac
- pos_args=$((pos_args+1))
+ with_options $key $value;
fi
done
- if [ $in_opt = "yes" -o $pos_args -lt 2 ]; then
- usage
- exit 1
- fi
+}
+
+update_updatedb()
+{
+ # Append fuse.glusterfs to PRUNEFS variable in updatedb.conf(5).
+ # updatedb(8) should not index files under GlusterFS, indexing
+ # GlusterFS is not necessary and should be avoided.
+ # Following code disables updatedb crawl on 'glusterfs'
+ test -f $UPDATEDBCONF && {
+ if ! grep -q 'glusterfs' $UPDATEDBCONF; then
+ sed 's/\(PRUNEFS.*\)"/\1 fuse.glusterfs"/' $UPDATEDBCONF \
+ > ${UPDATEDBCONF}.bak
+ mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF
+ fi
+ }
+}
+
+main ()
+{
+
+ volfile_loc=$1
+ mount_point=$2
+
+ ## `mount` specifies options as a last argument
+ shift 2;
+ while getopts "Vo:h" opt; do
+ case "${opt}" in
+ o)
+ parse_options ${OPTARG};
+ ;;
+ V)
+ ${cmd_line} -V;
+ exit 0;
+ ;;
+ h)
+ print_usage;
+ exit 0;
+ ;;
+ ?)
+ print_usage;
+ exit 0;
+ ;;
+ esac
+ done
[ -r "$volfile_loc" ] || {
server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p');
- test_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
- [ -n "$test_str" ] && {
- volume_id="$test_str";
+ volume_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
+ [ -n "$volume_str" ] && {
+ volume_id="$volume_str";
}
volfile_loc="";
}
- #
- [ -n "$helper" ] && {
- cmd_line=$(echo "$cmd_line --$helper");
- exec $cmd_line;
+ [ -z "$volume_id" -o -z "$server_ip" ] && {
+ cat <<EOF
+ERROR: Server name/volume name unspecified cannot proceed further..
+Please specify correct format
+Usage:
+man 8 $0
+EOF
+ exit 0;
+ }
+
+ grep_ret=$(echo ${mount_point} | grep '^\-o');
+ [ "x" != "x${grep_ret}" ] && {
+ cat <<EOF
+ERROR: -o options cannot be specified in either first two arguments..
+Please specify correct style
+Usage:
+man 8 $0
+EOF
exit 0;
}
# No need to do a ! -d test, it is taken care while initializing the
# variable mount_point
[ -z "$mount_point" -o ! -d "$mount_point" ] && {
- echo "ERROR: Mount point does not exist."
- usage;
+ cat <<EOF
+ERROR: Mount point does not exist
+Please specify a mount point
+Usage:
+man 8 $0
+EOF
exit 0;
}
@@ -472,16 +597,7 @@ main ()
check_recursive_mount "$mount_point";
- # Append fuse.glusterfs to PRUNEFS variable in updatedb.conf(5). updatedb(8)
- # should not index files under GlusterFS, indexing will slow down GlusteFS
- # if the filesystem is several TB in size.
- test -f $UPDATEDBCONF && {
- if ! grep -q 'glusterfs' $UPDATEDBCONF; then
- sed 's/\(PRUNEFS.*\)"/\1 fuse.glusterfs"/' $UPDATEDBCONF \
- > ${UPDATEDBCONF}.bak
- mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF
- fi
- }
+ update_updatedb;
start_glusterfs;
}
diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c
index 59c7637e3..25476ebbe 100644
--- a/xlators/nfs/server/src/acl3.c
+++ b/xlators/nfs/server/src/acl3.c
@@ -66,7 +66,8 @@ nfs3_stat_to_fattr3 (struct iatt *buf);
#define acl3_validate_gluster_fh(handle, status, errlabel) \
do { \
if (!nfs3_fh_validate (handle)) { \
- status = NFS3ERR_SERVERFAULT; \
+ gf_log (GF_ACL, GF_LOG_ERROR, "Bad Handle"); \
+ status = NFS3ERR_BADHANDLE; \
goto errlabel; \
} \
} while (0) \
@@ -255,7 +256,7 @@ acl3_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
cs = frame->local;
getaclreply = &cs->args.getaclreply;
- if (op_ret == -1) {
+ if (op_ret < 0) {
stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto err;
}
@@ -321,6 +322,7 @@ acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
getaclreply *getaclreply = NULL;
int ret = -1;
nfs_user_t nfu = {0, };
+ uint64_t deviceid = 0;
if (!frame->local) {
gf_log (GF_ACL, GF_LOG_ERROR, "Invalid argument,"
@@ -336,14 +338,17 @@ acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
- getaclreply->attr_follows = 1;
+ /* Fill the attrs before xattrs */
+ getaclreply->attr_follows = TRUE;
+ deviceid = nfs3_request_xlator_deviceid (cs->req);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
getaclreply->attr = nfs3_stat_to_fattr3 (buf);
- getaclreply->mask = 0xf;
+
nfs_request_user_init (&nfu, cs->req);
- ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, NULL, NULL,
- acl3_getacl_cbk, cs);
- if (ret == -1) {
- stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ NULL, NULL, acl3_getacl_cbk, cs);
+ if (ret < 0) {
+ stat = nfs3_errno_to_nfsstat3 (-ret);
goto err;
}
return 0;
@@ -409,6 +414,13 @@ acl3svc_getacl (rpcsvc_request_t *req)
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
+
+ /* Validate ACL mask */
+ if (getaclargs.mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) {
+ stat = NFS3ERR_INVAL;
+ goto acl3err;
+ }
+
fhp = &fh;
acl3_validate_gluster_fh (&fh, stat, acl3err);
acl3_map_fh_to_volume (nfs->nfs3state, fhp, req,
@@ -417,6 +429,7 @@ acl3svc_getacl (rpcsvc_request_t *req)
vol, stat, rpcerr);
cs->vol = vol;
+ cs->args.getaclreply.mask = getaclargs.mask;
acl3_volume_started_check (nfs3, vol, ret, acl3err);
ret = nfs3_fh_resolve_and_resume (cs, fhp,
@@ -470,11 +483,13 @@ acl3_setacl_resume (void *carg)
nfs_request_user_init (&nfu, cs->req);
xattr = dict_new();
if (cs->aclcount)
- ret = dict_set_static_bin (xattr, POSIX_ACL_ACCESS_XATTR, cs->aclxattr,
- cs->aclcount * 8 + 4);
+ ret = dict_set_static_bin (xattr, POSIX_ACL_ACCESS_XATTR,
+ cs->aclxattr,
+ posix_acl_xattr_size (cs->aclcount));
if (cs->daclcount)
ret = dict_set_static_bin (xattr, POSIX_ACL_DEFAULT_XATTR,
- cs->daclxattr, cs->daclcount * 8 + 4);
+ cs->daclxattr,
+ posix_acl_xattr_size (cs->daclcount));
ret = nfs_setxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, xattr,
0, NULL, acl3_setacl_cbk, cs);
@@ -536,6 +551,13 @@ acl3svc_setacl (rpcsvc_request_t *req)
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
+
+ /* Validate ACL mask */
+ if (setaclargs.mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) {
+ stat = NFS3ERR_INVAL;
+ goto acl3err;
+ }
+
fhp = &fh;
acl3_validate_gluster_fh (fhp, stat, acl3err);
acl3_map_fh_to_volume (nfs->nfs3state, fhp, req,
diff --git a/xlators/nfs/server/src/acl3.h b/xlators/nfs/server/src/acl3.h
index e0e61281a..03d626f3e 100644
--- a/xlators/nfs/server/src/acl3.h
+++ b/xlators/nfs/server/src/acl3.h
@@ -16,6 +16,12 @@
#define GF_ACL3_PORT 38469
#define GF_ACL GF_NFS"-ACL"
+/* Flags for the getacl/setacl mode */
+#define NFS_ACL 0x0001
+#define NFS_ACLCNT 0x0002
+#define NFS_DFACL 0x0004
+#define NFS_DFACLCNT 0x0008
+
/*
* NFSv3, identifies the default ACL by NFS_ACL_DEFAULT. Gluster
* NFS needs to mask it OFF before sending it upto POSIX layer
diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
index b0824bf10..e86235522 100644
--- a/xlators/nfs/server/src/mount3.c
+++ b/xlators/nfs/server/src/mount3.c
@@ -1318,7 +1318,8 @@ mnt3_parse_dir_exports (rpcsvc_request_t *req, struct mount3_state *ms,
char volname[1024];
struct mnt3_export *exp = NULL;
char *volname_ptr = NULL;
- int ret = -1;
+ int ret = -ENOENT;
+ struct nfs_state *nfs = NULL;
if ((!ms) || (!subdir))
return -1;
@@ -1332,10 +1333,26 @@ mnt3_parse_dir_exports (rpcsvc_request_t *req, struct mount3_state *ms,
if (!exp)
goto err;
+ nfs = (struct nfs_state *)ms->nfsx->private;
+ if (!nfs)
+ goto err;
+
+ if (!nfs_subvolume_started (nfs, exp->vol)) {
+ gf_log (GF_MNT, GF_LOG_DEBUG,
+ "Volume %s not started", exp->vol->name);
+ goto err;
+ }
+
+ if (mnt3_check_client_net (ms, req, exp->vol) == RPCSVC_AUTH_REJECT) {
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Client mount not allowed");
+ ret = -EACCES;
+ goto err;
+ }
+
ret = mnt3_resolve_subdir (req, ms, exp, subdir);
if (ret < 0) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Failed to resolve export dir: %s"
- , subdir);
+ gf_log (GF_MNT, GF_LOG_ERROR,
+ "Failed to resolve export dir: %s", subdir);
goto err;
}
@@ -1375,10 +1392,6 @@ mnt3_find_export (rpcsvc_request_t *req, char *path, struct mnt3_export **e)
}
ret = mnt3_parse_dir_exports (req, ms, path);
- if (ret == 0) {
- ret = -2;
- goto err;
- }
err:
return ret;
@@ -1416,17 +1429,26 @@ mnt3svc_mnt (rpcsvc_request_t *req)
goto rpcerr;
}
- ret = 0;
nfs = (struct nfs_state *)ms->nfsx->private;
gf_log (GF_MNT, GF_LOG_DEBUG, "dirpath: %s", path);
ret = mnt3_find_export (req, path, &exp);
- if (ret == -2) {
- ret = 0;
- goto rpcerr;
- } else if (ret < 0) {
- ret = -1;
- mntstat = MNT3ERR_NOENT;
+ if (ret < 0) {
+ mntstat = mnt3svc_errno_to_mnterr (-ret);
goto mnterr;
+ } else if (!exp) {
+ /*
+ * SPECIAL CASE: exp is NULL if "path" is subdir in
+ * call to mnt3_find_export().
+ *
+ * This is subdir mount, we are already DONE!
+ * nfs_subvolume_started() and mnt3_check_client_net()
+ * validation are done in mnt3_parse_dir_exports()
+ * which is invoked through mnt3_find_export().
+ *
+ * TODO: All mount should happen thorugh mnt3svc_mount()
+ * It needs more clean up.
+ */
+ return (0);
}
if (!nfs_subvolume_started (nfs, exp->vol)) {
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index 8c895c66d..4ab5cbc90 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -1604,8 +1604,8 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
.description = "Disable or enable the AUTH_UNIX authentication type."
- "Must always be enabled for better interoperability."
- "However, can be disabled if needed. Enabled by"
+ "Must always be enabled for better interoperability. "
+ "However, can be disabled if needed. Enabled by "
"default"
},
{ .key = {"rpc-auth.auth-null"},
@@ -1621,8 +1621,8 @@ struct volume_options options[] = {
.description = "Disable or enable the AUTH_UNIX authentication type "
"for a particular exported volume overriding defaults"
" and general setting for AUTH_UNIX scheme. Must "
- "always be enabled for better interoperability."
- "However, can be disabled if needed. Enabled by"
+ "always be enabled for better interoperability. "
+ "However, can be disabled if needed. Enabled by "
"default."
},
{ .key = {"rpc-auth.auth-unix.*.allow"},
@@ -1631,8 +1631,8 @@ struct volume_options options[] = {
.description = "Disable or enable the AUTH_UNIX authentication type "
"for a particular exported volume overriding defaults"
" and general setting for AUTH_UNIX scheme. Must "
- "always be enabled for better interoperability."
- "However, can be disabled if needed. Enabled by"
+ "always be enabled for better interoperability. "
+ "However, can be disabled if needed. Enabled by "
"default."
},
{ .key = {"rpc-auth.auth-null.*"},
@@ -1673,7 +1673,7 @@ struct volume_options options[] = {
.default_value = "none",
.description = "Reject a comma separated list of addresses and/or"
" hostnames from connecting to the server. By default,"
- " all connections are allowed. This allows users to"
+ " all connections are allowed. This allows users to "
"define a rule for a specific exported volume."
},
{ .key = {"rpc-auth.ports.insecure"},
@@ -1770,8 +1770,8 @@ struct volume_options options[] = {
{ .key = {"nfs.*.disable"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "false",
- .description = "This option is used to start or stop NFS server"
- "for individual volume."
+ .description = "This option is used to start or stop the NFS server "
+ "for individual volumes."
},
{ .key = {"nfs.nlm"},
diff --git a/xlators/nfs/server/src/nfs3-helpers.h b/xlators/nfs/server/src/nfs3-helpers.h
index 4de1d5623..eada24221 100644
--- a/xlators/nfs/server/src/nfs3-helpers.h
+++ b/xlators/nfs/server/src/nfs3-helpers.h
@@ -334,4 +334,7 @@ nfs3_is_parentdir_entry (char *entry);
uint32_t
nfs3_request_to_accessbits (int32_t accbits);
+void
+nfs3_map_deviceid_to_statdev (struct iatt *ia, uint64_t deviceid);
+
#endif
diff --git a/xlators/nfs/server/src/nfs3.h b/xlators/nfs/server/src/nfs3.h
index 023b394cf..e64ef9d15 100644
--- a/xlators/nfs/server/src/nfs3.h
+++ b/xlators/nfs/server/src/nfs3.h
@@ -280,4 +280,8 @@ nfs3svc_init (xlator_t *nfsx);
extern int
nfs3_reconfigure_state (xlator_t *nfsx, dict_t *options);
+
+extern uint64_t
+nfs3_request_xlator_deviceid (rpcsvc_request_t *req);
+
#endif
diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c
index 5c5d87412..c186537ea 100644
--- a/xlators/nfs/server/src/nlm4.c
+++ b/xlators/nfs/server/src/nlm4.c
@@ -918,6 +918,8 @@ nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,
case RPC_CLNT_DISCONNECT:
nlm_unset_rpc_clnt (rpc_clnt);
break;
+ default:
+ break;
}
err:
diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c
index 64c5f6b66..94b8f229b 100644
--- a/xlators/performance/io-cache/src/page.c
+++ b/xlators/performance/io-cache/src/page.c
@@ -990,7 +990,7 @@ __ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno)
waitq = page->waitq;
page->waitq = NULL;
- gf_log (page->inode->table->xl->name, GF_LOG_WARNING,
+ gf_log (page->inode->table->xl->name, GF_LOG_DEBUG,
"page error for page = %p & waitq = %p", page, waitq);
for (trav = waitq; trav; trav = trav->next) {
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index ac3fcb117..d8c5f4262 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -2841,13 +2841,19 @@ struct volume_options options[] = {
{ .key = {"lk-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
- .description = "Enables or disables the lock heal."
+ .description = "When the connection to client is lost, server "
+ "cleans up all the locks held by the client. After "
+ "the connection is restored, the client reacquires "
+ "(heals) the fcntl locks released by the server."
},
{ .key = {"grace-timeout"},
.type = GF_OPTION_TYPE_INT,
.min = 10,
.max = 1800,
- .description = "Sets the grace-timeout value. Valid range 10-1800."
+ .default_value = "10",
+ .description = "Specifies the duration for the lock state to be "
+ "maintained on the client after a network "
+ "disconnection. Range 10-1800 seconds."
},
{.key = {"tcp-window-size"},
.type = GF_OPTION_TYPE_SIZET,
diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c
index 138e601ce..d43571e87 100644
--- a/xlators/protocol/server/src/server-rpc-fops.c
+++ b/xlators/protocol/server/src/server-rpc-fops.c
@@ -2265,7 +2265,7 @@ server_finodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
GF_UNUSED int ret = -1;
server_state_t *state = NULL;
- gf_log (bound_xl->name, GF_LOG_WARNING, "frame %p, xlator %p",
+ gf_log (bound_xl->name, GF_LOG_DEBUG, "frame %p, xlator %p",
frame, bound_xl);
state = CALL_STATE (frame);
@@ -2298,7 +2298,7 @@ server_inodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
GF_UNUSED int ret = -1;
server_state_t *state = NULL;
- gf_log (bound_xl->name, GF_LOG_WARNING, "frame %p, xlator %p",
+ gf_log (bound_xl->name, GF_LOG_DEBUG, "frame %p, xlator %p",
frame, bound_xl);
state = CALL_STATE (frame);
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 702deaa45..56b83cb9a 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -770,7 +770,7 @@ client_destroy_cbk (xlator_t *this, client_t *client)
server_ctx_t *ctx = NULL;
client_ctx_del (client, this, &tmp);
-
+
ctx = tmp;
if (ctx == NULL)
@@ -1052,10 +1052,26 @@ struct volume_options options[] = {
{ .key = {"root-squash"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
- .description = "Map requests from uid/gid 0 to the anonymous "
- "uid/gid. Note that this does not apply to any other"
- "uids or gids that might be equally sensitive, such as"
- "user bin or group staff."
+ .description = "Map requests from uid/gid 0 to the anonymous "
+ "uid/gid. Note that this does not apply to any other "
+ "uids or gids that might be equally sensitive, such "
+ "as user bin or group staff."
+ },
+ { .key = {"anonuid"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "65534", /* RPC_NOBODY_UID */
+ .min = 0,
+ .max = (uint32_t) -1,
+ .description = "value of the uid used for the anonymous "
+ "user/nfsnobody when root-squash is enabled."
+ },
+ { .key = {"anongid"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "65534", /* RPC_NOBODY_GID */
+ .min = 0,
+ .max = (uint32_t) -1,
+ .description = "value of the gid used for the anonymous "
+ "user/nfsnobody when root-squash is enabled."
},
{ .key = {"statedump-path"},
.type = GF_OPTION_TYPE_PATH,
@@ -1085,13 +1101,15 @@ struct volume_options options[] = {
{ .key = {"auth.addr.*.allow"},
.type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
.description = "Allow a comma separated list of addresses and/or "
- "hostnames to connect to the server. By default, all"
- " connections are allowed."
+ "hostnames to connect to the server. Option "
+ "auth.reject overrides this option. By default, all "
+ "connections are allowed."
},
{ .key = {"auth.addr.*.reject"},
.type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
.description = "Reject a comma separated list of addresses and/or "
- "hostnames to connect to the server. By default, all"
+ "hostnames to connect to the server. This option "
+ "overrides the auth.allow option. By default, all"
" connections are allowed."
},
diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c
index 17a9a5f15..4c3a7e14e 100644
--- a/xlators/storage/bd/src/bd.c
+++ b/xlators/storage/bd/src/bd.c
@@ -1015,6 +1015,13 @@ bd_setx_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
+ if (!strcmp (type, BD_THIN) && !(priv->caps & BD_CAPS_THIN)) {
+ gf_log (this->name, GF_LOG_WARNING, "THIN lv not supported by "
+ "this volume");
+ op_errno = EOPNOTSUPP;
+ goto out;
+ }
+
s_size = strtok_r (NULL, ":", &p);
/* If size not specified get default size */
diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c
index 1d8e98631..613709fc8 100644
--- a/xlators/storage/posix/src/posix-handle.c
+++ b/xlators/storage/posix/src/posix-handle.c
@@ -701,16 +701,8 @@ posix_handle_hard (xlator_t *this, const char *oldpath, uuid_t gfid, struct stat
return -1;
}
-#ifdef HAVE_LINKAT
- /*
- * Use linkat if the target may be a symlink to a directory
- * or without an existing target. See comment about linkat()
- * usage in posix_link() in posix.c for details
- */
- ret = linkat (AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
-#else
- ret = link (oldpath, newpath);
-#endif
+ ret = sys_link (oldpath, newpath);
+
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"link %s -> %s failed (%s)",
@@ -882,16 +874,7 @@ posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid,
MAKE_HANDLE_PATH (newpath, this, gfid, NULL);
ret = lstat (newpath, &stbuf);
if (!ret) {
-#ifdef HAVE_LINKAT
- /*
- * Use linkat if the target may be a symlink to a directory
- * or without an existing target. See comment about linkat()
- * usage in posix_link() in posix.c for details
- */
- ret = linkat (AT_FDCWD, newpath, AT_FDCWD, real_path, 0);
-#else
- ret = link (newpath, real_path);
-#endif
+ ret = sys_link (newpath, real_path);
}
return ret;
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 0e187e020..3a66ecfc2 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -781,7 +781,10 @@ posix_handle_pair (xlator_t *this, const char *real_path,
int sys_ret = -1;
int ret = 0;
- if (ZR_FILE_CONTENT_REQUEST(key)) {
+ if (XATTR_IS_PATHINFO (key)) {
+ ret = -EACCES;
+ goto out;
+ } else if (ZR_FILE_CONTENT_REQUEST(key)) {
ret = posix_set_file_contents (this, real_path, key, value,
flags);
} else {
@@ -789,6 +792,7 @@ posix_handle_pair (xlator_t *this, const char *real_path,
value->len, flags);
if (sys_ret < 0) {
+ ret = -errno;
if (errno == ENOTSUP) {
GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log,
this->name,GF_LOG_WARNING,
@@ -820,7 +824,6 @@ posix_handle_pair (xlator_t *this, const char *real_path,
#endif /* DARWIN */
}
- ret = -errno;
goto out;
}
}
@@ -835,10 +838,16 @@ posix_fhandle_pair (xlator_t *this, int fd,
int sys_ret = -1;
int ret = 0;
+ if (XATTR_IS_PATHINFO (key)) {
+ ret = -EACCES;
+ goto out;
+ }
+
sys_ret = sys_fsetxattr (fd, key, value->data,
value->len, flags);
if (sys_ret < 0) {
+ ret = -errno;
if (errno == ENOTSUP) {
GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log,
this->name,GF_LOG_WARNING,
@@ -865,7 +874,6 @@ posix_fhandle_pair (xlator_t *this, int fd,
#endif /* DARWIN */
}
- ret = -errno;
goto out;
}
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index b72cd5e0f..83b689d06 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -1930,18 +1930,9 @@ posix_link (call_frame_t *frame, xlator_t *this,
goto out;
}
-#ifdef HAVE_LINKAT
- /*
- * On most systems (Linux being the notable exception), link(2)
- * first resolves symlinks. If the target is a directory or
- * is nonexistent, it will fail. linkat(2) operates on the
- * symlink instead of its target when the AT_SYMLINK_FOLLOW
- * flag is not supplied.
- */
- op_ret = linkat (AT_FDCWD, real_oldpath, AT_FDCWD, real_newpath, 0);
-#else
- op_ret = link (real_oldpath, real_newpath);
-#endif
+
+ op_ret = sys_link (real_oldpath, real_newpath);
+
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -2950,8 +2941,10 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
filler.flags = flags;
op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair,
&filler);
- if (op_ret < 0)
+ if (op_ret < 0) {
op_errno = -op_ret;
+ op_ret = -1;
+ }
out:
SET_TO_OLD_FS_ID ();
@@ -3430,8 +3423,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
}
goto done;
}
- if (loc->inode && name &&
- (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {
+ if (loc->inode && name && (XATTR_IS_PATHINFO (name))) {
if (LOC_HAS_ABSPATH (loc))
MAKE_REAL_PATH (rpath, this, loc->path);
else
@@ -3451,8 +3443,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
goto done;
}
size = strlen (dyn_rpath) + 1;
- ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY,
- dyn_rpath);
+ ret = dict_set_dynstr (dict, (char *)name, dyn_rpath);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"could not set value (%s) in dictionary",
@@ -3918,8 +3909,10 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
filler.flags = flags;
op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair,
&filler);
- if (op_ret < 0)
+ if (op_ret < 0) {
op_errno = -op_ret;
+ op_ret = -1;
+ }
out:
SET_TO_OLD_FS_ID ();
@@ -5093,9 +5086,22 @@ posix_set_owner (xlator_t *this, uid_t uid, gid_t gid)
{
struct posix_private *priv = NULL;
int ret = -1;
+ struct stat st = {0,};
priv = this->private;
+ ret = sys_lstat (priv->base_path, &st);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to stat "
+ "brick path %s (%s)",
+ priv->base_path, strerror (errno));
+ return ret;
+ }
+
+ if ((uid == -1 || st.st_uid == uid) &&
+ (gid == -1 || st.st_gid == gid))
+ return 0;
+
ret = sys_chown (priv->base_path, uid, gid);
if (ret)
gf_log (this->name, GF_LOG_ERROR, "Failed to set "
@@ -5131,15 +5137,16 @@ reconfigure (xlator_t *this, dict_t *options)
{
int ret = -1;
struct posix_private *priv = NULL;
- uid_t uid = -1;
- gid_t gid = -1;
+ int32_t uid = -1;
+ int32_t gid = -1;
char *batch_fsync_mode_str = NULL;
priv = this->private;
- GF_OPTION_RECONF ("brick-uid", uid, options, uint32, out);
- GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out);
- posix_set_owner (this, uid, gid);
+ GF_OPTION_RECONF ("brick-uid", uid, options, int32, out);
+ GF_OPTION_RECONF ("brick-gid", gid, options, int32, out);
+ if (uid != -1 || gid != -1)
+ posix_set_owner (this, uid, gid);
GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec,
options, uint32, out);
@@ -5205,8 +5212,8 @@ init (xlator_t *this)
uuid_t gfid = {0,};
uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
char *guuid = NULL;
- uid_t uid = -1;
- gid_t gid = -1;
+ int32_t uid = -1;
+ int32_t gid = -1;
char *batch_fsync_mode_str;
dir_data = dict_get (this->options, "directory");
@@ -5541,9 +5548,10 @@ init (xlator_t *this)
_private->aio_init_done = _gf_false;
_private->aio_capable = _gf_false;
- GF_OPTION_INIT ("brick-uid", uid, uint32, out);
- GF_OPTION_INIT ("brick-gid", gid, uint32, out);
- posix_set_owner (this, uid, gid);
+ GF_OPTION_INIT ("brick-uid", uid, int32, out);
+ GF_OPTION_INIT ("brick-gid", gid, int32, out);
+ if (uid != -1 || gid != -1)
+ posix_set_owner (this, uid, gid);
GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out);
@@ -5704,15 +5712,17 @@ struct volume_options options[] = {
{
.key = {"brick-uid"},
.type = GF_OPTION_TYPE_INT,
- .min = 0,
+ .min = -1,
.validate = GF_OPT_VALIDATE_MIN,
+ .default_value = "-1",
.description = "Support for setting uid of brick's owner"
},
{
.key = {"brick-gid"},
.type = GF_OPTION_TYPE_INT,
- .min = 0,
+ .min = -1,
.validate = GF_OPT_VALIDATE_MIN,
+ .default_value = "-1",
.description = "Support for setting gid of brick's owner"
},
{ .key = {"node-uuid-pathinfo"},