author | Kevin Vigor <kvigor@fb.com> | 2017-01-05 12:21:20 -0800 |
---|---|---|
committer | Kevin Vigor <kvigor@fb.com> | 2017-01-05 12:21:20 -0800 |
commit | c27aa58e72cf528583c585691e65abdb765535e5 (patch) | |
tree | fae75e5b924ac4fb80a3d4ed42203638732fbb52 | |
parent | 63403742f53ec59a6acbe26ff4c39bab1b0842ed (diff) | |
parent | cb8bc3396d16e777d9a2683886fefd43e747e8a3 (diff) | |
Merge remote-tracking branch 'origin/release-3.8' into merge-3.8-again
Change-Id: I844adf2aef161a44d446f8cd9b7ebcb224ee618a
Signed-off-by: Kevin Vigor <kvigor@fb.com>
87 files changed, 1992 insertions, 584 deletions
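
Among the changes merged below, the libgfapi locking helper glfs_lock() gains a wait_for_migration flag: top-down fop paths keep waiting for graph migration to finish, while call-back paths skip that wait, because blocking one of the finitely many epoll threads on migration could leave no thread free to process the RPC replies that let the migration complete. The following is a minimal caller-side sketch of the two modes, not part of the patch itself; do_fop_locked() and subvol_unref_locked() are hypothetical helper names, and the tree-internal header include is assumed:

/* Sketch only. Assumes the tree-internal header that declares
 * struct glfs, xlator_t, glfs_lock() and glfs_unlock(). */
#include "glfs-internal.h"

/* Top-down fop path: it is safe (and required) to block until any
 * in-progress graph migration completes before touching the graph. */
static void
do_fop_locked (struct glfs *fs)
{
        glfs_lock (fs, _gf_true);        /* wait for migration */
        {
                /* ... operate on fs->active_subvol ... */
        }
        glfs_unlock (fs);
}

/* Call-back (epoll) path: wait only for fs->init, never for migration,
 * so the finite pool of epoll threads cannot deadlock against the
 * migration it would otherwise be waiting on. */
static void
subvol_unref_locked (struct glfs *fs, xlator_t *subvol)
{
        glfs_lock (fs, _gf_false);       /* do not wait for migration */
        {
                subvol->winds--;         /* as in priv_glfs_subvol_done() */
        }
        glfs_unlock (fs);
}
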
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
index f69df15715c..6ff9aada0c5 100644
--- a/api/src/glfs-internal.h
+++ b/api/src/glfs-internal.h
@@ -327,17 +327,26 @@ do { \
    we can give up the mutex during syncop calls so
    that bottom up calls (particularly CHILD_UP notify)
    can do a mutex_lock() on @glfs without deadlocking
-   the filesystem
+   the filesystem.
+
+   All the fops should wait for graph migration to finish
+   before starting the fops. Therefore these functions should
+   call glfs_lock with wait_for_migration as true. But waiting
+   for migration to finish in the call-back path can result in
+   thread deadlocks. The reason for this is that we have only a
+   finite number of epoll threads, so if we wait on epoll threads
+   there will not be any thread left to handle outstanding
+   rpc replies.
 */
 static inline int
-glfs_lock (struct glfs *fs)
+glfs_lock (struct glfs *fs, gf_boolean_t wait_for_migration)
 {
         pthread_mutex_lock (&fs->mutex);

         while (!fs->init)
                 pthread_cond_wait (&fs->cond, &fs->mutex);

-        while (fs->migration_in_progress)
+        while (wait_for_migration && fs->migration_in_progress)
                 pthread_cond_wait (&fs->cond, &fs->mutex);

         return 0;
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
index b84e5d8f58c..f8b437bab0e 100644
--- a/api/src/glfs-resolve.c
+++ b/api/src/glfs-resolve.c
@@ -784,7 +784,7 @@ glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
 {
         fd_t *fd = NULL;

-        glfs_lock (fs);
+        glfs_lock (fs, _gf_true);
         {
                 fd = __glfs_resolve_fd (fs, subvol, glfd);
         }
@@ -897,12 +897,17 @@ priv_glfs_subvol_done (struct glfs *fs, xlator_t *subvol)
         if (!subvol)
                 return;

-        glfs_lock (fs);
+        /* For decrementing subvol->wind ref count we need not check/wait for
+         * the migration-in-progress flag.
+         * Also, glfs_subvol_done is called in the call-back path, therefore
+         * waiting for the migration-in-progress flag can lead to deadlock.
+ */ + glfs_lock (fs, _gf_false); { ref = (--subvol->winds); active_subvol = fs->active_subvol; } - glfs_unlock (fs); + glfs_unlock (fs); if (ref == 0) { assert (subvol != active_subvol); @@ -919,7 +924,7 @@ priv_glfs_active_subvol (struct glfs *fs) xlator_t *subvol = NULL; xlator_t *old_subvol = NULL; - glfs_lock (fs); + glfs_lock (fs, _gf_true); { subvol = __glfs_active_subvol (fs); @@ -968,7 +973,7 @@ glfs_cwd_set (struct glfs *fs, inode_t *inode) { int ret = 0; - glfs_lock (fs); + glfs_lock (fs, _gf_true); { ret = __glfs_cwd_set (fs, inode); } @@ -1001,7 +1006,7 @@ glfs_cwd_get (struct glfs *fs) { inode_t *cwd = NULL; - glfs_lock (fs); + glfs_lock (fs, _gf_true); { cwd = __glfs_cwd_get (fs); } @@ -1041,7 +1046,7 @@ glfs_resolve_inode (struct glfs *fs, xlator_t *subvol, { inode_t *inode = NULL; - glfs_lock (fs); + glfs_lock (fs, _gf_true); { inode = __glfs_resolve_inode(fs, subvol, object); } diff --git a/api/src/glfs.c b/api/src/glfs.c index f7e271208ec..2ec319fe3e6 100644 --- a/api/src/glfs.c +++ b/api/src/glfs.c @@ -149,8 +149,6 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) LOCK_INIT (&pool->lock); ctx->pool = pool; - LOCK_INIT (&ctx->lock); - ret = 0; err: if (ret && pool) { @@ -551,7 +549,7 @@ glfs_fd_destroy (void *data) glfd = (struct glfs_fd *)data; - glfs_lock (glfd->fs); + glfs_lock (glfd->fs, _gf_true); { list_del_init (&glfd->openfds); } @@ -594,7 +592,7 @@ glfs_fd_bind (struct glfs_fd *glfd) fs = glfd->fs; - glfs_lock (fs); + glfs_lock (fs, _gf_true); { list_add_tail (&glfd->openfds, &fs->openfds); } @@ -884,7 +882,7 @@ glfs_init_wait (struct glfs *fs) int ret = -1; /* Always a top-down call, use glfs_lock() */ - glfs_lock (fs); + glfs_lock (fs, _gf_true); { while (!fs->init) pthread_cond_wait (&fs->cond, @@ -1258,7 +1256,7 @@ pub_glfs_get_volfile (struct glfs *fs, void *buf, size_t len) DECLARE_OLD_THIS; __GLFS_ENTRY_VALIDATE_FS (fs, invalid_fs); - glfs_lock(fs); + glfs_lock(fs, _gf_true); if (len >= fs->oldvollen) { gf_msg_trace ("glfs", 0, "copying %zu to %p", len, buf); memcpy(buf,fs->oldvolfile,len); diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 692bd26e969..8a446595e79 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -13,6 +13,7 @@ #include <stdint.h> #include <pthread.h> #include <fnmatch.h> +#include <time.h> #include "cli.h" #include "cli-cmd.h" @@ -2366,6 +2367,9 @@ config_parse (const char **words, int wordcount, dict_t *dict, char *append_str = NULL; size_t append_len = 0; char *subop = NULL; + char *ret_chkpt = NULL; + struct tm checkpoint_time; + char chkpt_buf[20] = ""; switch ((wordcount - 1) - cmdi) { case 0: @@ -2427,6 +2431,27 @@ config_parse (const char **words, int wordcount, dict_t *dict, } snprintf (append_str, 300, "%" GF_PRI_SECOND, tv.tv_sec); + } else if ((strcmp (words[cmdi + 1], "checkpoint") == 0) && + (strcmp (append_str, "now") != 0)) { + memset(&checkpoint_time, 0, sizeof(struct tm)); + ret_chkpt = strptime(append_str, "%Y-%m-%d %H:%M:%S", + &checkpoint_time); + + if (ret_chkpt == NULL) { + ret = -1; + cli_err ("Invalid Checkpoint label. 
Use format " + "\"Y-m-d H:M:S\", Example: 2016-10-25 15:30:45"); + goto out; + } + GF_FREE (append_str); + append_str = GF_CALLOC (1, 300, cli_mt_append_str); + if (!append_str) { + ret = -1; + goto out; + } + strftime (chkpt_buf, sizeof(chkpt_buf), "%s", + &checkpoint_time); + snprintf (append_str, 300, "%s", chkpt_buf); } ret = dict_set_dynstr (dict, "op_value", append_str); @@ -3586,7 +3611,8 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount, if (wordcount == 5) { if (strcmp (words[3], "info") && - strcmp (words[3], "statistics")) { + strcmp (words[3], "statistics") && + strcmp (words[3], "granular-entry-heal")) { ret = -1; goto out; } @@ -3616,6 +3642,19 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount, goto done; } } + + if (!strcmp (words[3], "granular-entry-heal")) { + if (!strcmp (words[4], "enable")) { + ret = dict_set_int32 (dict, "heal-op", + GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE); + goto done; + } else if (!strcmp (words[4], "disable")) { + ret = dict_set_int32 (dict, "heal-op", + GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE); + goto done; + } + } + ret = -1; goto out; } diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index c721171f517..0d25279f381 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -2151,7 +2151,8 @@ cli_print_brick_status (cli_volume_status_t *status) (op == GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME) ||\ (op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) || \ (op == GF_SHD_OP_INDEX_SUMMARY) || \ - (op == GF_SHD_OP_SPLIT_BRAIN_FILES)) + (op == GF_SHD_OP_SPLIT_BRAIN_FILES) || \ + (op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)) int cli_launch_glfs_heal (int heal_op, dict_t *options) @@ -2200,6 +2201,10 @@ cli_launch_glfs_heal (int heal_op, dict_t *options) runner_add_args (&runner, "xml", NULL); } break; + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE: + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE: + runner_add_args (&runner, "granular-entry-heal-op", NULL); + break; default: ret = -1; } @@ -2211,11 +2216,11 @@ cli_launch_glfs_heal (int heal_op, dict_t *options) printf ("%s", out); } ret = runner_end (&runner); - ret = WEXITSTATUS (ret); out: return ret; } + int cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word, const char **words, int wordcount) @@ -2252,19 +2257,19 @@ cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word, goto out; if (NEEDS_GLFS_HEAL (heal_op)) { ret = cli_launch_glfs_heal (heal_op, options); - if (ret == -1) + if (ret < 0) + goto out; + if (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) goto out; } - else { - proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME]; - CLI_LOCAL_INIT (local, words, frame, options); + proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME]; - if (proc->fn) { - ret = proc->fn (frame, THIS, options); - } - } + CLI_LOCAL_INIT (local, words, frame, options); + if (proc->fn) { + ret = proc->fn (frame, THIS, options); + } out: if (ret) { cli_cmd_sent_status_get (&sent); @@ -2673,7 +2678,8 @@ struct cli_cmd volume_cmds[] = { "statistics [heal-count [replica <HOSTNAME:BRICKNAME>]] |" "info [healed | heal-failed | split-brain] |" "split-brain {bigger-file <FILE> | latest-mtime <FILE> |" - "source-brick <HOSTNAME:BRICKNAME> [<FILE>]}]", + "source-brick <HOSTNAME:BRICKNAME> [<FILE>]} |" + "granular-entry-heal {enable | disable}]", cli_cmd_volume_heal_cbk, "self-heal commands on volume specified by <VOLNAME>"}, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index d88ddd7f136..602d3ff1611 100644 --- 
a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -8670,6 +8670,14 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov, operation = ""; heal_op_str = "Disable heal"; break; + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE: + operation = ""; + heal_op_str = "Enable granular entry heal"; + break; + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE: + operation = ""; + heal_op_str = "Disable granular entry heal"; + break; } if (rsp.op_ret) { diff --git a/cli/src/cli.c b/cli/src/cli.c index 77d2a410fb4..fa507309e80 100644 --- a/cli/src/cli.c +++ b/cli/src/cli.c @@ -154,8 +154,6 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) LOCK_INIT (&pool->lock); ctx->pool = pool; - LOCK_INIT (&ctx->lock); - cmd_args = &ctx->cmd_args; INIT_LIST_HEAD (&cmd_args->xlator_options); diff --git a/configure.ac b/configure.ac index a8104056fbb..86c6bcfcc4d 100644 --- a/configure.ac +++ b/configure.ac @@ -1194,8 +1194,8 @@ if test "x$enable_glupy" = "xyes"; then dnl Find python libs at user configured libdir and also "lib" under prefix PYTHONDEV_LDFLAGS="${PYTHON_LIBS} -L`${PYTHON}-config --prefix`/lib -L`${PYTHON}-config --prefix`/$libdir `${PYTHON}-config --ldflags`" - BUILD_PYTHON_SITE_PACKAGES=`$PYTHON -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib(prefix="${exec_prefix}"))'` - BUILD_PYTHON_INC=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_inc()"` + BUILD_PYTHON_SITE_PACKAGES=${pythondir} + BUILD_PYTHON_INC=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_inc())" 2>/dev/null` BUILD_PYTHON_LIB=python$PYTHON_VERSION GLUPY_SUBDIR=glupy diff --git a/doc/release-notes/3.8.6.md b/doc/release-notes/3.8.6.md new file mode 100644 index 00000000000..1ad77f3dbf8 --- /dev/null +++ b/doc/release-notes/3.8.6.md @@ -0,0 +1,60 @@ +# Release notes for Gluster 3.8.6 + +This is a bugfix release. The [Release Notes for 3.8.0](3.8.0.md), +[3.8.1](3.8.1.md), [3.8.2](3.8.2.md), [3.8.3](3.8.3.md), [3.8.4](3.8.4.md) and +[3.8.5](3.8.5.md) contain a listing of all the new features that were added and +bugs fixed in the GlusterFS 3.8 stable release. + + +## Change in port allocation, may affect deployments with strict firewalls + +'''Problem description''': GlusterD used to assume that the brick port which +was previously allocated to a brick, would still be available, and in doing so +would reuse the port for the brick without registering with the port map +server. The port map server would not be aware of the brick reusing the same +port, and try to allocate it to another process, and in turn result in that +process' failure to connect to the port. + +'''Fix and port usage changes''': With the fix, we force GlusterD to unregister +a port previously used by the brick, and register a new port with the port map +server and then use it. As a result of this change, there will be no conflict +between processes competing over the same port, thereby fixing the issue. Also +because of this change, a brick process on restart is not guaranteed to reuse +the same port it used to be connected to. + + +## Bugs addressed + +A total of 34 patches have been merged, addressing 31 bugs: + +- [#1336376](https://bugzilla.redhat.com/1336376): Sequential volume start&stop is failing with SSL enabled setup. 
+- [#1347717](https://bugzilla.redhat.com/1347717): removal of file from nfs mount crashs ganesha server
+- [#1369766](https://bugzilla.redhat.com/1369766): glusterd: add brick command should re-use the port for listening which is freed by remove-brick.
+- [#1371397](https://bugzilla.redhat.com/1371397): [Disperse] dd + rm + ls lead to IO hang
+- [#1375125](https://bugzilla.redhat.com/1375125): arbiter volume write performance is bad.
+- [#1377448](https://bugzilla.redhat.com/1377448): glusterd: Display proper error message and fail the command if S32gluster_enable_shared_storage.sh hook script is not present during gluster volume set all cluster.enable-shared-storage <enable/disable> command
+- [#1384345](https://bugzilla.redhat.com/1384345): usage text is wrong for use-readdirp mount default
+- [#1384356](https://bugzilla.redhat.com/1384356): Polling failure errors getting when volume is started&stopped with SSL enabled setup.
+- [#1385442](https://bugzilla.redhat.com/1385442): invalid argument warning messages seen in fuse client logs 2016-09-30 06:34:58.938667] W [dict.c:418ict_set] (-->/usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x58722) 0-dict: !this || !value for key=link-count [Invalid argument]
+- [#1385620](https://bugzilla.redhat.com/1385620): Recording (ffmpeg) processes on FUSE get hung
+- [#1386071](https://bugzilla.redhat.com/1386071): Spurious permission denied problems observed
+- [#1387976](https://bugzilla.redhat.com/1387976): Continuous warning messages getting when one of the cluster node is down on SSL setup.
+- [#1388354](https://bugzilla.redhat.com/1388354): Memory Leaks in snapshot code path
+- [#1388580](https://bugzilla.redhat.com/1388580): crypt: changes needed for openssl-1.1 (coming in Fedora 26)
+- [#1388948](https://bugzilla.redhat.com/1388948): glusterfs can't self heal character dev file for invalid dev_t parameters
+- [#1390838](https://bugzilla.redhat.com/1390838): write-behind: flush stuck by former failed write
+- [#1390870](https://bugzilla.redhat.com/1390870): DHT: Rebalance- Misleading log messages from __dht_check_free_space function
+- [#1391450](https://bugzilla.redhat.com/1391450): md-cache: Invalidate cache entry in case of OPEN with O_TRUNC
+- [#1392288](https://bugzilla.redhat.com/1392288): gfapi clients crash while using async calls due to double fd_unref
+- [#1392364](https://bugzilla.redhat.com/1392364): trashcan max file limit cannot go beyond 1GB
+- [#1392716](https://bugzilla.redhat.com/1392716): Quota version not changing in the quota.conf after upgrading to 3.7.1 from 3.6.1
+- [#1392846](https://bugzilla.redhat.com/1392846): Hosted Engine VM paused post replace-brick operation
+- [#1392868](https://bugzilla.redhat.com/1392868): The FUSE client log is filling up with posix_acl_default and posix_acl_access messages
+- [#1393630](https://bugzilla.redhat.com/1393630): Better logging when reporting failures of the kind "<file-path> Failing MKNOD as quorum is not met"
+- [#1393682](https://bugzilla.redhat.com/1393682): stat of file is hung with possible deadlock
+- [#1394108](https://bugzilla.redhat.com/1394108): Continuous errors getting in the mount log when the volume mount server glusterd is down.
+- [#1394187](https://bugzilla.redhat.com/1394187): SMB[md-cache Private Build]:Error messages in brick logs related to upcall_cache_invalidate gf_uuid_is_null
+- [#1394226](https://bugzilla.redhat.com/1394226): "nfs-grace-monitor" timed out messages observed
+- [#1394883](https://bugzilla.redhat.com/1394883): Failed to enable nfs-ganesha after disabling nfs-ganesha cluster
+- [#1395627](https://bugzilla.redhat.com/1395627): Labelled geo-rep checkpoints hide geo-replication status
+- [#1396418](https://bugzilla.redhat.com/1396418): [md-cache]: All bricks crashed while performing symlink and rename from client at the same time
diff --git a/doc/release-notes/3.8.7.md b/doc/release-notes/3.8.7.md
new file mode 100644
index 00000000000..5a2fc980297
--- /dev/null
+++ b/doc/release-notes/3.8.7.md
@@ -0,0 +1,76 @@
+# Release notes for Gluster 3.8.7
+
+This is a bugfix release. The [Release Notes for 3.8.0](3.8.0.md),
+[3.8.1](3.8.1.md), [3.8.2](3.8.2.md), [3.8.3](3.8.3.md), [3.8.4](3.8.4.md),
+[3.8.5](3.8.5.md) and [3.8.6](3.8.6.md) contain a listing of all the new
+features that were added and bugs fixed in the GlusterFS 3.8 stable release.
+
+
+## New CLI option for granular entry heal enablement/disablement
+
+When there are already existing non-granular indices created that are yet to
+be healed, if the granular-entry-heal option is toggled from `off` to `on`,
+AFR self-heal, whenever it kicks in, will look for granular indices in
+`entry-changes`. Because of the absence of name indices, granular entry
+healing logic will fail to heal these directories and, worse yet, unset
+pending extended attributes with the assumption that there are no entries
+that need heal.
+
+To get around this, a new CLI is introduced which invokes the glfsheal
+program to figure out whether, at the time an attempt is made to enable
+granular entry heal, there are pending heals on the volume OR one or more
+bricks are down. If either is true, the command fails with an appropriate
+error.
+
+    # gluster volume heal <VOL> granular-entry-heal {enable,disable}
+
+With this change, the user does not need to worry about when to enable/disable
+the option - the CLI command itself performs the necessary checks before
+allowing the "enable" command to proceed.
+
+What are those checks?
+* Whether heal is already needed on the volume
+* Whether any of the replicas is down
+
+In both of these cases the command fails, since AFR will be switching
+from creating heal indices (markers for files that need heal) under
+`.glusterfs/indices/xattrop` to creating them under
+`.glusterfs/indices/entry-changes`.
+The moment this switch happens, self-heal-daemon will cease to crawl the
+entire directory if a directory needs heal and will instead look for the
+exact names under a directory that need heal under
+`.glusterfs/indices/entry-changes`. This might cause self-heal to miss
+healing some entries (because before the switch, directories already needing
+heal won't have any indices under `.glusterfs/indices/entry-changes`) and
+mistakenly unset the pending heal xattrs even though the individual replicas
+are not in sync.
+
+When should users enable this option?
+* When they want to use the feature ;)
+* which is useful for faster self-healing in use cases with a large number of
+  files under a single directory.
+  For example, it is useful in VM use cases with smaller shard sizes, given
+  that all shards are created under a single directory `.shard`.
When a shard + is created while a replica was down, once it is back up, self-heal due to its + maintaining granular indices will know exactly which shard to recreate on the + sync as opposed to crawling the entire `.shard` directory to find out the + same information. + + +## Bugs addressed + +A total of 16 patches have been merged, addressing 15 bugs: + +- [#1395652](https://bugzilla.redhat.com/1395652): ganesha-ha.conf --status should validate if the VIPs are assigned to right nodes +- [#1397663](https://bugzilla.redhat.com/1397663): libgfapi core dumps +- [#1398501](https://bugzilla.redhat.com/1398501): [granular entry sh] - Provide a CLI to enable/disable the feature that checks that there are no heals pending before allowing the operation +- [#1399018](https://bugzilla.redhat.com/1399018): performance.read-ahead on results in processes on client stuck in IO wait +- [#1399088](https://bugzilla.redhat.com/1399088): geo-replica slave node goes faulty for non-root user session due to fail to locate gluster binary +- [#1399090](https://bugzilla.redhat.com/1399090): [geo-rep]: Worker crashes seen while renaming directories in loop +- [#1399130](https://bugzilla.redhat.com/1399130): SEEK_HOLE/ SEEK_DATA doesn't return the correct offset +- [#1399635](https://bugzilla.redhat.com/1399635): Refresh config fails while exporting subdirectories within a volume +- [#1400459](https://bugzilla.redhat.com/1400459): [USS,SSL] .snaps directory is not reachable when I/O encryption (SSL) is enabled +- [#1400573](https://bugzilla.redhat.com/1400573): Ganesha services are not stopped when pacemaker quorum is lost +- [#1400802](https://bugzilla.redhat.com/1400802): glusterfs_ctx_defaults_init is re-initializing ctx->locks +- [#1400927](https://bugzilla.redhat.com/1400927): Memory leak when self healing daemon queue is full +- [#1402672](https://bugzilla.redhat.com/1402672): Getting the warning message while erasing the gluster "glusterfs-server" package. +- [#1403192](https://bugzilla.redhat.com/1403192): Files remain unhealed forever if shd is disabled and re-enabled while healing is in progress. +- [#1403646](https://bugzilla.redhat.com/1403646): self-heal not happening, as self-heal info lists the same pending shards to be healed diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace index 84202df9aa2..cb6dcc4e867 100644 --- a/extras/ganesha/ocf/ganesha_grace +++ b/extras/ganesha/ocf/ganesha_grace @@ -164,7 +164,6 @@ ganesha_grace_notify() ganesha_grace_monitor() { - local rc=${OCF_ERR_GENERIC} local host=$(hostname -s) ocf_log debug "monitor" @@ -187,10 +186,10 @@ ganesha_grace_monitor() fi if [[ "${attr}" = *"value=1" ]]; then - rc=${OCF_SUCCESS} + return ${OCF_SUCCESS} fi - return ${rc} + return ${OCF_NOT_RUNNING} } ganesha_grace_validate() diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh index 8b55abbc6c5..ac8c91f194e 100644 --- a/extras/ganesha/scripts/ganesha-ha.sh +++ b/extras/ganesha/scripts/ganesha-ha.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2015 Red Hat Inc. All Rights Reserved +# Copyright 2015-2016 Red Hat Inc. 
All Rights Reserved # # Pacemaker+Corosync High Availability for NFS-Ganesha # @@ -74,13 +74,14 @@ GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} usage() { - echo "Usage : add|delete|status" - echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ + echo "Usage : add|delete|refresh-config|status" + echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ <NODE-HOSTNAME> <NODE-VIP>" - echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ + echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ <NODE-HOSTNAME>" - echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR>\ - <volume>" + echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \ +<volume>" + echo "Status : ganesha-ha.sh --status <HA_CONFDIR>" } determine_service_manager () { @@ -139,7 +140,7 @@ determine_servers() local tmp_ifs=${IFS} local ha_servers="" - if [[ "X${cmd}X" != "XsetupX" ]]; then + if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') IFS=$' ' for server in ${ha_servers} ; do @@ -166,12 +167,14 @@ setup_cluster() local num_servers=${2} local servers=${3} local unclean="" + local quorum_policy="stop" + logger "setting up cluster ${name} with the following ${servers}" pcs cluster auth ${servers} # pcs cluster setup --name ${name} ${servers} - pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} + pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers} if [ $? -ne 0 ]; then logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" exit 1; @@ -198,10 +201,11 @@ setup_cluster() sleep 1 if [ ${num_servers} -lt 3 ]; then - pcs property set no-quorum-policy=ignore - if [ $? -ne 0 ]; then - logger "warning: pcs property set no-quorum-policy=ignore failed" - fi + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" fi pcs property set stonith-enabled=false @@ -261,7 +265,7 @@ ${tganesha_vol_conf} while [[ ${3} ]]; do current_host=`echo ${3} | cut -d "." -f 1` if [ ${short_host} != ${current_host} ]; then - removed_id=$(ssh -oPasswordAuthentication=no \ + removed_id=$(ssh -oPasswordAuthentication=no \ -oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ "cat $HA_CONFDIR/exports/export.$VOL.conf |\ grep Export_Id | awk -F\"[=,;]\" '{print \$2}' | tr -d '[[:space:]]'") @@ -291,7 +295,7 @@ ${current_host}:${HA_CONFDIR}/exports/export.$VOL.conf "dbus-send --print-reply --system --dest=org.ganesha.nfsd \ /org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ string:$HA_CONFDIR/exports/export.$VOL.conf \ -string:\"EXPORT(Path=/$VOL)\" 2>&1") +string:\"EXPORT(Path=/$removed_id)\" 2>&1") ret=$? logger <<< "${output}" if [ ${ret} -ne 0 ]; then @@ -325,7 +329,7 @@ uint16:$removed_id 2>&1) output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ /org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ string:$HA_CONFDIR/exports/export.$VOL.conf \ -string:"EXPORT(Path=/$VOL)" 2>&1) +string:"EXPORT(Path=/$removed_id)" 2>&1) ret=$? 
logger <<< "${output}" if [ ${ret} -ne 0 ] ; then @@ -416,12 +420,13 @@ teardown_cluster() cleanup_ganesha_config () { - rm -rf ${HA_CONFDIR}/exports/*.conf - rm -rf ${HA_CONFDIR}/.export_added - rm -rf /etc/cluster/cluster.conf* - rm -rf /var/lib/pacemaker/cib/* - sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' ${GANESHA_CONF} - rm -rf ${HA_VOL_MNT}/nfs-ganesha + rm -rf ${HA_CONFDIR}/exports/*.conf + rm -rf ${HA_CONFDIR}/.export_added + rm -rf /etc/cluster/cluster.conf* + rm -rf /var/lib/pacemaker/cib/* + rm -f /etc/corosync/corosync.conf + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' ${GANESHA_CONF} + rm -rf ${HA_VOL_MNT}/nfs-ganesha } do_create_virt_ip_constraints() @@ -807,27 +812,148 @@ setup_state_volume() } +addnode_state_volume() +{ + local newnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${newnode} == *${dname} ]]; then + dirname=${newnode} + else + dirname=${newnode}${dname} + fi + + if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then + mkdir ${mnt}/nfs-ganesha/${dirname} + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! 
-e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + + for server in ${HA_SERVERS} ; do + if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done + +} + + +delnode_state_volume() +{ + local delnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${delnode} == *${dname} ]]; then + dirname=${delnode} + else + dirname=${delnode}${dname} + fi + + rm -rf ${mnt}/nfs-ganesha/${dirname} + + for server in ${HA_SERVERS} ; do + if [[ "${server}" != "${dirname}" ]]; then + rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done +} + + status() { - local regex_str="^ ${1}"; shift - local status_file=$(mktemp) + local scratch=$(mktemp) + local regex_str="^${1}-cluster_ip-1" + local healthy=0 + local index=1 + local nodes - while [[ ${1} ]]; do + # change tabs to spaces, strip leading spaces + pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch} - regex_str="${regex_str}|^ ${1}" + nodes[0]=${1}; shift + # make a regex of the configured nodes + # and initalize the nodes array for later + while [[ ${1} ]]; do + + regex_str="${regex_str}|^${1}-cluster_ip-1" + nodes[${index}]=${1} + ((index++)) shift done - pcs status | egrep "^Online:" > ${status_file} + # print the nodes that are expected to be online + grep -E "^Online:" ${scratch} + + echo - echo >> ${status_file} + # print the VIPs and which node they are on + grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4 - pcs status | egrep "${regex_str}" | sed -e "s/\t/ /" | cut -d ' ' -f 2,4 >> ${status_file} + echo - cat ${status_file} + # check if the VIP RAs are on the expected nodes + for n in ${nodes[*]}; do + + grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + done - rm -f ${status_file} + grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} + result=$? 
+ + if [ ${result} -eq 0 ]; then + echo "Cluster HA Status: BAD" + elif [ ${healthy} -eq 0 ]; then + echo "Cluster HA Status: HEALTHY" + else + echo "Cluster HA Status: FAILOVER" + fi + + rm -f ${scratch} } @@ -839,20 +965,16 @@ main() usage exit 0 fi - if [[ ${cmd} != *status ]]; then - HA_CONFDIR=${1%/}; shift - local ha_conf=${HA_CONFDIR}/ganesha-ha.conf - local node="" - local vip="" + HA_CONFDIR=${1%/}; shift + local ha_conf=${HA_CONFDIR}/ganesha-ha.conf + local node="" + local vip="" - # ignore any comment lines - cfgline=$(grep ^HA_NAME= ${ha_conf}) - eval $(echo ${cfgline} | grep -F HA_NAME=) - cfgline=$(grep ^HA_VOL_SERVER= ${ha_conf}) - eval $(echo ${cfgline} | grep -F HA_VOL_SERVER=) - cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) - eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) - fi + # ignore any comment lines + cfgline=$(grep ^HA_NAME= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_NAME=) + cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) case "${cmd}" in @@ -889,6 +1011,8 @@ main() teardown_resources ${HA_SERVERS} teardown_cluster ${HA_NAME} + + cleanup_ganesha_config ${HA_CONFDIR} ;; cleanup | --cleanup) @@ -927,7 +1051,8 @@ main() sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \ $HA_CONFDIR/ganesha-ha.conf - HA_SERVERS="${HA_SERVERS} ${node}" + + addnode_state_volume ${node} setup_copy_config ${HA_SERVERS} ;; @@ -950,7 +1075,7 @@ $HA_CONFDIR/ganesha-ha.conf setup_copy_config ${HA_SERVERS} - rm -rf ${HA_VOL_MNT}/nfs-ganesha/${node} + delnode_state_volume ${node} determine_service_manager diff --git a/extras/group-virt.example b/extras/group-virt.example index b699c9e98e2..4fe3760be2c 100644 --- a/extras/group-virt.example +++ b/extras/group-virt.example @@ -6,3 +6,5 @@ cluster.eager-lock=enable network.remote-dio=enable cluster.quorum-type=auto cluster.server-quorum-type=server +features.shard=on +cluster.data-self-heal-algorithm=full diff --git a/extras/hook-scripts/reset/post/Makefile.am b/extras/hook-scripts/reset/post/Makefile.am index bb28d990596..1b336ac1a85 100644 --- a/extras/hook-scripts/reset/post/Makefile.am +++ b/extras/hook-scripts/reset/post/Makefile.am @@ -1,4 +1 @@ -EXTRA_DIST = S31ganesha-reset.sh - -hookdir = $(GLUSTERD_WORKDIR)/hooks/1/reset/post/ -hook_SCRIPTS = S31ganesha-reset.sh +EXTRA_DIST = diff --git a/extras/hook-scripts/reset/post/S31ganesha-reset.sh b/extras/hook-scripts/reset/post/S31ganesha-reset.sh deleted file mode 100755 index 9538911a842..00000000000 --- a/extras/hook-scripts/reset/post/S31ganesha-reset.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -PROGNAME="Sganesha-reset" -OPTSPEC="volname:,gd-workdir:" -VOL= -GLUSTERD_WORKDIR= - -function parse_args () { - ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@) - eval set -- "$ARGS" - while true; do - case $1 in - --volname) - shift - VOL=$1 - ;; - --gd-workdir) - shift - GLUSTERD_WORKDIR=$1 - ;; - *) - shift - break - ;; - esac - shift - done -} - -function is_volume_started () { - volname=$1 - echo "$(grep status $GLUSTERD_WORKDIR/vols/"$volname"/info |\ - cut -d"=" -f2)" -} - -parse_args $@ -if ps aux | grep -q "[g]anesha.nfsd" - then - kill -s TERM `cat /var/run/ganesha.pid` - sleep 10 - rm -rf /var/lib/glusterfs-ganesha/exports - rm -rf /var/lib/glusterfs-ganesha/.export_added - sed -i /conf/d /var/lib/ganesha/nfs-ganesha.conf - if [ "1" = $(is_volume_started "$VOL") ]; - then - gluster volume start $VOL force - fi -fi diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh 
b/extras/hook-scripts/start/post/S31ganesha-start.sh index b85c789b9ed..d0b5101f0ea 100755 --- a/extras/hook-scripts/start/post/S31ganesha-start.sh +++ b/extras/hook-scripts/start/post/S31ganesha-start.sh @@ -61,6 +61,7 @@ echo "}" } #This function keeps track of export IDs and increments it with every new entry +#Also it adds the export dynamically by sending dbus signals function export_add() { count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l` @@ -79,17 +80,13 @@ function export_add() #fi fi echo $EXPORT_ID > $GANESHA_DIR/.export_added - sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \ + sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \ $GANESHA_DIR/exports/export.$VOL.conf echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1 -} -#This function adds a new export dynamically by sending dbus signals -function dynamic_export_add() -{ dbus-send --print-reply --system --dest=org.ganesha.nfsd \ /org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ -string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Path=/$VOL)" +string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)" } @@ -99,7 +96,6 @@ function start_ganesha() sed -i /$VOL.conf/d $CONF1 #Create a new export entry export_add $VOL - dynamic_export_add $VOL } diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index b459abcccee..6591716056c 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -539,7 +539,10 @@ def main_i(): if not 'config_file' in rconf: rconf['config_file'] = TMPL_CONFIG_FILE - upgrade_config_file(rconf['config_file'], confdata) + # Upgrade Config File only if it is session conf file + if rconf['config_file'] != TMPL_CONFIG_FILE: + upgrade_config_file(rconf['config_file'], confdata) + gcnf = GConffile( rconf['config_file'], canon_peers, confdata, defaults.__dict__, opts.__dict__, namedict) diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 796b9806f3a..b65abf98589 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -1069,13 +1069,15 @@ class GMasterChangelogMixin(GMasterCommon): self.sendmark(path, stime) # Update last_synced_time in status file based on stime - chkpt_time = gconf.configinterface.get_realtime( - "checkpoint") - checkpoint_time = 0 - if chkpt_time is not None: - checkpoint_time = int(chkpt_time) - - self.status.set_last_synced(stime, checkpoint_time) + # only update stime if stime xattr set to Brick root + if path == self.FLAT_DIR_HIERARCHY: + chkpt_time = gconf.configinterface.get_realtime( + "checkpoint") + checkpoint_time = 0 + if chkpt_time is not None: + checkpoint_time = int(chkpt_time) + + self.status.set_last_synced(stime, checkpoint_time) def update_worker_remote_node(self): node = sys.argv[-1] diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index 6d26a56a7bf..7daf7e49211 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -784,11 +784,13 @@ class Server(object): if st.st_ino == st1.st_ino: # we have a hard link, we can now unlink source try: - os.unlink(entry) + errno_wrap(os.unlink, [entry], + [ENOENT, ESTALE]) except OSError as e: if e.errno == EISDIR: try: - os.rmdir(entry) + errno_wrap(os.rmdir, [entry], + [ENOENT, ESTALE]) except OSError as e: if e.errno == ENOTEMPTY: logging.error( @@ -1001,7 +1003,8 @@ class SlaveRemote(object): (boolify(gconf.sync_acls) and ['--acls'] or []) + 
\ ['.'] + list(args) - if gconf.log_rsync_performance: + if boolify(gconf.configinterface.get_realtime( + "log_rsync_performance")): # use stdout=PIPE only when log_rsync_performance enabled # Else rsync will write to stdout and nobody is their # to consume. If PIPE is full rsync hangs. @@ -1020,7 +1023,8 @@ class SlaveRemote(object): for errline in stderr.strip().split("\n")[:-1]: logging.error("SYNC Error(Rsync): %s" % errline) - if gconf.log_rsync_performance: + if boolify(gconf.configinterface.get_realtime( + "log_rsync_performance")): rsync_msg = [] for line in stdout.split("\n"): if line.startswith("Number of files:") or \ diff --git a/glusterfs.spec.in b/glusterfs.spec.in index 94dbb96c81c..29bf00c60a9 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -1014,7 +1014,6 @@ exit 0 %{_libexecdir}/ganesha/* %{_prefix}/lib/ocf/resource.d/heartbeat/* %{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh -%{_sharedstatedir}/glusterd/hooks/1/reset/post/S31ganesha-reset.sh %if ( 0%{!?_without_georeplication:1} ) %files geo-replication @@ -1196,6 +1195,9 @@ exit 0 %endif %changelog +* Thu Dec 19 2016 Jiffin Tony Thottan <jhottan@redhat.com> +- remove S31ganesha-reset.sh from hooks (#1405951) + * Mon Aug 22 2016 Milind Changire <mchangir@redhat.com> - Add psmisc as dependency for glusterfs-fuse for killall command (#1367665) diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index ea6d3ff7225..556b82742cb 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -1321,10 +1321,14 @@ glusterfs_handle_barrier (rpcsvc_request_t *req) req->rpc_err = GARBAGE_ARGS; goto out; } + ret = -1; ctx = glusterfsd_ctx; - GF_ASSERT (ctx); + GF_VALIDATE_OR_GOTO (THIS->name, ctx, out); + active = ctx->active; + GF_VALIDATE_OR_GOTO (THIS->name, active, out); + any = active->first; dict = dict_new(); @@ -1889,6 +1893,8 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, rpc_transport_t *rpc_trans = NULL; int need_term = 0; int emval = 0; + static int log_ctr1; + static int log_ctr2; struct dnscache6 *dnscache = NULL; this = mydata; @@ -1900,11 +1906,11 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, ctx->cmd_args.connect_attempts++; gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, - "Connect attempt with remote-host: %s (%u/%d)", + "Connect attempt with remote-host: %s (%s) (%u/%d)", ctx->cmd_args.volfile_server, + strerror (errno), ctx->cmd_args.connect_attempts, ctx->cmd_args.max_connect_attempts); - if (!rpc->disabled) { /* * Check if dnscache is exhausted for current server @@ -1926,8 +1932,9 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, if (!ctx->active) need_term = 1; emval = ENOTCONN; - gf_log("glusterfsd-mgmt", GF_LOG_INFO, - "Exhausted all volfile servers"); + GF_LOG_OCCASIONALLY (log_ctr2, "glusterfsd-mgmt", + GF_LOG_INFO, + "Exhausted all volfile servers"); break; } diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c index 0a880cb752e..c65682a5517 100644 --- a/heal/src/glfs-heal.c +++ b/heal/src/glfs-heal.c @@ -39,7 +39,7 @@ xmlDocPtr glfsh_doc = NULL; ret = 0; \ } while (0) \ -typedef void (*print_status) (dict_t *, char *, uuid_t, uint64_t *, +typedef int (*print_status) (dict_t *, char *, uuid_t, uint64_t *, gf_boolean_t flag); int glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol, @@ -65,6 +65,11 @@ int32_t is_xml; "source-brick <HOSTNAME:BRICKNAME> [<FILE>] | "\ "split-brain-info]\n" +typedef enum { + 
GLFSH_MODE_CONTINUE_ON_ERROR = 1, + GLFSH_MODE_EXIT_ON_FIRST_FAILURE, +} glfsh_fail_mode_t; + int glfsh_init () { @@ -72,6 +77,30 @@ glfsh_init () } int +glfsh_end_op_granular_entry_heal (int op_ret, char *op_errstr) +{ + /* If error sting is available, give it higher precedence.*/ + + if (op_errstr) { + printf ("%s\n", op_errstr); + } else if (op_ret < 0) { + if (op_ret == -EAGAIN) + printf ("One or more entries need heal. Please execute " + "the command again after there are no entries " + "to be healed\n"); + else if (op_ret == -ENOTCONN) + printf ("One or more bricks could be down. Please " + "execute the command again after bringing all " + "bricks online and finishing any pending " + "heals\n"); + else + printf ("Command failed - %s. Please check the logs for" + " more details\n", strerror (-op_ret)); + } + return 0; +} + +int glfsh_end (int op_ret, char *op_errstr) { if (op_errstr) @@ -87,6 +116,12 @@ glfsh_print_hr_spb_status (char *path, uuid_t gfid, char *status) } void +glfsh_no_print_hr_heal_status (char *path, uuid_t gfid, char *status) +{ + return; +} + +void glfsh_print_hr_heal_status (char *path, uuid_t gfid, char *status) { printf ("%s%s\n", path, status); @@ -291,6 +326,12 @@ out: } int +glfsh_no_print_hr_heal_op_status (int ret, uint64_t num_entries, char *fmt_str) +{ + return 0; +} + +int glfsh_print_hr_heal_op_status (int ret, uint64_t num_entries, char *fmt_str) { if (ret < 0 && num_entries == 0) { @@ -417,7 +458,7 @@ glfsh_index_purge (xlator_t *subvol, inode_t *inode, char *name) return ret; } -void +int glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid, uint64_t *num_entries, gf_boolean_t flag) { @@ -429,7 +470,7 @@ glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid, ret = dict_get_str (dict, "heal-info", &value); if (ret) - return; + return 0; if (!strcmp (value, "split-brain")) { split_b = _gf_true; @@ -451,10 +492,10 @@ glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid, gfid, NULL); } } - return; + return 0; } -void +int glfsh_print_heal_status (dict_t *dict, char *path, uuid_t gfid, uint64_t *num_entries, gf_boolean_t ignore_dirty) { @@ -466,7 +507,7 @@ glfsh_print_heal_status (dict_t *dict, char *path, uuid_t gfid, ret = dict_get_str (dict, "heal-info", &value); if (ret || (!strcmp (value, "no-heal"))) - return; + return 0; if (!strcmp (value, "heal")) { ret = gf_asprintf (&status, " "); @@ -509,7 +550,7 @@ out: if (pending) { GF_FREE (status); status = NULL; - return; + return 0; } } if (ret == -1) @@ -522,7 +563,21 @@ out: status ? 
status : ""); GF_FREE (status); - return; + return 0; +} + +int +glfsh_heal_status_boolean (dict_t *dict, char *path, uuid_t gfid, + uint64_t *num_entries, gf_boolean_t ignore_dirty) +{ + int ret = 0; + char *value = NULL; + + ret = dict_get_str (dict, "heal-info", &value); + if ((!ret) && (!strcmp (value, "no-heal"))) + return 0; + else + return -1; } static int @@ -556,11 +611,12 @@ static int glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries, uint64_t *offset, uint64_t *num_entries, print_status glfsh_print_status, - gf_boolean_t ignore_dirty) + gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode) { gf_dirent_t *entry = NULL; gf_dirent_t *tmp = NULL; int ret = 0; + int print_status = 0; char *path = NULL; uuid_t gfid = {0}; xlator_t *this = NULL; @@ -586,8 +642,13 @@ glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries, gf_uuid_copy (loc.gfid, gfid); ret = syncop_getxattr (this, &loc, &dict, GF_HEAL_INFO, NULL, NULL); - if (ret) - continue; + if (ret) { + if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && + (ret == -ENOTCONN)) + goto out; + else + continue; + } ret = syncop_gfid_to_path (this->itable, xl, gfid, &path); @@ -596,11 +657,19 @@ glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries, ret = 0; continue; } - if (dict) - glfsh_print_status (dict, path, gfid, - num_entries, ignore_dirty); + if (dict) { + print_status = glfsh_print_status (dict, path, gfid, + num_entries, + ignore_dirty); + if ((print_status) && + (mode != GLFSH_MODE_CONTINUE_ON_ERROR)) { + ret = -EAGAIN; + goto out; + } + } } ret = 0; +out: GF_FREE (path); if (dict) { dict_unref (dict); @@ -615,17 +684,21 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, dict_t *xattr_req, uint64_t *num_entries, gf_boolean_t ignore) { - uint64_t offset = 0; + int ret = 0; + int heal_op = -1; + uint64_t offset = 0; gf_dirent_t entries; - int ret = 0; gf_boolean_t free_entries = _gf_false; - int heal_op = -1; + glfsh_fail_mode_t mode = GLFSH_MODE_CONTINUE_ON_ERROR; INIT_LIST_HEAD (&entries.list); ret = dict_get_int32 (xattr_req, "heal-op", &heal_op); if (ret) return ret; + if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) + mode = GLFSH_MODE_EXIT_ON_FIRST_FAILURE; + while (1) { ret = syncop_readdir (readdir_xl, fd, 131072, offset, &entries, NULL, NULL); @@ -642,7 +715,7 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, &entries, &offset, num_entries, glfsh_print_heal_status, - ignore); + ignore, mode); if (ret < 0) goto out; } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) { @@ -650,13 +723,20 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, &entries, &offset, num_entries, glfsh_print_spb_status, - ignore); + ignore, mode); if (ret < 0) goto out; } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) { ret = glfsh_heal_entries (fs, top_subvol, rootloc, &entries, &offset, num_entries, xattr_req); + } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) { + ret = glfsh_process_entries (readdir_xl, fd, &entries, + &offset, num_entries, + glfsh_heal_status_boolean, + ignore, mode); + if (ret < 0) + goto out; } gf_dirent_free (&entries); free_entries = _gf_false; @@ -669,6 +749,12 @@ out: } static int +glfsh_no_print_brick_from_xl (xlator_t *xl, loc_t *rootloc) +{ + return 0; +} + +static int glfsh_print_brick_from_xl (xlator_t *xl, loc_t *rootloc) { char *remote_host = NULL; @@ -746,6 +832,13 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, if (ret) goto out; + if 
((!is_parent_replicate) && + ((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || + (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE))) { + ret = 0; + goto out; + } + ret = glfsh_output->print_brick_from_xl (xl, rootloc); if (ret < 0) goto out; @@ -753,6 +846,10 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, ret = glfsh_print_pending_heals_type (fs, top_subvol, rootloc, xl, heal_op, xattr_req, GF_XATTROP_INDEX_GFID, &count); + + if (ret < 0 && heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) + goto out; + total += count; count = 0; if (ret == -ENOTCONN) @@ -775,6 +872,31 @@ out: } static int +glfsh_set_heal_options (glfs_t *fs, gf_xl_afr_op_t heal_op) +{ + int ret = 0; + + if ((heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) && + (heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) && + (heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME)) + return 0; + ret = glfs_set_xlator_option (fs, "*-replicate-*", "data-self-heal", + "on"); + if (ret) + goto out; + + ret = glfs_set_xlator_option (fs, "*-replicate-*", "metadata-self-heal", + "on"); + if (ret) + goto out; + + ret = glfs_set_xlator_option (fs, "*-replicate-*", "entry-self-heal", + "on"); +out: + return ret; +} + +static int glfsh_validate_volume (xlator_t *xl, gf_xl_afr_op_t heal_op) { xlator_t *heal_xl = NULL; @@ -833,14 +955,14 @@ out: return NULL; } - int glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, gf_xl_afr_op_t heal_op) { - xlator_t *xl = NULL; + int ret = 0; + xlator_t *xl = NULL; xlator_t *heal_xl = NULL; - xlator_t *old_THIS = NULL; + xlator_t *old_THIS = NULL; xl = top_subvol; while (xl->next) @@ -851,20 +973,28 @@ glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, if (heal_xl) { old_THIS = THIS; THIS = heal_xl; - glfsh_print_pending_heals (fs, top_subvol, - rootloc, xl, - heal_op, - !strcmp - (heal_xl->type, - "cluster/replicate")); + ret = glfsh_print_pending_heals (fs, top_subvol, + rootloc, xl, + heal_op, + !strcmp + (heal_xl->type, + "cluster/replicate")); THIS = old_THIS; + + if ((ret < 0) && + (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)) + goto out; } } xl = xl->prev; } - return 0; +out: + if (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) + ret = 0; + + return ret; } int @@ -1102,6 +1232,15 @@ glfsh_info_t glfsh_human_readable = { .end = glfsh_end }; +glfsh_info_t glfsh_no_print = { + .init = glfsh_init, + .print_brick_from_xl = glfsh_no_print_brick_from_xl, + .print_heal_op_status = glfsh_no_print_hr_heal_op_status, + .print_heal_status = glfsh_no_print_hr_heal_status, + .print_spb_status = glfsh_no_print_hr_heal_status, + .end = glfsh_end_op_granular_entry_heal +}; + #if (HAVE_LIB_XML) glfsh_info_t glfsh_xml_output = { .init = glfsh_xml_init, @@ -1145,6 +1284,8 @@ main (int argc, char **argv) } else if (!strcmp (argv[2], "xml")) { heal_op = GF_SHD_OP_INDEX_SUMMARY; is_xml = 1; + } else if (!strcmp (argv[2], "granular-entry-heal-op")) { + heal_op = GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE; } else { printf (USAGE_STR, argv[0]); ret = -1; @@ -1201,6 +1342,9 @@ main (int argc, char **argv) } + if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) + glfsh_output = &glfsh_no_print; + ret = glfsh_output->init (); if (ret) exit (EXIT_FAILURE); @@ -1224,6 +1368,13 @@ main (int argc, char **argv) "%s", strerror (errno)); goto out; } + + ret = glfsh_set_heal_options (fs, heal_op); + if (ret) { + printf ("Setting xlator heal options failed, %s\n", + strerror(errno)); + goto out; + } snprintf (logfilepath, sizeof (logfilepath), 
DEFAULT_HEAL_LOG_FILE_DIRECTORY"/glfsheal-%s.log", volname); ret = glfs_set_logging(fs, logfilepath, GF_LOG_INFO); @@ -1277,6 +1428,7 @@ main (int argc, char **argv) switch (heal_op) { case GF_SHD_OP_INDEX_SUMMARY: case GF_SHD_OP_SPLIT_BRAIN_FILES: + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE: ret = glfsh_gather_heal_info (fs, top_subvol, &rootloc, heal_op); break; @@ -1295,6 +1447,8 @@ main (int argc, char **argv) } glfsh_output->end (ret, NULL); + if (ret < 0) + ret = -ret; loc_wipe (&rootloc); glfs_subvol_done (fs, top_subvol); cleanup (fs); diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h index 54e7589b164..56736e52052 100644 --- a/libglusterfs/src/compat.h +++ b/libglusterfs/src/compat.h @@ -158,6 +158,10 @@ enum { #define s6_addr32 __u6_addr.__u6_addr32 #endif +#ifndef LOGIN_NAME_MAX +#define LOGIN_NAME_MAX 256 +#endif + /* Posix dictates NAME_MAX to be used */ # ifndef NAME_MAX # ifdef MAXNAMLEN diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c index 2aa14654b9e..b009e6270a2 100644 --- a/libglusterfs/src/ctx.c +++ b/libglusterfs/src/ctx.c @@ -35,6 +35,8 @@ glusterfs_ctx_new () ctx->daemon_pipe[0] = -1; ctx->daemon_pipe[1] = -1; + ctx->log.loglevel = DEFAULT_LOG_LEVEL; + /* lock is never destroyed! */ ret = LOCK_INIT (&ctx->lock); if (ret) { diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c index 40e80d5c33e..4d81ade8b60 100644 --- a/libglusterfs/src/mem-pool.c +++ b/libglusterfs/src/mem-pool.c @@ -422,7 +422,11 @@ mem_pool_new_fn (unsigned long sizeof_type, if (!ctx) goto out; - list_add (&mem_pool->global_list, &ctx->mempool_list); + LOCK (&ctx->lock); + { + list_add (&mem_pool->global_list, &ctx->mempool_list); + } + UNLOCK (&ctx->lock); out: return mem_pool; diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h index 2899be9bf2f..393fdac8e73 100644 --- a/libglusterfs/src/stack.h +++ b/libglusterfs/src/stack.h @@ -275,17 +275,19 @@ STACK_RESET (call_stack_t *stack) #define STACK_WIND_TAIL(frame, obj, fn, params ...) 
\ do { \ xlator_t *old_THIS = NULL; \ + xlator_t *next_xl = obj; \ + typeof(fn) next_xl_fn = fn; \ \ - frame->this = obj; \ + frame->this = next_xl; \ frame->wind_to = #fn; \ old_THIS = THIS; \ - THIS = obj; \ + THIS = next_xl; \ gf_msg_trace ("stack-trace", 0, \ "stack-address: %p, " \ "winding from %s to %s", \ frame->root, old_THIS->name, \ THIS->name); \ - fn (frame, obj, params); \ + next_xl_fn (frame, next_xl, params); \ THIS = old_THIS; \ } while (0) diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c index 8f25db237f5..de6033fc501 100644 --- a/libglusterfs/src/syncop-utils.c +++ b/libglusterfs/src/syncop-utils.c @@ -266,9 +266,10 @@ _dir_scan_job_fn (void *data) entry = NULL; pthread_mutex_lock (scan_data->mut); { - if (ret || list_empty (&scan_data->q->list)) { - (*scan_data->jobs_running)--; + if (ret) *scan_data->retval |= ret; + if (list_empty (&scan_data->q->list)) { + (*scan_data->jobs_running)--; pthread_cond_broadcast (scan_data->cond); } else { entry = list_first_entry (&scan_data->q->list, @@ -406,10 +407,13 @@ syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, ret = fn (subvol, entry, loc, data); gf_dirent_entry_free (entry); if (ret) - break; + goto out; continue; } + if (retval) /*Any jobs failed?*/ + goto out; + pthread_mutex_lock (&mut); { while (qlen == max_qlen) @@ -423,8 +427,7 @@ syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, } } pthread_mutex_unlock (&mut); - if (retval) /*Any jobs failed?*/ - break; + if (!entry) continue; @@ -433,7 +436,7 @@ syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, &retval, &mut, &cond, &jobs_running, &qlen, fn, data); if (ret) - break; + goto out; } } diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 915d358e707..8a178148a10 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -252,6 +252,8 @@ typedef enum { GF_SHD_OP_HEAL_ENABLE, GF_SHD_OP_HEAL_DISABLE, GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME, + GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE, + GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE, } gf_xl_afr_op_t ; struct gf_gsync_detailed_status_ { diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index a9e43eb42f1..fe099f92f60 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -872,6 +872,41 @@ rpc_clnt_destroy (struct rpc_clnt *rpc); #define RPC_THIS_RESTORE (THIS = old_THIS) +static int +rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn) +{ + struct timespec ts = {0, }; + gf_boolean_t unref_clnt = _gf_false; + + rpc_clnt_connection_cleanup (conn); + + pthread_mutex_lock (&conn->lock); + { + if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) { + ts.tv_sec = 10; + ts.tv_nsec = 0; + + rpc_clnt_ref (clnt); + conn->reconnect = gf_timer_call_after (clnt->ctx, ts, + rpc_clnt_reconnect, conn); + if (conn->reconnect == NULL) { + gf_log (conn->name, GF_LOG_WARNING, + "Cannot create rpc_clnt_reconnect timer"); + unref_clnt = _gf_true; + } + } + } + pthread_mutex_unlock (&conn->lock); + + if (clnt->notifyfn) + clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL); + + if (unref_clnt) + rpc_clnt_ref (clnt); + + return 0; +} + int rpc_clnt_notify (rpc_transport_t *trans, void *mydata, rpc_transport_event_t event, void *data, ...) 
@@ -881,9 +916,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, int ret = -1; rpc_request_info_t *req_info = NULL; rpc_transport_pollin_t *pollin = NULL; - struct timespec ts = {0, }; void *clnt_mydata = NULL; - gf_boolean_t unref_clnt = _gf_false; DECLARE_OLD_THIS; conn = mydata; @@ -899,35 +932,11 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, switch (event) { case RPC_TRANSPORT_DISCONNECT: { - rpc_clnt_connection_cleanup (conn); - - pthread_mutex_lock (&conn->lock); + pthread_mutex_lock (&clnt->notifylock); { - if (!conn->rpc_clnt->disabled - && (conn->reconnect == NULL)) { - ts.tv_sec = 10; - ts.tv_nsec = 0; - - rpc_clnt_ref (clnt); - conn->reconnect = - gf_timer_call_after (clnt->ctx, ts, - rpc_clnt_reconnect, - conn); - if (conn->reconnect == NULL) { - gf_log (conn->name, GF_LOG_WARNING, - "Cannot create rpc_clnt_reconnect timer"); - unref_clnt = _gf_true; - } - } + rpc_clnt_handle_disconnect (clnt, conn); } - pthread_mutex_unlock (&conn->lock); - - if (clnt->notifyfn) - ret = clnt->notifyfn (clnt, clnt->mydata, - RPC_CLNT_DISCONNECT, NULL); - if (unref_clnt) - rpc_clnt_ref (clnt); - + pthread_mutex_unlock (&clnt->notifylock); break; } @@ -982,17 +991,21 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, case RPC_TRANSPORT_CONNECT: { - /* Every time there is a disconnection, processes - should try to connect to 'glusterd' (ie, default - port) or whichever port given as 'option remote-port' - in volume file. */ - /* Below code makes sure the (re-)configured port lasts - for just one successful attempt */ - conn->config.remote_port = 0; - - if (clnt->notifyfn) - ret = clnt->notifyfn (clnt, clnt->mydata, - RPC_CLNT_CONNECT, NULL); + pthread_mutex_lock (&clnt->notifylock); + { + /* Every time there is a disconnection, processes + * should try to connect to 'glusterd' (ie, default + * port) or whichever port given as 'option remote-port' + * in volume file. 
*/ + /* Below code makes sure the (re-)configured port lasts + * for just one successful attempt */ + conn->config.remote_port = 0; + + if (clnt->notifyfn) + ret = clnt->notifyfn (clnt, clnt->mydata, + RPC_CLNT_CONNECT, NULL); + } + pthread_mutex_unlock (&clnt->notifylock); break; } @@ -1116,6 +1129,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name, } pthread_mutex_init (&rpc->lock, NULL); + pthread_mutex_init (&rpc->notifylock, NULL); rpc->ctx = ctx; rpc->owner = owner; @@ -1125,6 +1139,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name, rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size); if (rpc->reqpool == NULL) { pthread_mutex_destroy (&rpc->lock); + pthread_mutex_destroy (&rpc->notifylock); GF_FREE (rpc); rpc = NULL; goto out; @@ -1134,6 +1149,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name, reqpool_size); if (rpc->saved_frames_pool == NULL) { pthread_mutex_destroy (&rpc->lock); + pthread_mutex_destroy (&rpc->notifylock); mem_pool_destroy (rpc->reqpool); GF_FREE (rpc); rpc = NULL; @@ -1143,6 +1159,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name, ret = rpc_clnt_connection_init (rpc, ctx, options, name); if (ret == -1) { pthread_mutex_destroy (&rpc->lock); + pthread_mutex_destroy (&rpc->notifylock); mem_pool_destroy (rpc->reqpool); mem_pool_destroy (rpc->saved_frames_pool); GF_FREE (rpc); @@ -1738,6 +1755,7 @@ rpc_clnt_destroy (struct rpc_clnt *rpc) saved_frames_destroy (rpc->conn.saved_frames); pthread_mutex_destroy (&rpc->lock); pthread_mutex_destroy (&rpc->conn.lock); + pthread_mutex_destroy (&rpc->notifylock); /* mem-pool should be destroyed, otherwise, it will cause huge memory leaks */ diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h index df84d5966d8..2ccaa56e4cb 100644 --- a/rpc/rpc-lib/src/rpc-clnt.h +++ b/rpc/rpc-lib/src/rpc-clnt.h @@ -173,6 +173,7 @@ struct rpc_req { typedef struct rpc_clnt { pthread_mutex_t lock; + pthread_mutex_t notifylock; rpc_clnt_notify_t notifyfn; rpc_clnt_connection_t conn; void *mydata; diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index 76548192c9a..8c1690f820c 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -2544,7 +2544,7 @@ socket_poller (void *ctx) "poll error on socket"); break; } - if (ret < 0 && errno != ENODATA) { + if (ret < 0) { GF_LOG_OCCASIONALLY (poll_err_cnt, this->name, GF_LOG_ERROR, "socket_poller %s failed (%s)", diff --git a/tests/basic/afr/granular-esh/add-brick.t b/tests/basic/afr/granular-esh/add-brick.t index f3125d7fe7d..270cf1d32a6 100644 --- a/tests/basic/afr/granular-esh/add-brick.t +++ b/tests/basic/afr/granular-esh/add-brick.t @@ -14,7 +14,7 @@ TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume set $V0 self-heal-daemon off -TEST $CLI volume set $V0 granular-entry-heal on +TEST $CLI volume heal $V0 granular-entry-heal enable TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t new file mode 100644 index 00000000000..a655180a095 --- /dev/null +++ b/tests/basic/afr/granular-esh/cli.t @@ -0,0 +1,142 @@ +#!/bin/bash + +. $(dirname $0)/../../../include.rc +. $(dirname $0)/../../../volume.rc +. 
$(dirname $0)/../../../afr.rc + +cleanup + +TESTS_EXPECTED_IN_LOOP=4 + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +# Test that enabling the option should work on a newly created volume +TEST $CLI volume set $V0 cluster.granular-entry-heal on +TEST $CLI volume set $V0 cluster.granular-entry-heal off + +######################### +##### DISPERSE TEST ##### +######################### +# Execute the same command on a disperse volume and make sure it fails. +TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2} +TEST $CLI volume start $V1 +TEST ! $CLI volume heal $V1 granular-entry-heal enable +TEST ! $CLI volume heal $V1 granular-entry-heal disable + +####################### +###### TIER TEST ###### +####################### +# Execute the same command on a disperse + replicate tiered volume and make +# sure the option is set on the replicate leg of the volume +TEST $CLI volume attach-tier $V1 replica 2 $H0:$B0/${V1}{3,4} +TEST $CLI volume heal $V1 granular-entry-heal enable +EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal +TEST $CLI volume heal $V1 granular-entry-heal disable +EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal + +# Kill a disperse brick and make heal be pending on the volume. +TEST kill_brick $V1 $H0 $B0/${V1}0 + +# Now make sure that one offline brick in disperse does not affect enabling the +# option on the volume. +TEST $CLI volume heal $V1 granular-entry-heal enable +EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal +TEST $CLI volume heal $V1 granular-entry-heal disable +EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal + +# Now kill a replicate brick. +TEST kill_brick $V1 $H0 $B0/${V1}3 +# Now make sure that one offline brick in replicate causes the command to be +# failed. +TEST ! $CLI volume heal $V1 granular-entry-heal enable +EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal + +###################### +### REPLICATE TEST ### +###################### +TEST $CLI volume start $V0 +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 self-heal-daemon off +# Test that the volume-set way of enabling the option is disallowed +TEST ! $CLI volume set $V0 granular-entry-heal on +# Test that the volume-heal way of enabling the option is allowed +TEST $CLI volume heal $V0 granular-entry-heal enable +# Volume-reset of the option should be allowed +TEST $CLI volume reset $V0 granular-entry-heal +TEST $CLI volume heal $V0 granular-entry-heal enable + +EXPECT "enable" volume_option $V0 cluster.granular-entry-heal + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 + +# Kill brick-0. +TEST kill_brick $V0 $H0 $B0/${V0}0 + +# Disabling the option should work even when one or more bricks are down +TEST $CLI volume heal $V0 granular-entry-heal disable +# When a brick is down, 'enable' attempt should be failed +TEST ! 
$CLI volume heal $V0 granular-entry-heal enable + +# Restart the killed brick +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +# When all bricks are up, it should be possible to enable the option +TEST $CLI volume heal $V0 granular-entry-heal enable + +# Kill brick-0 again +TEST kill_brick $V0 $H0 $B0/${V0}0 + +# Create files under root +for i in {1..2} +do + echo $i > $M0/f$i +done + +# Test that the index associated with '/' is created on B1. +TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID + +# Check for successful creation of granular entry indices +for i in {1..2} +do + TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i +done + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST gluster volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0 + +# Wait for heal to complete +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +# Test if data was healed +for i in {1..2} +do + TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i +done + +# Now verify that there are no name indices left after self-heal +TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1 +TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2 +TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID + +# Perform a volume-reset-all-options operation +TEST $CLI volume reset $V0 +# Ensure that granular entry heal is also disabled +EXPECT "no" volume_get_field $V0 cluster.granular-entry-heal +EXPECT "on" volume_get_field $V0 cluster.entry-self-heal + +cleanup +#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1399038 diff --git a/tests/basic/afr/granular-esh/conservative-merge.t b/tests/basic/afr/granular-esh/conservative-merge.t index b566a0ea4d3..b170e47e0cb 100644 --- a/tests/basic/afr/granular-esh/conservative-merge.t +++ b/tests/basic/afr/granular-esh/conservative-merge.t @@ -11,13 +11,13 @@ TESTS_EXPECTED_IN_LOOP=4 TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 TEST $CLI volume set $V0 self-heal-daemon off TEST $CLI volume set $V0 data-self-heal off TEST $CLI volume set $V0 metadata-self-heal off TEST $CLI volume set $V0 entry-self-heal off -TEST $CLI volume set $V0 granular-entry-heal on +TEST $CLI volume heal $V0 granular-entry-heal enable -TEST $CLI volume start $V0 TEST $GFS --volfile-id=$V0 -s $H0 $M0 TEST mkdir $M0/dir diff --git a/tests/basic/afr/granular-esh/granular-esh.t b/tests/basic/afr/granular-esh/granular-esh.t index ee53878e004..de0e8f4290b 100644 --- a/tests/basic/afr/granular-esh/granular-esh.t +++ b/tests/basic/afr/granular-esh/granular-esh.t @@ -16,7 +16,7 @@ TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume set $V0 self-heal-daemon off -TEST $CLI volume set $V0 granular-entry-heal on +TEST $CLI volume heal $V0 granular-entry-heal enable TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 diff --git a/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t 
b/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t new file mode 100644 index 00000000000..1b5421bf4b6 --- /dev/null +++ b/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t @@ -0,0 +1,76 @@ +#!/bin/bash + +. $(dirname $0)/../../../include.rc +. $(dirname $0)/../../../volume.rc +. $(dirname $0)/../../../afr.rc + +cleanup + +TESTS_EXPECTED_IN_LOOP=4 + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume heal $V0 granular-entry-heal enable + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 + +# Kill brick-0. +TEST kill_brick $V0 $H0 $B0/${V0}0 + +# Create files under root +for i in {1..2} +do + echo $i > $M0/f$i +done + +# Test that the index associated with '/' is created on B1. +TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID + +# Check for successful creation of granular entry indices +for i in {1..2} +do + TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i +done + +# Now disable granular-entry-heal +TEST $CLI volume heal $V0 granular-entry-heal disable + +# Start the brick that was down +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +# Enable shd +TEST gluster volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +# Now the indices created are granular but the heal is going to be of the +# normal kind. We test to make sure that heal still completes fine and that +# the stale granular indices are going to be deleted + +TEST $CLI volume heal $V0 + +# Wait for heal to complete +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +# Test if data was healed +for i in {1..2} +do + TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i +done + +# Now verify that there are no name indices left after self-heal +TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1 +TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2 +TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID + +cleanup diff --git a/tests/basic/afr/granular-esh/replace-brick.t b/tests/basic/afr/granular-esh/replace-brick.t index aaa54da2a2c..639ed81b95c 100644 --- a/tests/basic/afr/granular-esh/replace-brick.t +++ b/tests/basic/afr/granular-esh/replace-brick.t @@ -12,7 +12,7 @@ TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume set $V0 self-heal-daemon off -TEST $CLI volume set $V0 granular-entry-heal on +TEST $CLI volume heal $V0 granular-entry-heal enable TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/basic/afr/split-brain-favorite-child-policy.t index 7a14852685c..3df8e718bf0 100644 --- a/tests/basic/afr/split-brain-favorite-child-policy.t +++ b/tests/basic/afr/split-brain-favorite-child-policy.t @@ -42,8 +42,15 @@ TEST $CLI volume heal $V0 cat $M0/file > /dev/null EXPECT "1" echo $? 
-#We know that the first brick has latest ctime. -LATEST_CTIME_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) +# Umount to prevent further FOPS on the file, then find the brick with latest ctime. +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +ctime1=`stat -c "%.Z" $B0/${V0}0/file` +ctime2=`stat -c "%.Z" $B0/${V0}1/file` +if (( $(echo "$ctime1 > $ctime2" | bc -l) )); then + LATEST_CTIME_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) +else + LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) +fi TEST $CLI volume set $V0 cluster.favorite-child-policy ctime TEST $CLI volume start $V0 force EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status @@ -51,10 +58,13 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 TEST $CLI volume heal $V0 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) +B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) +TEST [ "$LATEST_CTIME_MD5" == "$B0_MD5" ] +TEST [ "$LATEST_CTIME_MD5" == "$B1_MD5" ] +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 cat $M0/file > /dev/null EXPECT "0" echo $? -HEALED_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ] ############ Healing using favorite-child-policy = mtime ################# TEST $CLI volume set $V0 cluster.favorite-child-policy none diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t index 302a3e6144b..c66bb5d44df 100644 --- a/tests/basic/afr/split-brain-healing.t +++ b/tests/basic/afr/split-brain-healing.t @@ -31,6 +31,9 @@ TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume start $V0 TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t new file mode 100755 index 00000000000..6351ba22511 --- /dev/null +++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t @@ -0,0 +1,41 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 cluster.shd-wait-qlength 100 +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +touch $M0/file{1..200} + +TEST kill_brick $V0 $H0 $B0/${V0}1 +for i in {1..200}; do echo hello>$M0/file$i; done +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +EXPECT "200" get_pending_heal_count $V0 +TEST $CLI volume set $V0 self-heal-daemon on + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0 +TEST $CLI volume set $V0 self-heal-daemon off +EXPECT_NOT "^0$" get_pending_heal_count $V0 +TEST $CLI volume set $V0 self-heal-daemon on + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +TEST umount $M0 +cleanup; diff --git a/tests/bugs/glusterd/1313628-import-brick-ports-always.t b/tests/bugs/glusterd/1313628-import-brick-ports-always.t deleted file mode 100755 index d04c4293466..00000000000 --- a/tests/bugs/glusterd/1313628-import-brick-ports-always.t +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../cluster.rc - -## Check that brick ports are always copied on import -## -------------------------------------------------- -## This test checks that the brick ports are copied on import by checking that -## they don't change when the following happens, -## - Stop a volume -## - Stop glusterd -## - Start the stopped volume -## - Start the stopped glusterd - -function get_brick_port() { - local VOL=$1 - local BRICK=$2 - $CLI2 volume status $VOL $BRICK --xml | sed -ne 's/.*<port>\([0-9]*\)<\/port>/\1/p' -} - - -cleanup - -TEST launch_cluster 2 -TEST $CLI1 peer probe $H2 -EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count - -# Create and start volume so that brick port assignment happens -TEST $CLI1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 -TEST $CLI1 volume start $V0 - -# Save port for 2nd brick -BPORT_ORIG=$(get_brick_port $V0 $H2:$B2/$V0) - -# Stop volume, stop 2nd glusterd, start volume, start 2nd glusterd -TEST $CLI1 volume stop $V0 -TEST kill_glusterd 2 - -TEST $CLI1 volume start $V0 -TEST start_glusterd 2 -EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count - -# Get new port and compare with old one -EXPECT_WITHIN $PROCESS_UP_TIMEOUT $BPORT_ORIG get_brick_port $V0 $H2:$B2/$V0 - -$CLI1 volume stop $V0 - -cleanup diff --git a/tests/bugs/io-cache/bug-read-hang.c b/tests/bugs/io-cache/bug-read-hang.c new file mode 100644 index 00000000000..74dfddd7a6e --- /dev/null +++ b/tests/bugs/io-cache/bug-read-hang.c @@ -0,0 +1,125 @@ +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NO_INIT 1 + +int count = 0; +void +read_cbk (glfs_fd_t *fd, ssize_t ret, void *data) { +count++; +} + +glfs_t * +setup_new_client(char *hostname, char *volname, char *log_file, int flag) +{ + int ret = 0; + glfs_t *fs = 
NULL; + + fs = glfs_new (volname); + if (!fs) { + fprintf (stderr, "\nglfs_new: returned NULL (%s)\n", + strerror (errno)); + goto error; + } + + ret = glfs_set_volfile_server (fs, "tcp", hostname, 24007); + if (ret < 0) { + fprintf (stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", + ret, strerror (errno)); + goto error; + } + + ret = glfs_set_logging (fs, log_file, 7); + if (ret < 0) { + fprintf (stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", + ret, strerror (errno)); + goto error; + } + + if (flag == NO_INIT) + goto out; + + ret = glfs_init (fs); + if (ret < 0) { + fprintf (stderr, "\nglfs_init failed with ret: %d (%s)\n", + ret, strerror (errno)); + goto error; + } + +out: + return fs; +error: + return NULL; +} + +int +main (int argc, char *argv[]) +{ + int ret = 0; + glfs_t *fs = NULL; + struct glfs_fd *fd = NULL; + char *volname = NULL; + char *log_file = NULL; + char *hostname = NULL; + char *buf = NULL; + struct stat stat; + + if (argc != 4) { + fprintf (stderr, + "Expect following args %s <hostname> <Vol> <log file location>\n" + , argv[0]); + return -1; + } + + hostname = argv[1]; + volname = argv[2]; + log_file = argv[3]; + + fs = setup_new_client (hostname, volname, log_file, 0); + if (!fs) { + fprintf (stderr, "\nsetup_new_client: returned NULL (%s)\n", + strerror (errno)); + goto error; + } + + fd = glfs_opendir (fs, "/"); + if (!fd) { + fprintf (stderr, "/: %s\n", strerror (errno)); + return -1; + } + + glfs_readdirplus (fd, &stat); + + fd = glfs_open (fs, "/test", O_RDWR); + if (fd == NULL) { + fprintf (stderr, "glfs_open: returned NULL\n"); + goto error; + } + + buf = (char *) malloc (5); + + ret = glfs_pread (fd, buf, 5, 0, 0); + if (ret < 0) { + fprintf (stderr, "Read(%s): %d (%s)\n", "test", ret, + strerror (errno)); + return ret; + } + + free (buf); + glfs_close (fd); + + ret = glfs_fini (fs); + if (ret < 0) { + fprintf (stderr, "glfs_fini failed with ret: %d (%s)\n", + ret, strerror (errno)); + return -1; + } + + return 0; +error: + return -1; +} diff --git a/tests/bugs/io-cache/bug-read-hang.t b/tests/bugs/io-cache/bug-read-hang.t new file mode 100755 index 00000000000..fb20c2c5515 --- /dev/null +++ b/tests/bugs/io-cache/bug-read-hang.t @@ -0,0 +1,30 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +#. $(dirname $0)/../../volume.rc + +cleanup; + +#Basic checks +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..2}; + +TEST $CLI volume set $V0 performance.md-cache-timeout 60 +TEST $CLI volume set $V0 open-behind off + +logdir=`gluster --print-logdir` + +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +echo "Hello" > $M0/test + +TEST build_tester $(dirname $0)/bug-read-hang.c -lgfapi +TEST $(dirname $0)/bug-read-hang $H0 $V0 $logdir/bug-read-hang.log + +cleanup_tester $(dirname $0)/bug-read-hang + +cleanup; diff --git a/tests/bugs/replicate/bug-1402730.t b/tests/bugs/replicate/bug-1402730.t new file mode 100644 index 00000000000..dcde60dbdf7 --- /dev/null +++ b/tests/bugs/replicate/bug-1402730.t @@ -0,0 +1,42 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 granular-entry-heal on +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0 + +TEST mkdir -p $M0/a/b/c -p +cd $M0/a/b/c + +TEST kill_brick $V0 $H0 $B0/${V0}2 +rm -rf $B0/${V0}2/* +rm -rf $B0/${V0}2/.glusterfs +TEST $CLI volume start $V0 force + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +TEST touch file + +GFID_C=$(get_gfid_string $M0/a/b/c) +TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$GFID_C/file +TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$GFID_C/file + +EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/a/b/c trusted.afr.$V0-client-2 entry +EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a/b/c trusted.afr.$V0-client-2 entry + +cd ~ + +cleanup diff --git a/tests/bugs/replicate/bug-1408712.t b/tests/bugs/replicate/bug-1408712.t new file mode 100644 index 00000000000..b26e8a06923 --- /dev/null +++ b/tests/bugs/replicate/bug-1408712.t @@ -0,0 +1,87 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup + +TESTS_EXPECTED_IN_LOOP=12 + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume start $V0 +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume heal $V0 granular-entry-heal enable +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 performance.flush-behind off + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 + +cd $M0 +TEST dd if=/dev/zero of=file bs=1M count=8 + +# Kill brick-0. +TEST kill_brick $V0 $H0 $B0/${V0}0 + +TEST "dd if=/dev/zero bs=1M count=8 >> file" + +FILE_GFID=$(get_gfid_string $M0/file) + +# Test that the index associated with '/.shard' is created on B1 and B2. +TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID +TEST stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID +# Check for successful creation of granular entry indices +for i in {2..3} +do + TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i + TEST_IN_LOOP stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i +done + +cd ~ +TEST md5sum $M1/file + +# Test that the index associated with '/.shard' and the created shards do not disappear on B1 and B2. 
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID +TEST stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID +for i in {2..3} +do + TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i + TEST_IN_LOOP stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i +done + +# Start the brick that was down +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +# Enable shd +TEST gluster volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + +TEST $CLI volume heal $V0 + +# Wait for heal to complete +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +# Now verify that there are no name indices left after self-heal +TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID +TEST ! stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID + +for i in {2..3} +do + TEST_IN_LOOP ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i + TEST_IN_LOOP ! stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i +done + +cleanup diff --git a/tests/bugs/snapshot/bug-1316437.t b/tests/bugs/snapshot/bug-1316437.t index 30a221e3171..0ae57a71657 100644 --- a/tests/bugs/snapshot/bug-1316437.t +++ b/tests/bugs/snapshot/bug-1316437.t @@ -20,8 +20,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist killall glusterd glusterfsd glusterfs -SNAPD_PID=$(ps auxww | grep snapd | grep -v grep | awk '{print $2}'); -TEST ! [ $SNAPD_PID -gt 0 ]; +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_if_snapd_exist glusterd diff --git a/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t new file mode 100755 index 00000000000..1c50f746527 --- /dev/null +++ b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t @@ -0,0 +1,98 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../traps.rc +. $(dirname $0)/../../snapshot.rc +. $(dirname $0)/../../ssl.rc + +function file_exists +{ + if [ -f $1 ]; then echo "Y"; else echo "N"; fi +} + +function volume_online_brick_count +{ + $CLI volume status $V0 | awk '$1 == "Brick" && $6 != "N/A" { print $6}' | wc -l; +} + +cleanup; + +# Initialize the test setup +TEST setup_lvm 1; + +TEST create_self_signed_certs + +# Start glusterd +TEST glusterd +TEST pidof glusterd; + +# Create and start the volume +TEST $CLI volume create $V0 $H0:$L1/b1; + +TEST $CLI volume start $V0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count + +# Mount the volume and create some files +TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0; + +TEST touch $M0/file; + +# Enable activate-on-create +TEST $CLI snapshot config activate-on-create enable; + +# Create a snapshot +TEST $CLI snapshot create snap1 $V0 no-timestamp; + +TEST $CLI volume set $V0 features.uss enable; + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist + +EXPECT "Y" file_exists $M0/file +# Volume set can trigger graph switch therefore chances are we send this +# req to old graph. Old graph will not have .snaps. 
Therefore we should +# wait for some time. +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +# Enable management encryption +touch $GLUSTERD_WORKDIR/secure-access +killall_gluster + +TEST glusterd +TEST pidof glusterd; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count + +# Mount the volume +TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0; + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist + +EXPECT "Y" file_exists $M0/file +EXPECT "Y" file_exists $M0/.snaps/snap1/file + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +# Enable I/O encryption +TEST $CLI volume set $V0 client.ssl on +TEST $CLI volume set $V0 server.ssl on + +killall_gluster + +TEST glusterd +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count + +# Mount the volume +TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0; + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist + +EXPECT "Y" file_exists $M0/file +EXPECT "Y" file_exists $M0/.snaps/snap1/file + +TEST $CLI snapshot delete all +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +cleanup; diff --git a/tests/bugs/upcall/bug-1394131.t b/tests/bugs/upcall/bug-1394131.t new file mode 100755 index 00000000000..b371ce4e682 --- /dev/null +++ b/tests/bugs/upcall/bug-1394131.t @@ -0,0 +1,29 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +## 1. Start glusterd +TEST glusterd; + +## 2. Lets create volume +TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3}; + +## 3. Enable the upcall xlator, and increase the md-cache timeout to max +TEST $CLI volume set $V0 features.cache-invalidation on +TEST $CLI volume set $V0 features.cache-invalidation-timeout 600 +TEST $CLI volume set $V0 indexing on + +## 6. Start the volume +TEST $CLI volume start $V0 + +## 7. Create two gluster mounts +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 + +## 8. 
Create directory and files from the M0 +TEST touch $M0/file1 +TEST mv $M0/file1 $M0/file2 + +cleanup; diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t index 9ee7fc6c16f..f5909f320ac 100644 --- a/tests/features/ssl-ciphers.t +++ b/tests/features/ssl-ciphers.t @@ -137,6 +137,7 @@ EXPECT "`pwd`/`dirname $0`/dh1024.pem" volume_option $V0 ssl.dh-param TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "Y" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT # Test the cipher-list option @@ -145,6 +146,7 @@ EXPECT AES256-SHA volume_option $V0 ssl.cipher-list TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT @@ -154,6 +156,7 @@ EXPECT EECDH:EDH:!TLSv1 volume_option $V0 ssl.cipher-list TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT @@ -162,6 +165,7 @@ EXPECT invalid volume_option $V0 ssl.ec-curve TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT TEST $CLI volume set $V0 ssl.ec-curve secp521r1 @@ -169,6 +173,7 @@ EXPECT secp521r1 volume_option $V0 ssl.ec-curve TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT # test revocation diff --git a/tests/include.rc b/tests/include.rc index d1acbee5995..9f32e88f5f5 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -11,6 +11,7 @@ B0=${B0:=/d/backends}; # top level of brick directories WORKDIRS="$B0 $M0 $M1 $M2 $N0 $N1" ROOT_GFID="00000000-0000-0000-0000-000000000001" +DOT_SHARD_GFID="be318638-e8a0-4c6d-977d-7a937aa84806" META_VOL=${META_VOL:=gluster_shared_storage}; # shared gluster storage volume used by snapshot scheduler, nfs ganesha and geo-rep. META_MNT=${META_MNT:=/var/run/gluster/shared_storage}; # Mount point of shared gluster volume. 
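The DOT_SHARD_GFID constant added to include.rc above is the fixed, well-known GFID that the shard translator assigns to its internal /.shard directory; that is what lets tests such as bug-1408712.t stat name indices for shards directly under .glusterfs/indices/entry-changes on the bricks. A small illustrative sketch of how such a brick-side index path is composed; the brick root and shard name below are assumptions for illustration, not an exported API:

#include <stdio.h>

int
main (void)
{
        /* Hypothetical brick root as used by the test framework, plus
         * the fixed GFID of /.shard from include.rc. */
        const char *brick = "/d/backends/patchy1";
        const char *dot_shard_gfid = "be318638-e8a0-4c6d-977d-7a937aa84806";
        const char *shard_name = "file-gfid.2"; /* placeholder shard name */
        char path[4096];

        /* Name indices for a file live under the entry-changes index
         * directory named after the parent directory's GFID. */
        snprintf (path, sizeof (path),
                  "%s/.glusterfs/indices/entry-changes/%s/%s",
                  brick, dot_shard_gfid, shard_name);
        printf ("%s\n", path);
        return 0;
}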
@@ -408,6 +409,7 @@ stat -c %s /dev/null > /dev/null 2>&1 || { *%Y*) cmd="${cmd} s/%Y/`$( which stat ) -f %m $f`/g;" ;& *%X*) cmd="${cmd} s/%X/`$( which stat ) -f %a $f`/g;" ;& *%Z*) cmd="${cmd} s/%Z/`$( which stat ) -f %c $f`/g;" ;& + *%.Z*) cmd="${cmd} s/%.Z/`$( which stat ) -f %.9Fc $f`/g;" ;& *%b*) cmd="${cmd} s/%b/`$( which stat ) -f %b $f`/g;" ;& *%B*) cmd="${cmd} s/%B/512/g;" ;& *%t*) cmd="${cmd} s/%t/`$( which stat ) -f %XHr $f`/g;" ;& diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py index 9e7774a9828..fb6c222df03 100755 --- a/tools/glusterfind/S57glusterfind-delete-post.py +++ b/tools/glusterfind/S57glusterfind-delete-post.py @@ -46,6 +46,10 @@ def main(): ls_glusterfind_dir = [] for session in ls_glusterfind_dir: + # don't blow away the keys directory + if session == ".keys": + continue + # Possible session directory volume_session_path = os.path.join(glusterfind_dir, session, diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 6431432690d..747577c9380 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -936,7 +936,7 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode, for (i = 0; i < priv->child_count; i++) { if (start_heal && priv->child_up[i] && - (!data_readable[i] || !metadata_readable[i])) { + (data_accused[i] || metadata_accused[i])) { *start_heal = _gf_true; break; } @@ -1026,7 +1026,10 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) } heal_local->refreshinode = inode_ref (local->refreshinode); heal_local->heal_frame = heal_frame; - afr_throttled_selfheal (heal_frame, this); + if (!afr_throttled_selfheal (heal_frame, this)) { + AFR_STACK_DESTROY (heal_frame); + goto refresh_done; + } } refresh_done: @@ -5170,7 +5173,6 @@ out: int afr_transaction_local_init (afr_local_t *local, xlator_t *this) { - int child_up_count = 0; int ret = -ENOMEM; afr_private_t *priv = NULL; @@ -5189,10 +5191,6 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this) } ret = -ENOMEM; - child_up_count = AFR_COUNT (local->child_up, priv->child_count); - if (priv->optimistic_change_log && child_up_count == priv->child_count) - local->optimistic_change_log = 1; - local->pre_op_compat = priv->pre_op_compat; local->transaction.eager_lock = diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 58db6d1e497..faee8dbb89b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1985,7 +1985,7 @@ afr_heal_synctask (xlator_t *this, afr_local_t *local) afr_refresh_heal_done (ret, heal_frame, heal_frame); } -void +gf_boolean_t afr_throttled_selfheal (call_frame_t *frame, xlator_t *this) { gf_boolean_t can_heal = _gf_true; @@ -2013,6 +2013,8 @@ afr_throttled_selfheal (call_frame_t *frame, xlator_t *this) gf_msg_debug (this->name, 0, "Max number of heals are " "pending, background self-heal rejected."); } + + return can_heal; } int diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index ec5337e60b2..1705c967f60 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -85,7 +85,7 @@ int afr_selfheal (xlator_t *this, uuid_t gfid); -void +gf_boolean_t afr_throttled_selfheal (call_frame_t *frame, xlator_t *this); int diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c 
index 848387fc0db..4d0ed943a81 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1132,19 +1132,21 @@ void afr_changelog_populate_xdata (call_frame_t *frame, afr_xattrop_type_t op, dict_t **xdata, dict_t **newloc_xdata) { - dict_t *xdata1 = NULL; - dict_t *xdata2 = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int ret = 0; - const char *name = NULL; + int i = 0; + int ret = 0; + char *key = NULL; + const char *name = NULL; + dict_t *xdata1 = NULL; + dict_t *xdata2 = NULL; + xlator_t *this = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + gf_boolean_t need_entry_key_set = _gf_true; local = frame->local; - priv = THIS->private; + this = THIS; + priv = this->private; - /*Populate xdata for POST_OP only.*/ - if (op == AFR_TRANSACTION_PRE_OP) - goto out; if (local->transaction.type == AFR_DATA_TRANSACTION || local->transaction.type == AFR_METADATA_TRANSACTION) goto out; @@ -1155,26 +1157,63 @@ afr_changelog_populate_xdata (call_frame_t *frame, afr_xattrop_type_t op, xdata1 = dict_new(); if (!xdata1) goto out; + name = local->loc.name; if (local->op == GF_FOP_LINK) name = local->newloc.name; - ret = dict_set_str (xdata1, GF_XATTROP_ENTRY_IN_KEY, (char *)name); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, - "%s/%s: Could not set xattrop-entry key during post-op", - uuid_utoa (local->loc.pargfid), local->loc.name); - if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { - xdata2 = dict_new(); - if (!xdata2) - goto out; - ret = dict_set_str (xdata2, GF_XATTROP_ENTRY_IN_KEY, - (char *)local->newloc.name); + + switch (op) { + case AFR_TRANSACTION_PRE_OP: + key = GF_XATTROP_ENTRY_IN_KEY; + break; + case AFR_TRANSACTION_POST_OP: + if (afr_txn_nothing_failed (frame, this)) { + key = GF_XATTROP_ENTRY_OUT_KEY; + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.failed_subvols[i]) + continue; + need_entry_key_set = _gf_false; + break; + } + /* If the transaction itself did not fail and there + * are no failed subvolumes, check whether the fop + * failed due to a symmetric error. If it did, do + * not set the ENTRY_OUT xattr which would end up + * deleting a name index which was created possibly by + * an earlier entry txn that may have failed on some + * of the sub-volumes. 
+ */ + if (local->op_ret) + need_entry_key_set = _gf_false; + } else { + key = GF_XATTROP_ENTRY_IN_KEY; + } + break; + } + + if (need_entry_key_set) { + ret = dict_set_str (xdata1, key, (char *)name); if (ret) gf_msg (THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, - "%s/%s: Could not set xattrop-entry key during" - " post-op", uuid_utoa (local->newloc.pargfid), - local->newloc.name); + "%s/%s: Could not set %s key during xattrop", + uuid_utoa (local->loc.pargfid), local->loc.name, + key); + if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + xdata2 = dict_new (); + if (!xdata2) + goto out; + + ret = dict_set_str (xdata2, key, + (char *)local->newloc.name); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "%s/%s: Could not set %s key during " + "xattrop", + uuid_utoa (local->newloc.pargfid), + local->newloc.name, key); + } } *xdata = xdata1; @@ -1286,6 +1325,20 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, return 0; } +static void +afr_init_optimistic_changelog_for_txn (xlator_t *this, afr_local_t *local) +{ + int locked_count = 0; + afr_private_t *priv = NULL; + + priv = this->private; + + locked_count = AFR_COUNT (local->transaction.pre_op, priv->child_count); + if (priv->optimistic_change_log && locked_count == priv->child_count) + local->optimistic_change_log = 1; + + return; +} int afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) @@ -1317,6 +1370,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) } } + afr_init_optimistic_changelog_for_txn (this, local); + /* This condition should not be met with present code, as * transaction.done will be called if locks are not acquired on even a * single node. diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 50a948ffe30..a97d03bb055 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -898,8 +898,11 @@ unlock: dht_layout_set (this, local->inode, layout); } - dht_inode_ctx_time_update (local->inode, this, - &local->stbuf, 1); + if (local->inode) { + dht_inode_ctx_time_update (local->inode, this, + &local->stbuf, 1); + } + if (local->loc.parent) { dht_inode_ctx_time_update (local->loc.parent, this, &local->postparent, 1); @@ -1313,6 +1316,7 @@ dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = (dht_local_t*)frame->local; path = local->loc.path; + FRAME_SU_UNDO (frame, dht_local_t); gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO, "lookup_unlink returned with " @@ -2006,7 +2010,12 @@ unlock: loc->path, subvol->name, (local->hashed_subvol? local->hashed_subvol->name : "<null>")); - + /* * + * These stale files may be created by the root + * user. Hence deletion will work only as + * root. 
+ */ + FRAME_SU_DO (frame, dht_local_t); STACK_WIND (frame, dht_lookup_unlink_cbk, subvol, subvol->fops->unlink, loc, 0, dict_req); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 71b093b20ea..613a9d39816 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -550,6 +550,7 @@ struct dht_conf { /* lock migration */ gf_boolean_t lock_migration_enabled; + gf_lock_t lock; /* du stats */ uint32_t du_refresh_interval_sec; diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c index 66e3ede736b..f8e614a40aa 100644 --- a/xlators/cluster/dht/src/dht-hashfn.c +++ b/xlators/cluster/dht/src/dht-hashfn.c @@ -41,12 +41,16 @@ dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p) static gf_boolean_t -dht_munge_name (const char *original, char *modified, size_t len, regex_t *re) +dht_munge_name (const char *original, char *modified, + size_t len, regex_t *re) { - regmatch_t matches[2]; - size_t new_len; + regmatch_t matches[2] = {{0}, }; + size_t new_len = 0; + int ret = 0; - if (regexec(re,original,2,matches,0) != REG_NOMATCH) { + ret = regexec(re, original, 2, matches, 0); + + if (ret != REG_NOMATCH) { if (matches[1].rm_so != -1) { new_len = matches[1].rm_eo - matches[1].rm_so; /* Equal would fail due to the NUL at the end. */ @@ -60,7 +64,7 @@ dht_munge_name (const char *original, char *modified, size_t len, regex_t *re) } /* This is guaranteed safe because of how the dest was allocated. */ - strcpy(modified,original); + strcpy(modified, original); return _gf_false; } @@ -68,28 +72,36 @@ int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p) { char *rsync_friendly_name = NULL; - dht_conf_t *priv = this->private; + dht_conf_t *priv = NULL; size_t len = 0; gf_boolean_t munged = _gf_false; - if (priv->extra_regex_valid) { - len = strlen(name) + 1; - rsync_friendly_name = alloca(len); - munged = dht_munge_name (name, rsync_friendly_name, len, - &priv->extra_regex); - } + priv = this->private; - if (!munged && priv->rsync_regex_valid) { - len = strlen(name) + 1; - rsync_friendly_name = alloca(len); - gf_msg_trace (this->name, 0, "trying regex for %s", name); - munged = dht_munge_name (name, rsync_friendly_name, len, - &priv->rsync_regex); - if (munged) { - gf_msg_debug (this->name, 0, - "munged down to %s", rsync_friendly_name); + LOCK (&priv->lock); + { + if (priv->extra_regex_valid) { + len = strlen(name) + 1; + rsync_friendly_name = alloca(len); + munged = dht_munge_name (name, rsync_friendly_name, len, + &priv->extra_regex); + } + + if (!munged && priv->rsync_regex_valid) { + len = strlen(name) + 1; + rsync_friendly_name = alloca(len); + gf_msg_trace (this->name, 0, "trying regex for %s", + name); + munged = dht_munge_name (name, rsync_friendly_name, len, + &priv->rsync_regex); + if (munged) { + gf_msg_debug (this->name, 0, + "munged down to %s", + rsync_friendly_name); + } } } + UNLOCK (&priv->lock); if (!munged) { rsync_friendly_name = (char *)name; diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 153f4de0458..30b64eb5711 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT -#define GLFS_DHT_NUM_MESSAGES 117 +#define GLFS_DHT_NUM_MESSAGES 118 #define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1) /* Messages with message IDs */ @@ -1072,11 +1072,18 @@ #define 
DHT_MSG_LOCK_INODE_UNREF_FAILED (GLFS_DHT_BASE + 116) /* - * @messageid 109116 + * @messageid 109117 * @diagnosis * @recommendedaction None */ #define DHT_MSG_ASPRINTF_FAILED (GLFS_DHT_BASE + 117) +/* + * @messageid 109118 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_DIR_LOOKUP_FAILED (GLFS_DHT_BASE + 118) + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* _DHT_MESSAGES_H_ */ diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 7a19b82c4f0..10fd878041e 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -376,6 +376,50 @@ out: return ret; } + +static int +__check_file_has_hardlink (xlator_t *this, loc_t *loc, + struct iatt *stbuf, dict_t *xattrs, int flags, + gf_defrag_info_t *defrag) +{ + int ret = 0; + + if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) { + ret = 0; + return ret; + } + if (stbuf->ia_nlink > 1) { + /* support for decommission */ + if (flags == GF_DHT_MIGRATE_HARDLINK) { + synclock_lock (&defrag->link_lock); + ret = gf_defrag_handle_hardlink + (this, loc, xattrs, stbuf); + synclock_unlock (&defrag->link_lock); + /* + Returning zero will force the file to be remigrated. + See gf_defrag_handle_hardlink for more information. + */ + if (ret && ret != -2) { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to migrate file with link", + loc->path); + } + } else { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: file has hardlinks", loc->path); + ret = -ENOTSUP; + } + } + + return ret; +} + + + /* return values 0 : File will be migrated @@ -424,40 +468,9 @@ __is_file_migratable (xlator_t *this, loc_t *loc, } } - if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) { - ret = 0; - goto out; - } - - if (stbuf->ia_nlink > 1) { - /* support for decomission */ - if (flags == GF_DHT_MIGRATE_HARDLINK) { - synclock_lock (&defrag->link_lock); - ret = gf_defrag_handle_hardlink - (this, loc, xattrs, stbuf); - synclock_unlock (&defrag->link_lock); - /* - Returning zero will force the file to be remigrated. - Checkout gf_defrag_handle_hardlink for more information. 
- */ - if (ret && ret != -2) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to migrate file with link", - loc->path); - } - } else { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: file has hardlinks", loc->path); - ret = -ENOTSUP; - } - goto out; - } - - ret = 0; + /* Check if the file has hardlinks */ + ret = __check_file_has_hardlink (this, loc, stbuf, xattrs, + flags, defrag); out: return ret; @@ -1337,8 +1350,13 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + if (xattr_rsp) { + /* we no longer require this key */ + dict_del (dict, conf->link_xattr_name); + dict_unref (xattr_rsp); + } - ret = syncop_fstat (from, src_fd, &stbuf, NULL, NULL); + ret = syncop_fstat (from, src_fd, &stbuf, dict, &xattr_rsp); if (ret) { gf_msg (this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, @@ -1348,6 +1366,15 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + /* Check again if the file has hardlinks */ + ret = __check_file_has_hardlink (this, loc, &stbuf, xattr_rsp, + flag, defrag); + if (ret) { + if (ret == -2) + ret = 0; + goto out; + } + /* Try to preserve 'holes' while migrating data */ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) file_has_holes = 1; @@ -2335,6 +2362,7 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container, struct dht_container *tmp_container = NULL; xlator_t *hashed_subvol = NULL; xlator_t *cached_subvol = NULL; + int fop_errno = 0; if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ret = -1; goto out; } @@ -2358,11 +2386,11 @@ } if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, + gf_msg (this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED, - "%s: Migrate data failed: Readdir returned" - " %s. Aborting migrate-data", loc->path, - strerror(-ret)); + "Readdirp failed. 
Aborting data migration for " + "directory: %s", loc->path); + fop_errno = -ret; ret = -1; goto out; } @@ -2494,9 +2522,9 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container, ret = syncop_lookup (this, &entry_loc, NULL, NULL, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, + gf_msg (this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:%s lookup failed", + "lookup failed for file:%s", entry_loc.path); if (-ret != ENOENT && -ret != ESTALE) { @@ -2617,6 +2645,9 @@ out: if (xattr_rsp) dict_unref (xattr_rsp); + + + errno = fop_errno; return ret; } @@ -2642,6 +2673,7 @@ gf_defrag_process_dir (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, int throttle_up = 0; struct dir_dfmeta *dir_dfmeta = NULL; int should_commit_hash = 1; + int fop_errno = 0; gf_log (this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); @@ -2664,10 +2696,11 @@ gf_defrag_process_dir (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = syncop_opendir (this, loc, fd, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, + gf_msg (this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_DATA_FAILED, "Migrate data failed: Failed to open dir %s", loc->path); + fop_errno = -ret; ret = -1; goto out; } @@ -2814,9 +2847,12 @@ gf_defrag_process_dir (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, migrate_data, dir_dfmeta, xattr_req, &should_commit_hash); + if (ret) { - gf_log ("DHT", GF_LOG_INFO, "Found critical " + fop_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "Found " "error from gf_defrag_get_entry"); + ret = -1; goto out; } @@ -2874,6 +2910,7 @@ out: ret = 2; } + errno = fop_errno; return ret; } int @@ -3043,7 +3080,6 @@ out: return ret; } - int gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout, dict_t *migrate_data) @@ -3067,14 +3103,33 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } - - ret = syncop_lookup (this, loc, &iatt, NULL, NULL, NULL); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s", - loc->path); - ret = -1; - goto out; + if (strcmp (loc->path, "/") == 0) { + gf_msg (this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_LOOKUP_FAILED, + "lookup failed for:%s", loc->path); + + defrag->total_failures++; + ret = -1; + goto out; + } + + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg (this->name, GF_LOG_INFO, errno, + DHT_MSG_DIR_LOOKUP_FAILED, + "Dir:%s renamed or removed. 
Skipping", + loc->path); + ret = 0; + goto out; + } else { + gf_msg (this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_LOOKUP_FAILED, + "lookup failed for:%s", loc->path); + + defrag->total_failures++; + goto out; + } } if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && @@ -3082,18 +3137,24 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = gf_defrag_process_dir (this, defrag, loc, migrate_data); if (ret && ret != 2) { - defrag->total_failures++; + if (errno == ENOENT || errno == ESTALE) { + ret = 0; + goto out; + } else { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, - "gf_defrag_process_dir failed for directory: %s" - , loc->path); + defrag->total_failures++; - if (conf->decommission_in_progress) { - goto out; - } + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, + "gf_defrag_process_dir failed for " + "directory: %s", loc->path); - should_commit_hash = 0; + if (conf->decommission_in_progress) { + goto out; + } + + should_commit_hash = 0; + } } else if (ret == 2) { should_commit_hash = 0; } @@ -3110,8 +3171,14 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = syncop_opendir (this, loc, fd, NULL, NULL); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s", - loc->path); + if (-ret == ENOENT || -ret == ESTALE) { + ret = 0; + goto out; + } + + gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s, " + "err:%d", loc->path, -ret); + ret = -1; goto out; } @@ -3123,8 +3190,15 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, { if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s" - ". Aborting fix-layout",strerror(-ret)); + if (-ret == ENOENT || -ret == ESTALE) { + ret = 0; + goto out; + } + + gf_msg (this->name, GF_LOG_ERROR, -ret, + DHT_MSG_READDIR_ERROR, "readdirp failed for " + "path %s. Aborting fix-layout", loc->path); + ret = -1; goto out; } @@ -3216,43 +3290,63 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = syncop_lookup (this, &entry_loc, &iatt, NULL, NULL, NULL); - /*Check whether it is ENOENT or ESTALE*/ if (ret) { - gf_log (this->name, GF_LOG_ERROR, "%s" - " lookup failed with %d", - entry_loc.path, -ret); - - if (!conf->decommission_in_progress && - -ret != ENOENT && -ret != ESTALE) { - should_commit_hash = 0; + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg (this->name, GF_LOG_INFO, errno, + DHT_MSG_DIR_LOOKUP_FAILED, + "Dir:%s renamed or removed. " + "Skipping", loc->path); + ret = 0; + continue; + } else { + gf_msg (this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_LOOKUP_FAILED, + "lookup failed for:%s", + entry_loc.path); + defrag->total_failures++; + if (conf->decommission_in_progress) { + defrag->defrag_status = + GF_DEFRAG_STATUS_FAILED; + ret = -1; + goto out; + } else { + should_commit_hash = 0; + continue; + } } - - continue; } ret = syncop_setxattr (this, &entry_loc, fix_layout, 0, NULL, NULL); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Setxattr " - "failed for %s", entry_loc.path); - - defrag->total_failures++; - - /*Don't go for fix-layout of child subtree if" - fix-layout failed*/ - if (conf->decommission_in_progress) { - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; - - ret = -1; - - goto out; - } else { + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg (this->name, GF_LOG_INFO, -ret, + DHT_MSG_LAYOUT_FIX_FAILED, + "Setxattr failed. 
Dir %s " + "renamed or removed", + entry_loc.path); + ret = 0; continue; + } else { + + gf_msg (this->name, GF_LOG_ERROR, -ret, + DHT_MSG_LAYOUT_FIX_FAILED, + "Setxattr failed for %s", + entry_loc.path); + + defrag->total_failures++; + + if (conf->decommission_in_progress) { + defrag->defrag_status = + GF_DEFRAG_STATUS_FAILED; + ret = -1; + goto out; + } else { + continue; + } } } - /* A return value of 2 means, either process_dir or * lookup of a dir failed. Hence, don't commit hash * for the current directory*/ @@ -3272,8 +3366,6 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; } else { /* Let's not commit-hash if diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index 777c63de685..a9ffd1d9fb5 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -637,6 +637,7 @@ dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; prev = cookie; + FRAME_SU_UNDO (frame, dht_local_t); if (!local) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_VALUE, @@ -745,7 +746,12 @@ dht_rename_cleanup (call_frame_t *frame) local->loc2.pargfid) == 0) { DHT_MARKER_DONT_ACCOUNT(xattr_new); } - + /* * + * The link to file is created using root permission. + * Hence deletion should happen using root. Otherwise + * it will fail. + */ + FRAME_SU_DO (frame, dht_local_t); STACK_WIND (frame, dht_rename_unlink_cbk, src_cached, src_cached->fops->unlink, &local->loc2, 0, xattr_new); diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 56bfdedc642..ccbf66b626d 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -336,9 +336,9 @@ out: } void dht_init_regex (xlator_t *this, dict_t *odict, char *name, - regex_t *re, gf_boolean_t *re_valid) + regex_t *re, gf_boolean_t *re_valid, dht_conf_t *conf) { - char *temp_str; + char *temp_str = NULL; if (dict_get_str (odict, name, &temp_str) != 0) { if (strcmp(name,"rsync-hash-regex")) { @@ -347,25 +347,29 @@ dht_init_regex (xlator_t *this, dict_t *odict, char *name, temp_str = "^\\.(.+)\\.[^.]+$"; } - if (*re_valid) { - regfree(re); - *re_valid = _gf_false; - } + LOCK (&conf->lock); + { + if (*re_valid) { + regfree(re); + *re_valid = _gf_false; + } - if (!strcmp(temp_str,"none")) { - return; - } + if (!strcmp(temp_str, "none")) { + goto unlock; + } - if (regcomp(re,temp_str,REG_EXTENDED) == 0) { - gf_msg_debug (this->name, 0, - "using regex %s = %s", name, temp_str); - *re_valid = _gf_true; - } - else { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_REGEX_INFO, - "compiling regex %s failed", temp_str); + if (regcomp(re, temp_str, REG_EXTENDED) == 0) { + gf_msg_debug (this->name, 0, + "using regex %s = %s", name, temp_str); + *re_valid = _gf_true; + } else { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_REGEX_INFO, + "compiling regex %s failed", temp_str); + } } +unlock: + UNLOCK (&conf->lock); } int @@ -488,9 +492,9 @@ dht_reconfigure (xlator_t *this, dict_t *options) } dht_init_regex (this, options, "rsync-hash-regex", - &conf->rsync_regex, &conf->rsync_regex_valid); + &conf->rsync_regex, &conf->rsync_regex_valid, conf); dht_init_regex (this, options, "extra-hash-regex", - &conf->extra_regex, &conf->extra_regex_valid); + &conf->extra_regex, &conf->extra_regex_valid, conf); GF_OPTION_RECONF ("weighted-rebalance", conf->do_weighting, options, bool, out); @@ -632,6 +636,10 
@@ -632,6 +636,10 @@ dht_init (xlator_t *this) goto err; } + LOCK_INIT (&conf->subvolume_lock); + LOCK_INIT (&conf->layout_lock); + LOCK_INIT (&conf->lock); + /* We get the commit-hash to set only for rebalance process */ if (dict_get_uint32 (this->options, "commit-hash", &commit_hash) == 0) { @@ -789,17 +797,15 @@ dht_init (xlator_t *this) } dht_init_regex (this, this->options, "rsync-hash-regex", - &conf->rsync_regex, &conf->rsync_regex_valid); + &conf->rsync_regex, &conf->rsync_regex_valid, conf); dht_init_regex (this, this->options, "extra-hash-regex", - &conf->extra_regex, &conf->extra_regex_valid); + &conf->extra_regex, &conf->extra_regex_valid, conf); ret = dht_layouts_init (this, conf); if (ret == -1) { goto err; } - LOCK_INIT (&conf->subvolume_lock); - LOCK_INIT (&conf->layout_lock); conf->gen = 1; diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 16975b58f6c..69f182c5194 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -4030,7 +4030,8 @@ init (xlator_t *this) GF_OPTION_INIT ("log-level", log_str, str, out); if (log_str) { log_level = glusterd_check_log_level (log_str); - gf_log_set_loglevel (log_level); + if (DEFAULT_LOG_LEVEL != log_level) + gf_log_set_loglevel (log_level); } GF_OPTION_INIT ("logger", logger_str, str, out); diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 7b8713c89ef..f68dd55d766 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -648,6 +648,8 @@ index_del (xlator_t *this, uuid_t gfid, const char *subdir, int type) index_priv_t *priv = NULL; int ret = 0; char gfid_path[PATH_MAX] = {0}; + char rename_dst[PATH_MAX] = {0,}; + uuid_t uuid; priv = this->private; GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !gf_uuid_is_null (gfid), @@ -655,10 +657,27 @@ index_del (xlator_t *this, uuid_t gfid, const char *subdir, int type) make_gfid_path (priv->index_basepath, subdir, gfid, gfid_path, sizeof (gfid_path)); - if ((strcmp (subdir, ENTRY_CHANGES_SUBDIR)) == 0) + if ((strcmp (subdir, ENTRY_CHANGES_SUBDIR)) == 0) { ret = sys_rmdir (gfid_path); - else + /* rmdir above could fail with ENOTEMPTY if the indices under + * it were created when granular-entry-heal was enabled, whereas + * the actual heal that happened was non-granular (or full) in + * nature, resulting in name indices getting left out. To + * clean up this directory without affecting I/O path performance, + * the directory is renamed to a unique name under + * indices/entry-changes. Self-heal will pick up this entry + * during its crawl and, on lookup into the file system, figure + * out that the index is stale and subsequently wipe it out using rmdir(). + */ + if ((ret) && (errno == ENOTEMPTY)) { + gf_uuid_generate (uuid); + make_gfid_path (priv->index_basepath, subdir, uuid, + rename_dst, sizeof (rename_dst)); + ret = sys_rename (gfid_path, rename_dst); + } + } else { ret = sys_unlink (gfid_path); + } if (ret && (errno != ENOENT)) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to delete"
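The index_del() fallback above is a rename-aside pattern: when a directory cannot be removed cheaply in the I/O path, park it under a random unique name and let a background crawl reclaim it later. A self-contained sketch of that pattern, assuming libuuid; remove_or_park() and basepath are hypothetical helpers for illustration, not GlusterFS APIs:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <uuid/uuid.h>

/* Try to remove dir; if it is unexpectedly non-empty, rename it to a
 * unique name under basepath so a background crawl can delete it later. */
static int
remove_or_park (const char *dir, const char *basepath)
{
        char   dst[4096];
        char   ustr[37];
        uuid_t u;

        if (rmdir (dir) == 0 || errno == ENOENT)
                return 0;
        if (errno != ENOTEMPTY)
                return -1;
        uuid_generate (u);
        uuid_unparse (u, ustr);
        snprintf (dst, sizeof (dst), "%s/%s", basepath, ustr);
        return rename (dir, dst);       /* parked; reclaimed off the I/O path */
}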
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index e0e7c9857e6..f578f6c3f44 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -1492,6 +1492,8 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (priv->feature_enabled & GF_XTIME) { //update marks on oldpath + if (!local->loc.inode) + local->loc.inode = inode_ref (oplocal->loc.inode); gf_uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid); marker_xtime_update_marks (this, oplocal); marker_xtime_update_marks (this, local); diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c index fd5507ff694..fb413d00dd5 100644 --- a/xlators/features/trash/src/trash.c +++ b/xlators/features/trash/src/trash.c @@ -2105,13 +2105,7 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("trash-max-filesize", max_fsize, options, size_uint64, out); if (max_fsize) { - if (max_fsize > GF_ALLOWED_MAX_FILE_SIZE) { - gf_log (this->name, GF_LOG_DEBUG, - "Size specified for max-size(in MB) is too " - "large so using 1GB as max-size (NOT IDEAL)"); - priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE; - } else - priv->max_trash_file_size = max_fsize; + priv->max_trash_file_size = max_fsize; gf_log (this->name, GF_LOG_DEBUG, "%"GF_PRI_SIZET" max-size", priv->max_trash_file_size); } @@ -2434,13 +2428,7 @@ init (xlator_t *this) GF_DEFAULT_MAX_FILE_SIZE / GF_UNIT_MB); priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE; } else { - if( max_trash_file_size64 > GF_ALLOWED_MAX_FILE_SIZE ) { - gf_log (this->name, GF_LOG_DEBUG, - "Size specified for max-size(in MB) is too " - "large so using 1GB as max-size (NOT IDEAL)"); - priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE; - } else - priv->max_trash_file_size = max_trash_file_size64; + priv->max_trash_file_size = max_trash_file_size64; gf_log (this->name, GF_LOG_DEBUG, "%"GF_PRI_SIZET" max-size", priv->max_trash_file_size); } diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h index 088c1b9a286..7dd83afc09a 100644 --- a/xlators/features/trash/src/trash.h +++ b/xlators/features/trash/src/trash.h @@ -28,10 +28,6 @@ #define GF_DEFAULT_MAX_FILE_SIZE (200 * GF_UNIT_MB) #endif -#ifndef GF_ALLOWED_MAX_FILE_SIZE -#define GF_ALLOWED_MAX_FILE_SIZE (1 * GF_UNIT_GB) -#endif - struct trash_struct { fd_t *fd; /* for the fd of existing file */ fd_t *newfd; /* for the newly created file */ diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c index 81199eb074c..66cbddf5fa9 100644 --- a/xlators/features/upcall/src/upcall-internal.c +++ b/xlators/features/upcall/src/upcall-internal.c @@ -279,6 +279,11 @@ upcall_cleanup_expired_clients (xlator_t *this, up_client->access_time; if (t_expired > (2*timeout)) { + + gf_log (THIS->name, GF_LOG_TRACE, + "Cleaning up client_entry(%s)", + up_client->client_uid); + ret = __upcall_cleanup_client_entry (up_client); @@ -289,9 +294,6 @@ upcall_cleanup_expired_clients (xlator_t *this, up_client); goto out; } - gf_log (THIS->name, GF_LOG_TRACE, - "Cleaned up client_entry(%s)", - up_client->client_uid); } } } @@ -511,8 +513,13 @@
upcall_cache_invalidate (call_frame_t *frame, xlator_t *this, client_t *client, gf_uuid_copy (up_inode_ctx->gfid, stbuf->ia_gfid); } - GF_VALIDATE_OR_GOTO ("upcall_cache_invalidate", - !(gf_uuid_is_null (up_inode_ctx->gfid)), out); + if (gf_uuid_is_null (up_inode_ctx->gfid)) { + gf_msg_debug (this->name, 0, "up_inode_ctx->gfid and " + "stbuf->ia_gfid are NULL, fop:%s", + gf_fop_list[frame->root->op]); + goto out; + } + pthread_mutex_lock (&up_inode_ctx->client_list_lock); { list_for_each_entry_safe (up_client_entry, tmp, diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index e7afb9b599d..9a702b70313 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -81,6 +81,8 @@ static char *gsync_reserved_opts[] = { static char *gsync_no_restart_opts[] = { "checkpoint", + "log_rsync_performance", + "log-rsync-performance", NULL }; @@ -583,7 +585,7 @@ struct dictidxmark { struct slave_vol_config { char old_slvhost[_POSIX_HOST_NAME_MAX+1]; - char old_slvuser[_POSIX_LOGIN_NAME_MAX]; + char old_slvuser[LOGIN_NAME_MAX]; unsigned old_slvidx; char slave_voluuid[GF_UUID_BUF_SIZE]; }; @@ -2914,6 +2916,14 @@ get_slavehost_from_voluuid (dict_t *dict, char *key, data_t *value, void *data) /* To go past username in non-root geo-rep session */ tmp = strchr (slave_host, '@'); if (tmp) { + if ((tmp - slave_host) >= LOGIN_NAME_MAX) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Invalid slave user length in %s", + slave_host); + ret = -2; + goto out; + } strncpy (slave_vol->old_slvuser, slave_host, (tmp - slave_host)); slave_vol->old_slvuser[(tmp - slave_host) + 1] @@ -3336,7 +3346,8 @@ glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr) } } else if (ret == -2) { snprintf (errmsg, sizeof (errmsg), "get_slavehost_from_voluuid" - " failed %s %s!!", slave_host, slave_vol); + " failed for %s::%s. Please check the glusterd logs.", + slave_host, slave_vol); gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION, "get_slavehost_from_voluuid failed %s %s!!", slave_host, slave_vol); diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h index 0524ec48fca..045bc2e4ba7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h @@ -20,7 +20,7 @@ /* slave info format: * <master host uuid>:ssh://{<slave_user>@}<slave host>::<slave volume> \ * :<slave volume uuid> */ -#define VOLINFO_SLAVE_URL_MAX (_POSIX_LOGIN_NAME_MAX + (2*GF_UUID_BUF_SIZE) \ +#define VOLINFO_SLAVE_URL_MAX (LOGIN_NAME_MAX + (2*GF_UUID_BUF_SIZE) \ + SLAVE_URL_INFO_MAX + 10) typedef struct glusterd_gsync_status_temp { diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index ba40b8f7628..623f4dc414e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -41,7 +41,7 @@ #define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES 578 +#define GLFS_NUM_MESSAGES 579 #define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -4673,6 +4673,14 @@ */ #define GD_MSG_DICT_GET_SUCCESS (GLUSTERD_COMP_BASE + 578)
+/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_PMAP_REGISTRY_REMOVE_FAIL (GLUSTERD_COMP_BASE + 579) + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 9c26cb50e9b..20b51edee3c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1112,6 +1112,25 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) if (key_fixed) key = key_fixed; + if (strcmp (key, "cluster.granular-entry-heal") == 0) { + /* For granular entry-heal, if the set command was + * invoked through volume-set CLI, then allow the + * command only if the volume is still in 'Created' + * state + */ + if ((dict_get (dict, "is-special-key") == NULL) && + (volinfo->status != GLUSTERD_STATUS_NONE)) { + snprintf (errstr, sizeof (errstr), " 'gluster " + "volume set <VOLNAME> %s {enable, " + "disable}' is not supported. Use " + "'gluster volume heal <VOLNAME> " + "granular-entry-heal {enable, " + "disable}' instead.", key); + ret = -1; + goto out; + } + } + /* Check if the key is cluster.op-version and set * local_new_op_version to the value given if possible. */ @@ -2017,7 +2036,6 @@ glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict) gf_boolean_t all = _gf_false; char *next_version = NULL; gf_boolean_t quorum_action = _gf_false; - gf_boolean_t option = _gf_false; char *op_errstr = NULL; conf = this->private; @@ -2047,21 +2065,6 @@ glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict) if (key_fixed) key = key_fixed; - option = dict_get_str_boolean (conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, - _gf_false); - if (option) { - ret = tear_down_cluster(); - if (ret == -1) - gf_msg (THIS->name, GF_LOG_WARNING, errno, - GD_MSG_DICT_GET_FAILED, - "Could not tear down NFS-Ganesha cluster"); - ret = stop_ganesha (&op_errstr); - if (ret) - gf_msg (THIS->name, GF_LOG_WARNING, 0, - GD_MSG_NFS_GNS_STOP_FAIL, - "Could not stop NFS-Ganesha service"); - } - ret = -1; dup_opt = dict_new (); if (!dup_opt) diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index 6a89a4fe6e3..fd1936db301 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -203,6 +203,29 @@ pmap_registry_alloc (xlator_t *this) return port; } +/* pmap_assign_port does a pmap_registry_remove followed by a pmap_registry_alloc; + * the removal ensures we don't end up with stale ports. + */ +int +pmap_assign_port (xlator_t *this, int old_port, const char *path) +{ + int ret = -1; + int new_port = 0; + + if (old_port) { + ret = pmap_registry_remove (this, 0, path, + GF_PMAP_PORT_BRICKSERVER, NULL); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, "Failed to " + "remove pmap registry for older signin for path" + " %s", path); + } + } + new_port = pmap_registry_alloc (this); + return new_port; +} + int pmap_registry_bind (xlator_t *this, int port, const char *brickname, gf_pmap_port_type_t type, void *xprt) @@ -452,7 +475,6 @@ __gluster_pmap_signout (rpcsvc_request_t *req) req->rpc_err = GARBAGE_ARGS; goto fail; } - rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick, GF_PMAP_PORT_BRICKSERVER, req->trans);
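pmap_assign_port() centralizes the remove-then-allocate dance that snapd and brick start-up previously open-coded (see the glusterd-snapd-svc.c and glusterd-utils.c hunks below). A toy, self-contained model of the idea; the array registry and the helper names are illustrative, not glusterd's actual pmap structures:

#include <string.h>

#define NPORTS 8
static const char *owner[NPORTS];       /* slot -> registered brick path */

/* Drop any stale registration this path still holds. */
static void
registry_remove (const char *path)
{
        int p;

        for (p = 0; p < NPORTS; p++)
                if (owner[p] && strcmp (owner[p], path) == 0)
                        owner[p] = NULL;
}

/* Remove-then-allocate: a restarting brick never inherits a stale slot. */
static int
assign_port (int old_port, const char *path)
{
        int p;

        if (old_port)
                registry_remove (path);
        for (p = 0; p < NPORTS; p++)
                if (owner[p] == NULL) {
                        owner[p] = path;
                        return p;
                }
        return -1;                      /* registry exhausted */
}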
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h index 95ded04208d..14187daee2b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.h +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h @@ -35,6 +35,8 @@ struct pmap_registry { struct pmap_port_status ports[65536]; }; +int pmap_assign_port (xlator_t *this, int port, const char *path); +int pmap_mark_port_leased (xlator_t *this, int port); int pmap_registry_alloc (xlator_t *this); int pmap_registry_bind (xlator_t *this, int port, const char *brickname, gf_pmap_port_type_t type, void *xprt); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c index 830dc1a706d..36e4a196845 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c @@ -295,28 +295,7 @@ glusterd_snapdsvc_start (glusterd_svc_t *svc, int flags) "--brick-name", snapd_id, "-S", svc->conn.sockpath, NULL); - /* Do a pmap registry remove on the older connected port */ - if (volinfo->snapd.port) { - ret = pmap_registry_remove (this, volinfo->snapd.port, - snapd_id, GF_PMAP_PORT_BRICKSERVER, - NULL); - if (ret) { - snprintf (msg, sizeof (msg), "Failed to remove pmap " - "registry for older signin"); - goto out; - } - } - - snapd_port = pmap_registry_alloc (THIS); - if (!snapd_port) { - snprintf (msg, sizeof (msg), "Could not allocate port " - "for snapd service for volume %s", - volinfo->volname); - runner_log (&runner, this->name, GF_LOG_DEBUG, msg); - ret = -1; - goto out; - } - + snapd_port = pmap_assign_port (THIS, volinfo->snapd.port, snapd_id); volinfo->snapd.port = snapd_port; runner_add_arg (&runner, "--brick-port"); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 97713c461c4..de5fce5a965 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1794,6 +1794,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, char socketpath[PATH_MAX] = {0}; char glusterd_uuid[1024] = {0,}; char valgrind_logfile[PATH_MAX] = {0}; + char rdma_brick_path[PATH_MAX] = {0,}; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1826,9 +1827,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, if (gf_is_service_running (pidfile, NULL)) goto connect; - port = brickinfo->port; - if (!port) - port = pmap_registry_alloc (THIS); + port = pmap_assign_port (THIS, brickinfo->port, brickinfo->path); /* Build the exp_path, before starting the glusterfsd even in valgrind mode.
Otherwise all the glusterfsd processes start @@ -1893,9 +1892,10 @@ retry: if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { runner_argprintf (&runner, "%d", port); } else { - rdma_port = brickinfo->rdma_port; - if (!rdma_port) - rdma_port = pmap_registry_alloc (THIS); + snprintf (rdma_brick_path, sizeof(rdma_brick_path), "%s.rdma", + brickinfo->path); + rdma_port = pmap_assign_port (THIS, brickinfo->rdma_port, + rdma_brick_path); runner_argprintf (&runner, "%d,%d", port, rdma_port); runner_add_arg (&runner, "--xlator-option"); runner_argprintf (&runner, "%s-server.transport.rdma.listen-port=%d", diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 7b53ff5f645..2344fd169f1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -5693,6 +5693,8 @@ glusterd_snapdsvc_generate_volfile (volgen_graph_t *graph, char *xlator = NULL; char *value = NULL; char auth_path[] = "auth-path"; + char *ssl_str = NULL; + gf_boolean_t ssl_bool = _gf_false; set_dict = dict_copy (volinfo->dict, NULL); if (!set_dict) @@ -5737,6 +5739,19 @@ glusterd_snapdsvc_generate_volfile (volgen_graph_t *graph, if (ret) return -1; + if (dict_get_str (set_dict, "server.ssl", &ssl_str) == 0) { + if (gf_string2boolean (ssl_str, &ssl_bool) == 0) { + if (ssl_bool) { + ret = xlator_set_option(xl, + "transport.socket.ssl-enabled", + "true"); + if (ret) { + return -1; + } + } + } + } + RPC_SET_OPT(xl, SSL_OWN_CERT_OPT, "ssl-own-cert", return -1); RPC_SET_OPT(xl, SSL_PRIVATE_KEY_OPT,"ssl-private-key", return -1); RPC_SET_OPT(xl, SSL_CA_LIST_OPT, "ssl-ca-list", return -1); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 0c56a90a6d3..61c79655ccf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -766,8 +766,9 @@ out: return ret; } static int -glusterd_handle_heal_enable_disable (rpcsvc_request_t *req, dict_t *dict, - glusterd_volinfo_t *volinfo) +glusterd_handle_heal_options_enable_disable (rpcsvc_request_t *req, + dict_t *dict, + glusterd_volinfo_t *volinfo) { gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; int ret = 0; @@ -781,30 +782,58 @@ glusterd_handle_heal_enable_disable (rpcsvc_request_t *req, dict_t *dict, } if ((heal_op != GF_SHD_OP_HEAL_ENABLE) && - (heal_op != GF_SHD_OP_HEAL_DISABLE)) { + (heal_op != GF_SHD_OP_HEAL_DISABLE) && + (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) && + (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) { ret = -EINVAL; goto out; } - if (heal_op == GF_SHD_OP_HEAL_ENABLE) { + if (((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || + (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) && + (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { + ret = -1; + goto out; + } + + if ((heal_op == GF_SHD_OP_HEAL_ENABLE) || + (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)) { value = "enable"; - } else if (heal_op == GF_SHD_OP_HEAL_DISABLE) { + } else if ((heal_op == GF_SHD_OP_HEAL_DISABLE) || + (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) { value = "disable"; } /* Convert this command to volume-set command based on volume type */ if (volinfo->type == GF_CLUSTER_TYPE_TIER) { - ret = glusterd_handle_shd_option_for_tier (volinfo, value, - dict); - if (!ret) - goto set_volume; - goto out; + switch (heal_op) { + case GF_SHD_OP_HEAL_ENABLE: + case GF_SHD_OP_HEAL_DISABLE: + ret = glusterd_handle_shd_option_for_tier (volinfo, + value, dict); + if (!ret) + goto 
set_volume; + goto out; + /* For any other heal_op, including granular-entry heal, + * just break out of the block but don't goto out yet. + */ + default: + break; + } } - key = volgen_get_shd_key (volinfo->type); - if (!key) { - ret = -1; - goto out; + if ((heal_op == GF_SHD_OP_HEAL_ENABLE) || + (heal_op == GF_SHD_OP_HEAL_DISABLE)) { + key = volgen_get_shd_key (volinfo->type); + if (!key) { + ret = -1; + goto out; + } + } else { + key = "cluster.granular-entry-heal"; + ret = dict_set_int8 (dict, "is-special-key", 1); + if (ret) + goto out; } ret = dict_set_str (dict, "key1", key); @@ -890,7 +919,7 @@ __glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) goto out; } - ret = glusterd_handle_heal_enable_disable (req, dict, volinfo); + ret = glusterd_handle_heal_options_enable_disable (req, dict, volinfo); if (ret == -EINVAL) { ret = 0; } else { @@ -1480,6 +1509,15 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr, goto out; } + /* This is an incremental approach to have all the volinfo objects ref + * count. The first attempt is made in volume start transaction to + * ensure it doesn't race with import volume where stale volume is + * deleted. There are multiple instances of GlusterD crashing in + * bug-948686.t because of this. Once this approach is foolproof, all + * other volinfo objects will be refcounted. + */ + glusterd_volinfo_ref (volinfo); + if (priv->op_version > GD_OP_VERSION_3_7_5) { ret = glusterd_validate_quorum (this, GD_OP_START_VOLUME, dict, op_errstr); @@ -1491,15 +1529,6 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr, goto out; } } - /* This is an incremental approach to have all the volinfo objects ref - * count. The first attempt is made in volume start transaction to - * ensure it doesn't race with import volume where stale volume is - * deleted. There are multiple instances of GlusterD crashing in - * bug-948686.t because of this. Once this approach is full proof, all - * other volinfo objects will be refcounted.
- */ - glusterd_volinfo_ref (volinfo); - ret = glusterd_validate_volume_id (dict, volinfo); if (ret) goto out; @@ -1829,6 +1858,8 @@ glusterd_handle_heal_cmd (xlator_t *this, glusterd_volinfo_t *volinfo, case GF_SHD_OP_INVALID: case GF_SHD_OP_HEAL_ENABLE: /* This op should be handled in volume-set*/ case GF_SHD_OP_HEAL_DISABLE:/* This op should be handled in volume-set*/ + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE: /* This op should be handled in volume-set */ + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE: /* This op should be handled in volume-set */ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:/*glfsheal cmd*/ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:/*glfsheal cmd*/ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:/*glfsheal cmd*/ @@ -2634,7 +2665,7 @@ glusterd_op_start_volume (dict_t *dict, char **op_errstr) ret = glusterd_svcs_manager (volinfo); out: - if (!volinfo) + if (volinfo) glusterd_volinfo_unref (volinfo); gf_msg_trace (this->name, 0, "returning %d ", ret); diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c index dd737850c11..bff7e0669ff 100644 --- a/xlators/nfs/server/src/mount3.c +++ b/xlators/nfs/server/src/mount3.c @@ -1059,7 +1059,7 @@ __mnt3_resolve_export_subdir_comp (mnt3_resolve_t *mres) nfs_loc_wipe (&mres->resolveloc); ret = nfs_entry_loc_fill (mres->mstate->nfsx, mres->exp->vol->itable, gfid, nextcomp, &mres->resolveloc, - NFS_RESOLVE_CREATE); + NFS_RESOLVE_CREATE, NULL); if ((ret < 0) && (ret != -2)) { gf_msg (GF_MNT, GF_LOG_ERROR, EFAULT, NFS_MSG_RESOLVE_INODE_FAIL, "Failed to resolve and " @@ -1374,7 +1374,7 @@ __mnt3_resolve_subdir (mnt3_resolve_t *mres) rootgfid[15] = 1; ret = nfs_entry_loc_fill (mres->mstate->nfsx, mres->exp->vol->itable, rootgfid, firstcomp, &mres->resolveloc, - NFS_RESOLVE_CREATE); + NFS_RESOLVE_CREATE, NULL); if ((ret < 0) && (ret != -2)) { gf_msg (GF_MNT, GF_LOG_ERROR, EFAULT, NFS_MSG_RESOLVE_INODE_FAIL, "Failed to resolve and " diff --git a/xlators/nfs/server/src/nfs-common.c b/xlators/nfs/server/src/nfs-common.c index d2d1477fdc5..a39a0e6ee3a 100644 --- a/xlators/nfs/server/src/nfs-common.c +++ b/xlators/nfs/server/src/nfs-common.c @@ -312,7 +312,7 @@ err: */ int nfs_entry_loc_fill (xlator_t *this, inode_table_t *itable, uuid_t pargfid, - char *entry, loc_t *loc, int how) + char *entry, loc_t *loc, int how, gf_boolean_t *freshlookup) { inode_t *parent = NULL; inode_t *entryinode = NULL; @@ -341,8 +341,11 @@ nfs_entry_loc_fill (xlator_t *this, inode_table_t *itable, uuid_t pargfid, * that the caller can use the filled loc to call * lookup. */ - if (!entryinode) + if (!entryinode) { entryinode = inode_new (itable); + if (freshlookup) + *freshlookup = _gf_true; + } /* Cannot change ret because that must * continue to have -2. 
*/ diff --git a/xlators/nfs/server/src/nfs-common.h b/xlators/nfs/server/src/nfs-common.h index 77bdfb0bbf0..8c9a1e33457 100644 --- a/xlators/nfs/server/src/nfs-common.h +++ b/xlators/nfs/server/src/nfs-common.h @@ -57,7 +57,8 @@ nfs_ino_loc_fill (inode_table_t *itable, uuid_t gfid, loc_t *l); extern int nfs_entry_loc_fill (xlator_t *this, inode_table_t *itable, uuid_t pargfid, - char *entry, loc_t *loc, int how); + char *entry, loc_t *loc, int how, + gf_boolean_t *freshlookup); extern int nfs_root_loc_fill (inode_table_t *itable, loc_t *loc); diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c index 5ed57bde0e2..0b977092fbb 100644 --- a/xlators/nfs/server/src/nfs3-helpers.c +++ b/xlators/nfs/server/src/nfs3-helpers.c @@ -3757,8 +3757,9 @@ out: int nfs3_fh_resolve_entry_hard (nfs3_call_state_t *cs) { - int ret = -EFAULT; - nfs_user_t nfu = {0, }; + int ret = -EFAULT; + nfs_user_t nfu = {0, }; + gf_boolean_t freshlookup = _gf_false; if (!cs) return ret; @@ -3771,7 +3772,7 @@ nfs3_fh_resolve_entry_hard (nfs3_call_state_t *cs) ret = nfs_entry_loc_fill (cs->nfsx, cs->vol->itable, cs->resolvefh.gfid, cs->resolventry, &cs->resolvedloc, - NFS_RESOLVE_CREATE); + NFS_RESOLVE_CREATE, &freshlookup); if (ret == -2) { gf_msg_trace (GF_NFS3, 0, "Entry needs lookup: %s", @@ -3782,8 +3783,8 @@ nfs3_fh_resolve_entry_hard (nfs3_call_state_t *cs) * go ahead in the resume callback so that an EEXIST gets * handled at posix without an extra fop at this point. */ - if (nfs3_lookup_op (cs) || - (nfs3_create_op (cs) && !nfs3_create_exclusive_op (cs))) { + if (freshlookup && (nfs3_lookup_op (cs) || + (nfs3_create_op (cs) && !nfs3_create_exclusive_op (cs)))) { cs->lookuptype = GF_NFS3_FRESH; cs->resolve_ret = 0; cs->hardresolved = 0; diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index f370fae850d..7732a9711ae 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -87,6 +87,7 @@ out: /* Don't use 'GF_FREE', this is allocated by libc */ free (rsp.spec); + free (rsp.xdata.xdata_val); return 0; } diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index b26c4946d7f..2e965653e67 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -1534,10 +1534,6 @@ client3_3_inodelk_cbk (struct rpc_req *req, struct iovec *iov, int count, goto out; } - GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val), - (rsp.xdata.xdata_len), ret, - rsp.op_errno, out); - ret = client_post_inodelk (this, &rsp, &xdata); if (ret < 0) goto out; diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c index 9a023aaa49e..2cabcf9aec0 100644 --- a/xlators/protocol/server/src/server-common.c +++ b/xlators/protocol/server/src/server-common.c @@ -226,6 +226,12 @@ server_post_fallocate (gfs3_fallocate_rsp *rsp, struct iatt *statpre, gf_stat_from_iatt (&rsp->statpost, statpost); } +void +server_post_seek (gfs3_seek_rsp *rsp, off_t offset) +{ + rsp->offset = offset; +} + int server_post_readdirp (gfs3_readdirp_rsp *rsp, gf_dirent_t *entries) { diff --git a/xlators/protocol/server/src/server-common.h b/xlators/protocol/server/src/server-common.h index afd9fb81269..f3b9ced939c 100644 --- a/xlators/protocol/server/src/server-common.h +++ b/xlators/protocol/server/src/server-common.h @@ -81,6 +81,9 @@ void server_post_fallocate 
(gfs3_fallocate_rsp *rsp, struct iatt *statpre, struct iatt *statpost); +void +server_post_seek (gfs3_seek_rsp *rsp, off_t offset); + int server_post_readdirp (gfs3_readdirp_rsp *rsp, gf_dirent_t *entries); diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c index fa160a52d79..d5410573ac3 100644 --- a/xlators/protocol/server/src/server-rpc-fops.c +++ b/xlators/protocol/server/src/server-rpc-fops.c @@ -2008,6 +2008,7 @@ server_seek_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } + server_post_seek (&rsp, offset); out: rsp.op_ret = op_ret; rsp.op_errno = gf_errno_to_error (op_errno);
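The server-rpc-fops.c hunk closes a gap where a successful seek reply went out with rsp.offset still zeroed. A minimal sketch of the marshalling step server_post_seek now performs; the struct here is pared down to the relevant fields and is not the real XDR-generated type:

/* Pared-down stand-in for the XDR seek response; illustrative only. */
struct seek_rsp_sketch {
        int                op_ret;
        int                op_errno;
        unsigned long long offset;
};

/* On success, copy the fop result into the wire response before it is
 * serialized; otherwise the client always reads back offset 0. */
static void
fill_seek_rsp (struct seek_rsp_sketch *rsp, int op_ret, int op_errno,
               unsigned long long offset)
{
        if (op_ret >= 0)
                rsp->offset = offset;   /* what server_post_seek now does */
        rsp->op_ret   = op_ret;
        rsp->op_errno = op_errno;
}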