summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--configure.ac1
-rw-r--r--extras/Makefile.am2
-rw-r--r--extras/defrag.sh60
-rw-r--r--extras/glusterfs-defrag.in109
-rw-r--r--extras/scale-n-defrag.sh37
-rw-r--r--extras/volgen/CreateVolfile.py1
-rw-r--r--libglusterfs/src/glusterfs.h3
-rw-r--r--xlators/cluster/dht/src/dht-common.c217
-rw-r--r--xlators/cluster/dht/src/dht-common.h5
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c8
-rw-r--r--xlators/storage/posix/src/posix.c55
-rw-r--r--xlators/storage/posix/src/posix.h2
12 files changed, 373 insertions, 127 deletions
diff --git a/configure.ac b/configure.ac
index 4eaf052..16ea31e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -114,6 +114,7 @@ AC_CONFIG_FILES([Makefile
doc/examples/Makefile
doc/hacker-guide/Makefile
extras/Makefile
+ extras/glusterfs-defrag
extras/init.d/Makefile
extras/init.d/glusterfs-server.plist
extras/init.d/glusterfsd-Debian
diff --git a/extras/Makefile.am b/extras/Makefile.am
index c111371..6ea4744 100644
--- a/extras/Makefile.am
+++ b/extras/Makefile.am
@@ -3,6 +3,8 @@ docdir = $(datadir)/doc/glusterfs/
EditorModedir = $(docdir)/
EditorMode_DATA = glusterfs-mode.el glusterfs.vim
+dist_bin_SCRIPTS = glusterfs-defrag
+
SUBDIRS = init.d benchmarking volgen
EXTRA_DIST = specgen.scm MacOSX/Portfile glusterfs-mode.el glusterfs.vim migrate-unify-to-distribute.sh backend-xattr-sanitize.sh backend-cleanup.sh defrag.sh scale-n-defrag.sh disk_usage_sync.sh
diff --git a/extras/defrag.sh b/extras/defrag.sh
deleted file mode 100644
index 465b097..0000000
--- a/extras/defrag.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/sh
-
-# This script gets called from 'scale-n-defrag.sh' script.
-# Don't run this stand alone.
-#
-#
-
-set -e
-
-CP="cp"
-MV="mv"
-
-scan_dir()
-{
- path=$1;
- find "$path" -type f -perm +01000 -exec $0 '{}' \;
-}
-
-rsync_filename()
-{
- path=$1
- dir=$(dirname "$path");
- file=$(basename "$path");
-
- echo "$dir/.$file.zr$$";
-}
-
-relocate_file()
-{
- path=$1;
- tmp_path=$(rsync_filename "$path");
-
- pre_mtime=$(stat -c '%Y' "$path");
- $CP -a "$path" "$tmp_path";
- post_mtime=$(stat -c '%Y' "$path");
-
- if [ $pre_mtime = $post_mtime ]; then
- chmod -t "$tmp_path";
- $MV "$tmp_path" "$path";
- echo "file '$path' relocated"
- else
- echo "file '$path' modified during defrag. skipping"
- rm -f "$tmp_path";
- fi
-}
-
-main()
-{
- path="$1";
-
- if [ -d "$path" ]; then
- scan_dir "$path";
- else
- relocate_file "$@";
- fi
-
- usleep 500000 # 500ms
-}
-
-main "$1"
diff --git a/extras/glusterfs-defrag.in b/extras/glusterfs-defrag.in
new file mode 100644
index 0000000..982878b
--- /dev/null
+++ b/extras/glusterfs-defrag.in
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# Please leave 'added_bricks' as empty if you want 100% defrag.
+# If you want to move data to newly added bricks, properly give
+# brick info as "<hostname>:<export-dir>" form (which was given
+# in 'gluster volume create' command)
+# More than one brick can be given with space inbetween.
+
+#
+# (c) 2010 Gluster Inc <http://www.gluster.com/>
+#
+
+set -e;
+
+added_bricks="node1:/gfs/export1"
+
+CP="cp"
+MV="mv"
+
+scan_dir()
+{
+ path=$1;
+ # run defrag on files first #
+ find "$path" -maxdepth 1 -type f -perm +01000 -exec $0 '{}' \;
+
+ for subdir in $(find "$path" -maxdepth 1 -type d | sed 1d); do
+ $0 "$subdir";
+ done
+}
+
+fix_xattr()
+{
+ path=$1;
+ getfattr -n trusted.distribute.fix.layout "$path" 2>/dev/null;
+}
+
+rsync_filename()
+{
+ path=$1
+ dir=$(dirname "$path");
+ file=$(basename "$path");
+
+ echo "$dir/.$file.zr$$";
+}
+
+relocate_file()
+{
+ path=$1;
+ stat_info=$(stat -c '%a' "$path");
+ if [ $stat_info -lt 1000 ] ; then
+ return;
+ fi
+
+ flag=0;
+ linknode=$(getfattr --only-values -n trusted.distribute.linkinfo $path 2>/dev/null);
+ if [ -z $linknode ] ; then
+ return;
+ fi
+
+ for bricks in ${added_bricks}; do
+ current_brick=${linknode:0:${#bricks}};
+ if [ "${bricks}" == "${current_brick}" ]; then
+ flag=1;
+ fi
+ done
+
+ if [ $flag -ne 1 ]; then
+ return;
+ fi
+
+ tmp_path=$(rsync_filename "$path");
+
+ pre_mtime=$(stat -c '%Y' "$path");
+ $CP -a "$path" "$tmp_path";
+ post_mtime=$(stat -c '%Y' "$path");
+
+ if [ $pre_mtime = $post_mtime ]; then
+ chmod -t "$tmp_path";
+ $MV "$tmp_path" "$path";
+ echo "file '$path' relocated"
+ else
+ echo "file '$path' modified during defrag. skipping"
+ rm -f "$tmp_path";
+ fi
+}
+
+defrag_usage()
+{
+ echo "Usage: $0 <directory>"
+}
+
+main()
+{
+ path="$1";
+
+ if [ -z "$path" ]; then
+ defrag_usage;
+ return;
+ fi
+
+ if [ -d "$path" ]; then
+ fix_xattr "$path";
+ scan_dir "$path";
+ else
+ relocate_file "$@";
+ fi
+}
+
+main "$1"
diff --git a/extras/scale-n-defrag.sh b/extras/scale-n-defrag.sh
deleted file mode 100644
index 1031b39..0000000
--- a/extras/scale-n-defrag.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/sh
-
-# This script runs over the GlusterFS mountpoint (from just one client)
-# to handle the distribution of 'data', after the distribute translator's
-# subvolumes count changes.
-#
-# (c) 2009 Gluster Inc, <http://www.gluster.com/>
-#
-#
-# Make sure the following variables are properly initialized
-
-MOUNTPOINT=/tmp/testdir
-directory_to_be_scaled="${MOUNTPOINT}/"
-
-logdir=$(dirname $0)
-cd $logdir
-LOGDIR=$(pwd)
-cd -
-
-# The below command is enough to make sure the new layout will be scaled across new
-# nodes.
-find ${directory_to_be_scaled} -type d -exec setfattr -x "trusted.glusterfs.dht" {} \;
-
-# Now do a lookup on files so the scaling/re-hashing is done
-find ${directory_to_be_scaled} > /dev/null
-
-
-# copy the defrag (to copy data across for new nodes (for linkfiles))
-#
-
-
-cd ${directory_to_be_scaled};
-for dir in *; do
- echo "Defragmenting directory ${directory_to_be_scaled}/$dir ($LOGDIR/defrag-store-$dir.log)"
- $LOGDIR/defrag.sh $dir >> $LOGDIR/defrag-store-$dir.log 2>&1
- echo Completed directory ${directory_to_be_scaled}/$dir
-done
diff --git a/extras/volgen/CreateVolfile.py b/extras/volgen/CreateVolfile.py
index ca5043a..378766cf 100644
--- a/extras/volgen/CreateVolfile.py
+++ b/extras/volgen/CreateVolfile.py
@@ -270,6 +270,7 @@ class CreateVolfile:
exp_fd.write ("# option background-unlink yes # (default: no) boolean type\n")
exp_fd.write (" option directory %s\n" % export)
+ exp_fd.write (" option hostname %s\n" % host)
exp_fd.write ("end-volume\n\n")
if self.nfs:
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 8ddb4e7..8dc781d 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -64,6 +64,9 @@
#define O_DIRECTORY 0
#endif
+#define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo"
+#define GF_XATTR_LINKINFO_KEY "trusted.distribute.linkinfo"
+
#define ZR_FILE_CONTENT_STR "glusterfs.file."
#define ZR_FILE_CONTENT_STRLEN 15
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 1cfeae6..dd2a3f3 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1410,6 +1410,127 @@ err:
int
+dht_fix_layout_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+ DHT_STACK_UNWIND (getxattr, frame, -1, ENODATA, NULL);
+
+ return 0;
+}
+
+static void
+fill_layout_info (dht_layout_t *layout, char *buf)
+{
+ int i = 0;
+ char tmp_buf[128] = {0,};
+
+ for (i = 0; i < layout->cnt; i++) {
+ snprintf (tmp_buf, 128, "(%s %u %u)",
+ layout->list[i].xlator->name,
+ layout->list[i].start,
+ layout->list[i].stop);
+ if (i)
+ strcat (buf, " ");
+ strcat (buf, tmp_buf);
+ }
+}
+
+int
+dht_pathinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ int flag = 0;
+ int this_call_cnt = 0;
+ char *value_got = NULL;
+ char layout_buf[8192] = {0,};
+ char xattr_buf[8192 + 1024] = {0,};
+ dict_t *dict = NULL;
+
+ local = frame->local;
+
+ if (op_ret != -1) {
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value_got);
+ if (!ret) {
+ if (!local->pathinfo)
+ local->pathinfo = GF_CALLOC (8192, sizeof (char),
+ gf_common_mt_char);
+ if (local->pathinfo)
+ strcat (local->pathinfo, value_got);
+ }
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->layout->cnt > 1) {
+ /* Set it for directory */
+ fill_layout_info (local->layout, layout_buf);
+ flag = 1;
+ }
+
+ dict = dict_new ();
+
+ if (flag && local->pathinfo)
+ snprintf (xattr_buf, 9216, "((%s %s) (%s-layout %s))",
+ this->name, local->pathinfo, this->name,
+ layout_buf);
+ else if (local->pathinfo)
+ snprintf (xattr_buf, 9216, "(%s %s)",
+ this->name, local->pathinfo);
+ else if (flag)
+ snprintf (xattr_buf, 9216, "(%s-layout %s)",
+ this->name, layout_buf);
+
+ ret = dict_set_str (dict, GF_XATTR_PATHINFO_KEY,
+ xattr_buf);
+
+ if (local->pathinfo)
+ GF_FREE (local->pathinfo);
+ GF_FREE (local->key);
+
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+ }
+
+ if (local->pathinfo)
+ strcat (local->pathinfo, " Link: ");
+
+ /* This will happen if there pending */
+ STACK_WIND (frame, dht_pathinfo_getxattr_cbk, local->hashed_subvol,
+ local->hashed_subvol->fops->getxattr,
+ &local->loc, local->key);
+
+ return 0;
+}
+
+int
+dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ if (op_ret != -1) {
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (!ret) {
+ ret = dict_set_str (xattr, GF_XATTR_LINKINFO_KEY, value);
+ if (!ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to set linkinfo");
+ }
+ }
+
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
+
+ return 0;
+}
+
+int
dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr)
{
@@ -1429,9 +1550,14 @@ int
dht_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *key)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int ret = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1439,6 +1565,91 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
+ conf = this->private;
+ layout = dht_layout_get (this, loc->inode);
+ if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) {
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ goto err;
+ }
+
+ ret = loc_dup (loc, &local->loc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ goto err;
+ }
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ goto err;
+ }
+ local->layout = layout;
+
+ local->call_cnt = 1;
+ if (hashed_subvol != cached_subvol) {
+ local->call_cnt = 2;
+ local->hashed_subvol = hashed_subvol;
+ }
+
+ STACK_WIND (frame, dht_pathinfo_getxattr_cbk, cached_subvol,
+ cached_subvol->fops->getxattr, loc, key);
+
+ return 0;
+ }
+ if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (hashed_subvol == cached_subvol) {
+ op_errno = ENODATA;
+ goto err;
+ }
+ if (hashed_subvol) {
+ STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
+ hashed_subvol->fops->getxattr, loc,
+ GF_XATTR_PATHINFO_KEY);
+ return 0;
+ }
+ op_errno = ENODATA;
+ goto err;
+ }
+ if (key && (strcmp (key, GF_XATTR_FIX_LAYOUT_KEY) == 0)) {
+ if (layout->cnt < conf->subvolume_cnt) {
+ gf_log (this->name, GF_LOG_INFO,
+ "expanding layout of %s from %d to %d",
+ loc->path, layout->cnt, conf->subvolume_cnt);
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ goto err;
+ }
+
+ ret = loc_dup (loc, &local->loc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ goto err;
+ }
+ local->layout = layout;
+ dht_selfheal_new_directory (frame, dht_fix_layout_cbk,
+ layout);
+ return 0;
+ }
+ op_errno = ENODATA;
+ goto err;
+ }
subvol = dht_subvol_get_cached (this, loc->inode);
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index b361f14..d5a5c7b 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -27,6 +27,7 @@
#ifndef _DHT_H
#define _DHT_H
+#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout"
#define GF_DHT_LOOKUP_UNHASHED_ON 1
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
@@ -116,6 +117,10 @@ struct dht_local {
int32_t flags;
mode_t mode;
dev_t rdev;
+
+ /* need for file-info */
+ char *pathinfo;
+ char *key;
};
typedef struct dht_local dht_local_t;
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 142ff06..9c9dff0 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -2629,11 +2629,11 @@ fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state->loc.path, strerror (op_errno));
}
} else {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)",
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRIu64": %s(%s) %s => -1 (%s)",
frame->root->unique,
- gf_fop_list[frame->root->op], state->loc.path,
- strerror (op_errno));
+ gf_fop_list[frame->root->op], state->name,
+ state->loc.path, strerror (op_errno));
} /* if(op_errno!= ENODATA)...else */
send_fuse_err (this, finh, op_errno);
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 7c0d165..2810bbd 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -3032,6 +3032,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
size_t size = 0;
size_t remaining_size = 0;
char key[1024] = {0,};
+ char host_buf[1024] = {0,};
char gen_key[1024] = {0,};
char * value = NULL;
char * list = NULL;
@@ -3072,18 +3073,18 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
}
if (loc->inode && IA_ISREG (loc->inode->ia_type) && name &&
- (strcmp (name, "trusted.glusterfs.location") == 0)) {
- ret = dict_set_static_ptr (dict,
- "trusted.glusterfs.location",
- priv->hostname);
+ (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {
+ snprintf (host_buf, 1024, "%s:%s", priv->hostname,
+ real_path);
+ ret = dict_set_str (dict, GF_XATTR_PATHINFO_KEY,
+ host_buf);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
- "could not set hostname (%s) in dictionary",
- priv->hostname);
+ "could not set value (%s) in dictionary",
+ host_buf);
}
goto done;
}
-
size = sys_llistxattr (real_path, NULL, 0);
if (size == -1) {
@@ -4260,17 +4261,16 @@ mem_acct_init (xlator_t *this)
int
init (xlator_t *this)
{
- int ret = 0;
- int op_ret = -1;
- gf_boolean_t tmp_bool = 0;
- struct stat buf = {0,};
- struct posix_private * _private = NULL;
- data_t * dir_data = NULL;
- data_t * tmp_data = NULL;
- uint64_t time64 = 0;
-
- int dict_ret = 0;
- int32_t janitor_sleep;
+ struct posix_private *_private = NULL;
+ data_t *dir_data = NULL;
+ data_t *tmp_data = NULL;
+ struct stat buf = {0,};
+ gf_boolean_t tmp_bool = 0;
+ uint64_t time64 = 0;
+ int dict_ret = 0;
+ int ret = 0;
+ int op_ret = -1;
+ int32_t janitor_sleep = 0;
dir_data = dict_get (this->options, "directory");
@@ -4370,10 +4370,19 @@ init (xlator_t *this)
LOCK_INIT (&_private->lock);
- ret = gethostname (_private->hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)", strerror (errno));
+ ret = dict_get_str (this->options, "hostname", &_private->hostname);
+ if (ret) {
+ _private->hostname = GF_CALLOC (256, sizeof (char),
+ gf_common_mt_char);
+ if (!_private->hostname) {
+ gf_log (this->name, GF_LOG_ERROR, "not enough memory");
+ goto out;
+ }
+ ret = gethostname (_private->hostname, 256);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hostname (%s)", strerror (errno));
+ }
}
_private->export_statfs = 1;
@@ -4574,6 +4583,8 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL },
{ .key = {"directory"},
.type = GF_OPTION_TYPE_PATH },
+ { .key = {"hostname"},
+ .type = GF_OPTION_TYPE_ANY },
{ .key = {"export-statfs-size"},
.type = GF_OPTION_TYPE_BOOL },
{ .key = {"mandate-attribute"},
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 0295a1f..2aff0a6 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -73,7 +73,7 @@ struct posix_private {
gf_lock_t lock;
- char hostname[256];
+ char *hostname;
/* Statistics, provides activity of the server */
struct timeval prev_fetch_time;