distribute: add hash-name-regex option

This is to support the common "write to temp file then rename" idiom. In our case this causes us to create a linkfile during the rename in (N-1)/N cases, with a significant impact on performance e.g. for UFO which uses this idiom. This patch allows the user to specify up to two regular expressions that separate the permanent and transient parts of a temp-file name: rsync-hash-regex reimplements the existing RSYNC_FRIENDLY_NAME pattern where the temporary name for EXAMPLE is .EXAMPLE.suffix extra-hash-regex can be used for a second pattern, active concurrently, and can include alternatives via the POSIX extended-regex syntax Change-Id: Ic1a6ed19324bc31fefe91ee34b8478877a9c5d2c BUG: 912564 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: http://review.gluster.org/4116 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
author: Jeff Darcy <jdarcy@redhat.com> 2013-02-18 13:16:39 -0500
committer: Anand Avati <avati@redhat.com> 2013-02-18 20:50:48 -0800
commit: 59ac567c8b5ebf20b573ecf250e13f841e817dbd (patch)
tree: b16dd6c33140c1ec571145b64bae574e9be0c76d
parent: 6ff25bc98193fd39e25acfce8e3b2f3b3d80a9c8 (diff)
7 files changed, 227 insertions, 24 deletions
diff --git a/tests/bugs/bug-912564.t b/tests/bugs/bug-912564.t
new file mode 100755
index 000000000..b24268fbc
--- /dev/null
+++ b/tests/bugs/bug-912564.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+# Test that the rsync and "extra" regexes cause rename-in-place without
+# creating linkfiles, when they're supposed to.  Without the regex we'd have a
+# 1/4 chance of each file being assigned to the right place, so with 16 files
+# we have a 1/2^32 chance of getting the correct result by accident.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function count_linkfiles {
+	local i
+	local count=0
+	for i in $(seq $2 $3); do
+		x=$(find $1$i -perm -1000 | wc -l)
+		# Divide by two because of the .glusterfs links.
+		count=$((count+x/2))
+	done
+	echo $count
+}
+
+# This function only exists to get around quoting difficulties in TEST.
+function set_regex {
+	$CLI volume set $1 cluster.extra-hash-regex '^foo(.+)bar$'
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+mkdir -p $H0:$B0/${V0}1
+mkdir -p $H0:$B0/${V0}2
+mkdir -p $H0:$B0/${V0}3
+
+# Create and start a volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+			    $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume start $V0
+EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status';
+
+# Mount it.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Make sure the rsync regex works, by verifying that no linkfiles are
+# created.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+	fn=$(printf file%x $i)
+	tmp_fn=$(printf .%s.%d $fn $RANDOM)
+	echo testing > $M0/$tmp_fn
+	mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+# Make sure that linkfiles *are* created for normal files.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+	fn=$(printf file%x $i)
+	tmp_fn=$(printf foo%sbar $fn)
+	echo testing > $M0/$tmp_fn
+	mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -ne "0" ]
+
+# Make sure that setting an extra regex suppresses the linkfiles.
+TEST set_regex $V0
+rm -f $M0/file*
+for i in $(seq 0 15); do
+	fn=$(printf file%x $i)
+	tmp_fn=$(printf foo%sbar $fn)
+	echo testing > $M0/$tmp_fn
+	mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+# Re-test the rsync regex, to make sure the extra one didn't break it.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+	fn=$(printf file%x $i)
+	tmp_fn=$(printf .%s.%d $fn $RANDOM)
+	echo testing > $M0/$tmp_fn
+	mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+cleanup
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 8f266ef8e..bd00089fc 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -13,6 +13,8 @@
 #include "config.h"
 #endif
 
+#include <regex.h>
+
 #include "dht-mem-types.h"
 #include "libxlator.h"
 #include "syncop.h"
@@ -275,6 +277,12 @@ struct dht_conf {
         /* Request to filter directory entries in readdir request */
 
         gf_boolean_t    readdir_optimize;
+
+        /* Support regex-based name reinterpretation. */
+        regex_t         rsync_regex;
+        gf_boolean_t    rsync_regex_valid;
+        regex_t         extra_regex;
+        gf_boolean_t    extra_regex_valid;
 };
 typedef struct dht_conf dht_conf_t;
 
@@ -420,7 +428,7 @@ xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
 xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
 int       dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
 
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p);
 
 int dht_linkfile_create (call_frame_t    *frame, fop_mknod_cbk_t linkfile_cbk,
                          xlator_t        *tovol, xlator_t *fromvol, loc_t *loc);
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 97eb1f05f..2276ca116 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -44,30 +44,68 @@ dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
 }
 
 
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do {          \
-                rsync_frndly_name = (char *) name;                      \
-                if (name[0] == '.') {                                   \
-                        char *dot   = 0;                                \
-                        int namelen = 0;                                \
-                                                                        \
-                        dot = strrchr (name, '.');                      \
-                        if (dot && dot > (name + 1) && *(dot + 1)) {    \
-                                namelen = (dot - name);                 \
-                                rsync_frndly_name = alloca (namelen);   \
-                                strncpy (rsync_frndly_name, name + 1,   \
-                                         namelen);                      \
-                                rsync_frndly_name[namelen - 1] = 0;     \
-                        }                                               \
-                }                                                       \
-        } while (0);
+static inline
+gf_boolean_t
+dht_munge_name (const char *original, char *modified, size_t len, regex_t *re)
+{
+        regmatch_t      matches[2];
+        size_t          new_len;
 
+        if (regexec(re,original,2,matches,0) != REG_NOMATCH) {
+                if (matches[1].rm_so != -1) {
+                        new_len = matches[1].rm_eo - matches[1].rm_so;
+                        /* Equal would fail due to the NUL at the end. */
+                        if (new_len < len) {
+                                memcpy (modified,original+matches[1].rm_so,
+                                        new_len);
+                                modified[new_len] = '\0';
+                                return _gf_true;
+                        }
+                }
+        }
+
+        /* This is guaranteed safe because of how the dest was allocated. */
+        strcpy(modified,original);
+        return _gf_false;
+}
 
 int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
 {
-        char     *rsync_friendly_name = NULL;
+        char            *rsync_friendly_name    = NULL;
+        dht_conf_t      *priv                   = this->private;
+        size_t           len                    = 0;
+        gf_boolean_t     munged                 = _gf_false;
+
+        /*
+         * It wouldn't be safe to use alloca in an inline function that doesn't
+         * actually get inlined, and it wouldn't be efficient to do a real
+         * allocation, so we use alloca here (if needed) and pass that to the
+         * inline.
+         */
 
-        MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
+        if (priv->extra_regex_valid) {
+                len = strlen(name) + 1;
+                rsync_friendly_name = alloca(len);
+                munged = dht_munge_name (name, rsync_friendly_name, len,
+                                         &priv->extra_regex);
+        }
+
+        if (!munged && priv->rsync_regex_valid) {
+                len = strlen(name) + 1;
+                rsync_friendly_name = alloca(len);
+                gf_log (this->name, GF_LOG_INFO, "trying regex for %s", name);
+                munged = dht_munge_name (name, rsync_friendly_name, len,
+                                         &priv->rsync_regex);
+                if (munged) {
+                        gf_log (this->name, GF_LOG_INFO,
+                                "munged down to %s", rsync_friendly_name);
+                }
+        }
+
+        if (!munged) {
+                rsync_friendly_name = (char *)name;
+        }
 
         return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
 }
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 34a7475bd..71aa1b70c 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -158,7 +158,7 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
         int        ret = 0;
 
 
-        ret = dht_hash_compute (layout->type, name, &hash);
+        ret = dht_hash_compute (this, layout->type, name, &hash);
         if (ret != 0) {
                 gf_log (this->name, GF_LOG_WARNING,
                         "hash computation failed for type=%d name=%s",
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 77afde82e..22c61130f 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -509,7 +509,7 @@ dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
         uint32_t      hashval = 0;
         int           ret = 0;
 
-        ret = dht_hash_compute (layout->type, loc->path, &hashval);
+        ret = dht_hash_compute (this, layout->type, loc->path, &hashval);
         if (ret == 0) {
                 start = (hashval % layout->cnt);
         }
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index bb4d70ec8..784ed920e 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -284,6 +284,39 @@ out:
         return ret;
 }
 
+void
+dht_init_regex (xlator_t *this, dict_t *odict, char *name,
+                regex_t *re, gf_boolean_t *re_valid)
+{
+        char    *temp_str;
+
+        if (dict_get_str (odict, name, &temp_str) != 0) {
+                if (strcmp(name,"rsync-hash-regex")) {
+                        return;
+                }
+                temp_str = "^\\.(.+)\\.[^.]+$";
+        }
+
+        if (*re_valid) {
+                regfree(re);
+                *re_valid = _gf_false;
+        }
+
+        if (!strcmp(temp_str,"none")) {
+                return;
+        }
+
+        if (regcomp(re,temp_str,REG_EXTENDED) == 0) {
+                gf_log (this->name, GF_LOG_INFO,
+                        "using regex %s = %s", name, temp_str);
+                *re_valid = _gf_true;
+        }
+        else {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "compiling regex %s failed", temp_str);
+        }
+}
+
 int
 reconfigure (xlator_t *this, dict_t *options)
 {
@@ -349,12 +382,16 @@ reconfigure (xlator_t *this, dict_t *options)
                         goto out;
         }
 
+        dht_init_regex (this, options, "rsync-hash-regex",
+                        &conf->rsync_regex, &conf->rsync_regex_valid);
+        dht_init_regex (this, options, "extra-hash-regex",
+                        &conf->extra_regex, &conf->extra_regex_valid);
+
         ret = 0;
 out:
         return ret;
 }
 
-
 int
 init (xlator_t *this)
 {
@@ -466,6 +503,11 @@ init (xlator_t *this)
                         goto err;
         }
 
+        dht_init_regex (this, this->options, "rsync-hash-regex",
+                        &conf->rsync_regex, &conf->rsync_regex_valid);
+        dht_init_regex (this, this->options, "extra-hash-regex",
+                        &conf->extra_regex, &conf->extra_regex_valid);
+
         ret = dht_layouts_init (this, conf);
         if (ret == -1) {
                 goto err;
@@ -643,6 +685,20 @@ struct volume_options options[] = {
           "that allows DHT to requests non-first subvolumes to filter out "
           "directory entries."
         },
+        { .key = {"rsync-hash-regex"},
+          .type = GF_OPTION_TYPE_STR,
+          /* Setting a default here doesn't work.  See dht_init_regex. */
+          .description = "Regular expression for stripping temporary-file "
+          "suffix and prefix used by rsync, to prevent relocation when the "
+          "file is renamed."
+        },
+        { .key = {"extra-hash-regex"},
+          .type = GF_OPTION_TYPE_STR,
+          /* Setting a default here doesn't work.  See dht_init_regex. */
+          .description = "Regular expression for stripping temporary-file "
+          "suffix and prefix used by an application, to prevent relocation when "
+          "the file is renamed."
+        },
 
         { .key  = {NULL} },
 };
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 2364e0da7..41719ec89 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -105,7 +105,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .type        = NO_DOC,
           .op_version  = 2
         },
-
+        { .key         = "cluster.rsync-hash-regex",
+          .voltype     = "cluster/distribute",
+          .type        = NO_DOC,
+          .op_version  = 2
+        },
+        { .key         = "cluster.extra-hash-regex",
+          .voltype     = "cluster/distribute",
+          .type        = NO_DOC,
+          .op_version  = 2
+        },
 
         /* AFR xlator options */
         { .key         = "cluster.entry-change-log",
author	Jeff Darcy <jdarcy@redhat.com>	2013-02-18 13:16:39 -0500
committer	Anand Avati <avati@redhat.com>	2013-02-18 20:50:48 -0800
commit	59ac567c8b5ebf20b573ecf250e13f841e817dbd (patch)
tree	b16dd6c33140c1ec571145b64bae574e9be0c76d
parent	6ff25bc98193fd39e25acfce8e3b2f3b3d80a9c8 (diff)