summaryrefslogtreecommitdiffstats
path: root/libglusterfs/src
diff options
context:
space:
mode:
authorCsaba Henk <csaba@gluster.com>2010-10-26 04:00:29 +0000
committerAnand V. Avati <avati@dev.gluster.com>2010-10-26 23:56:12 -0700
commitdb94ed06a688fb596aba4deafdf59a5af2fd6bbe (patch)
tree84303f0d59270c75ff1e4ad1df1e3466b5cadde8 /libglusterfs/src
parent9f14b0a0ef26b6d41b61222dcf34fe7cdf46cb46 (diff)
libglusterfs, glusterfsd: add shortname resolution + optname hinting support to VOLUME SET
Trie code used for hinting is contributed by Avati. Signed-off-by: Csaba Henk <csaba@lowlife.hu> Signed-off-by: Anand V. Avati <avati@dev.gluster.com> BUG: 1750 (clean up volgen) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1750
Diffstat (limited to 'libglusterfs/src')
-rw-r--r--libglusterfs/src/Makefile.am4
-rw-r--r--libglusterfs/src/trie-mem-types.h34
-rw-r--r--libglusterfs/src/trie.c397
-rw-r--r--libglusterfs/src/trie.h60
4 files changed, 493 insertions, 2 deletions
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 71a088d98eb..619ccf158a1 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -6,9 +6,9 @@ libglusterfs_la_LIBADD = @LEXLIB@
lib_LTLIBRARIES = libglusterfs.la
-libglusterfs_la_SOURCES = dict.c graph.lex.c y.tab.c xlator.c logging.c hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c iobuf.c globals.c statedump.c stack.c checksum.c $(CONTRIBDIR)/md5/md5.c $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/tst_uuid.c $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c graph-print.c
+libglusterfs_la_SOURCES = dict.c graph.lex.c y.tab.c xlator.c logging.c hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c iobuf.c globals.c statedump.c stack.c checksum.c $(CONTRIBDIR)/md5/md5.c $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/tst_uuid.c $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c graph-print.c trie.c
-noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h logging.h xlator.h stack.h timer.h list.h inode.h call-stub.h compat.h fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h checksum.h $(CONTRIBDIR)/md5/md5.h $(CONTRIBDIR)/rbtree/rb.h rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h $(CONTRIBDIR)/uuid/uuid_types.h syncop.h graph-utils.h graph-mem-types.h
+noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h logging.h xlator.h stack.h timer.h list.h inode.h call-stub.h compat.h fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h checksum.h $(CONTRIBDIR)/md5/md5.h $(CONTRIBDIR)/rbtree/rb.h rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h $(CONTRIBDIR)/uuid/uuid_types.h syncop.h graph-utils.h graph-mem-types.h trie.h trie-mem-types.h
EXTRA_DIST = graph.l graph.y
diff --git a/libglusterfs/src/trie-mem-types.h b/libglusterfs/src/trie-mem-types.h
new file mode 100644
index 00000000000..2b21f67e1a8
--- /dev/null
+++ b/libglusterfs/src/trie-mem-types.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __TRIE_MEM_TYPES_H__
+#define __TRIE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
+
+enum gf_trie_mem_types_ {
+ gf_trie_mt_trie = GF_MEM_TYPE_START,
+ gf_trie_mt_data,
+ gf_trie_mt_node,
+ gf_trie_mt_buf,
+ gf_trie_mt_end,
+};
+#endif
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c
new file mode 100644
index 00000000000..2501e71a540
--- /dev/null
+++ b/libglusterfs/src/trie.c
@@ -0,0 +1,397 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "common-utils.h"
+#include "trie-mem-types.h"
+#include "trie.h"
+
+#define DISTANCE_EDIT 1
+#define DISTANCE_INS 1
+#define DISTANCE_DEL 1
+
+
+struct trienode {
+ char id;
+ char eow;
+ int depth;
+ void *data;
+ struct trie *trie;
+ struct trienode *parent;
+ struct trienode *subnodes[255];
+};
+
+struct trie {
+ struct trienode root;
+ int nodecnt;
+ size_t len;
+};
+
+
+trie_t *
+trie_new ()
+{
+ trie_t *trie = NULL;
+
+ trie = GF_CALLOC (1, sizeof (*trie), gf_trie_mt_trie);
+ if (!trie)
+ return NULL;
+
+ trie->root.trie = trie;
+
+ return trie;
+}
+
+
+static trienode_t *
+trie_subnode (trienode_t *node, int id)
+{
+ trienode_t *subnode = NULL;
+
+ subnode = node->subnodes[id];
+ if (!subnode) {
+ subnode = GF_CALLOC (1, sizeof (*subnode), gf_trie_mt_node);
+ if (!subnode)
+ return NULL;
+
+ subnode->id = id;
+ subnode->depth = node->depth + 1;
+ node->subnodes[id] = subnode;
+ subnode->parent = node;
+ subnode->trie = node->trie;
+ node->trie->nodecnt++;
+ }
+
+ return subnode;
+}
+
+
+int
+trie_add (trie_t *trie, const char *dword)
+{
+ trienode_t *node = NULL;
+ int i = 0;
+ char id = 0;
+ trienode_t *subnode = NULL;
+
+ node = &trie->root;
+
+ for (i = 0; i < strlen (dword); i++) {
+ id = dword[i];
+
+ subnode = trie_subnode (node, id);
+ if (!subnode)
+ return -1;
+ node = subnode;
+ }
+
+ node->eow = 1;
+
+ return 0;
+}
+
+static void
+trienode_free (trienode_t *node)
+{
+ trienode_t *trav = NULL;
+ int i = 0;
+
+ for (i = 0; i < 255; i++) {
+ trav = node->subnodes[i];
+
+ if (trav)
+ trienode_free (trav);
+ }
+
+ if (node->data)
+ GF_FREE (node->data);
+ GF_FREE (node);
+}
+
+
+void
+trie_destroy (trie_t *trie)
+{
+ trienode_free ((trienode_t *)trie);
+}
+
+
+void
+trie_destroy_bynode (trienode_t *node)
+{
+ trie_destroy (node->trie);
+}
+
+
+static int
+trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data),
+ void *data, int eowonly)
+{
+ trienode_t *trav = NULL;
+ int i = 0;
+ int cret = 0;
+ int ret = 0;
+
+ if (!eowonly || node->eow)
+ ret = fn (node, data);
+
+ if (ret)
+ goto out;
+
+ for (i = 0; i < 255; i++) {
+ trav = node->subnodes[i];
+ if (!trav)
+ continue;
+
+ cret = trienode_walk (trav, fn, data, eowonly);
+ if (cret < 0) {
+ ret = cret;
+ goto out;
+ }
+ ret += cret;
+ }
+
+out:
+ return ret;
+}
+
+
+static int
+trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data),
+ void *data, int eowonly)
+{
+ return trienode_walk (&trie->root, fn, data, eowonly);
+}
+
+
+static void
+print_node (trienode_t *node, char **buf)
+{
+ if (!node->parent)
+ return;
+
+ if (node->parent) {
+ print_node (node->parent, buf);
+ *(*buf)++ = node->id;
+ }
+}
+
+
+int
+trienode_get_word (trienode_t *node, char **bufp)
+{
+ char *buf = NULL;
+
+ buf = GF_CALLOC (1, node->depth + 1, gf_trie_mt_buf);
+ if (!buf)
+ return -1;
+ *bufp = buf;
+
+ print_node (node, &buf);
+
+ return 0;
+}
+
+
+static int
+calc_dist (trienode_t *node, void *data)
+{
+ const char *word = NULL;
+ int i = 0;
+ int *row = NULL;
+ int *uprow = NULL;
+ int distu = 0;
+ int distl = 0;
+ int distul = 0;
+
+ word = data;
+
+ node->data = GF_CALLOC (node->trie->len, sizeof (int), gf_trie_mt_data);
+ if (!node->data)
+ return -1;
+ row = node->data;
+
+ if (!node->parent) {
+ for (i = 0; i < node->trie->len; i++)
+ row[i] = i+1;
+
+ return 0;
+ }
+
+ uprow = node->parent->data;
+
+ distu = node->depth; /* up node */
+ distul = node->parent->depth; /* up-left node */
+
+ for (i = 0; i < node->trie->len; i++) {
+ distl = uprow[i]; /* left node */
+
+ if (word[i] == node->id)
+ row[i] = distul;
+ else
+ row[i] = min ((distul + DISTANCE_EDIT),
+ min ((distu + DISTANCE_DEL),
+ (distl + DISTANCE_INS)));
+
+ distu = row[i];
+ distul = distl;
+ }
+
+ return 0;
+}
+
+
+int
+trienode_get_dist (trienode_t *node)
+{
+ int *row = NULL;
+
+ row = node->data;
+
+ return row[node->trie->len - 1];
+}
+
+
+struct trienodevec_w {
+ struct trienodevec *vec;
+ const char *word;
+};
+
+
+static void
+trienodevec_clear (struct trienodevec *nodevec)
+{
+ memset(nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt);
+}
+
+
+static int
+collect_closest (trienode_t *node, void *data)
+{
+ struct trienodevec_w *nodevec_w = NULL;
+ struct trienodevec *nodevec = NULL;
+ int dist = 0;
+ int i = 0;
+
+ nodevec_w = data;
+ nodevec = nodevec_w->vec;
+
+ if (calc_dist (node, (void *)nodevec_w->word))
+ return -1;
+
+ if (!node->eow || !nodevec->cnt)
+ return 0;
+
+ dist = trienode_get_dist (node);
+
+ /*
+ * I thought that when descending further after some dictionary word dw,
+ * if we see that child's distance is bigger than it was for dw, then we
+ * can prune this branch, as it can contain only worse nodes.
+ *
+ * This conjecture fails, see eg:
+ *
+ * d("AB", "B") = 1;
+ * d("AB", "BA") = 2;
+ * d("AB", "BAB") = 1;
+ *
+ * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
+ * miss "BAB".
+ *
+ * (example courtesy of Richard Bann <richardbann at gmail.com>)
+
+ if (node->parent->eow && dist > trienode_get_dist (node->parent))
+ return 1;
+
+ */
+
+ if (nodevec->nodes[0] &&
+ dist < trienode_get_dist (nodevec->nodes[0])) {
+ /* improving over the findings so far */
+ trienodevec_clear (nodevec);
+ nodevec->nodes[0] = node;
+ } else if (!nodevec->nodes[0] ||
+ dist == trienode_get_dist (nodevec->nodes[0])) {
+ /* as good as the best so far, add if there is free space */
+ for (i = 0; i < nodevec->cnt; i++) {
+ if (!nodevec->nodes[i]) {
+ nodevec->nodes[i] = node;
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
+ int nodecnt)
+{
+ struct trienodevec nodevec = {0,};
+
+ nodevec.nodes = nodes;
+ nodevec.cnt = nodecnt;
+
+ return trie_measure_vec (trie, word, &nodevec);
+}
+
+
+int
+trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec)
+{
+ struct trienodevec_w nodevec_w = {0,};
+ int ret = 0;
+
+ trie->len = strlen (word);
+
+ trienodevec_clear (nodevec);
+ nodevec_w.vec = nodevec;
+ nodevec_w.word = word;
+
+ ret = trie_walk (trie, collect_closest, &nodevec_w, 0);
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
+
+static int
+trienode_reset (trienode_t *node, void *data)
+{
+ if (node->data)
+ GF_FREE (node->data);
+
+ return 0;
+}
+
+
+void
+trie_reset_search (trie_t *trie)
+{
+ trie->len = 0;
+
+ trie_walk (trie, trienode_reset, NULL, 0);
+}
diff --git a/libglusterfs/src/trie.h b/libglusterfs/src/trie.h
new file mode 100644
index 00000000000..a670c2a277c
--- /dev/null
+++ b/libglusterfs/src/trie.h
@@ -0,0 +1,60 @@
+/*
+ Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _TRIE_H_
+#define _TRIE_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+struct trienode;
+typedef struct trienode trienode_t;
+
+struct trie;
+typedef struct trie trie_t;
+
+struct trienodevec {
+ trienode_t **nodes;
+ unsigned cnt;
+};
+
+
+trie_t *trie_new ();
+
+int trie_add (trie_t *trie, const char *word);
+
+void trie_destroy (trie_t *trie);
+
+void trie_destroy_bynode (trienode_t *node);
+
+int trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
+ int nodecnt);
+
+int trie_measure_vec (trie_t *trie, const char *word,
+ struct trienodevec *nodevec);
+
+void trie_reset_search (trie_t *trie);
+
+int trienode_get_dist (trienode_t *node);
+
+int trienode_get_word (trienode_t *node, char **buf);
+
+#endif