summaryrefslogtreecommitdiffstats
path: root/libglusterfs/src/trie.c
diff options
context:
space:
mode:
authorCsaba Henk <csaba@gluster.com>2010-10-26 04:00:29 +0000
committerAnand V. Avati <avati@dev.gluster.com>2010-10-26 23:56:12 -0700
commitdb94ed06a688fb596aba4deafdf59a5af2fd6bbe (patch)
tree84303f0d59270c75ff1e4ad1df1e3466b5cadde8 /libglusterfs/src/trie.c
parent9f14b0a0ef26b6d41b61222dcf34fe7cdf46cb46 (diff)
libglusterfs, glusterfsd: add shortname resolution + optname hinting support to VOLUME SET
Trie code used for hinting is contributed by Avati. Signed-off-by: Csaba Henk <csaba@lowlife.hu> Signed-off-by: Anand V. Avati <avati@dev.gluster.com> BUG: 1750 (clean up volgen) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1750
Diffstat (limited to 'libglusterfs/src/trie.c')
-rw-r--r--libglusterfs/src/trie.c397
1 files changed, 397 insertions, 0 deletions
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c
new file mode 100644
index 00000000000..2501e71a540
--- /dev/null
+++ b/libglusterfs/src/trie.c
@@ -0,0 +1,397 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "common-utils.h"
+#include "trie-mem-types.h"
+#include "trie.h"
+
+/*
+ * Costs added by calc_dist() when the compared characters differ: one
+ * constant per term of the three-way minimum it takes over neighbouring
+ * cells of the distance matrix.  All three are currently 1.
+ */
+#define DISTANCE_EDIT 1
+#define DISTANCE_INS 1
+#define DISTANCE_DEL 1
+
+
+/*
+ * One node of the trie.  The path of ids from the root down to a node spells
+ * the prefix that the node stands for.
+ */
+struct trienode {
+        /* byte on the edge leading from the parent to this node */
+        char             id;
+        /* set to 1 by trie_add() on the node that ends an added word */
+        char             eow;
+        /* number of edges from the root; the root has depth 0 */
+        int              depth;
+        /* per-search row of ints allocated by calc_dist(), or NULL */
+        void            *data;
+        /* trie this node belongs to */
+        struct trie     *trie;
+        /* NULL for the root node */
+        struct trienode *parent;
+        /*
+         * Children, indexed by byte value.  A byte can take 256 distinct
+         * values, so the table needs 256 slots; with only 255 there is no
+         * slot for byte 0xFF.
+         */
+        struct trienode *subnodes[256];
+};
+
+/* A trie together with the search state shared by all of its nodes. */
+struct trie {
+ /* embedded root node; trie_new() points root.trie back at this struct */
+ struct trienode root;
+ /* incremented by trie_subnode() each time it creates a node */
+ int nodecnt;
+ /* length of the word being measured: set by trie_measure_vec(),
+  * zeroed by trie_reset_search() */
+ size_t len;
+};
+
+
+/*
+ * Allocate an empty, zero-filled trie.
+ *
+ * The embedded root node is given a back-pointer to the trie so that every
+ * node created below it can inherit the owner.
+ *
+ * Returns the new trie, or NULL if the allocation fails.
+ */
+trie_t *
+trie_new ()
+{
+        trie_t *fresh = GF_CALLOC (1, sizeof (*fresh), gf_trie_mt_trie);
+
+        if (fresh)
+                fresh->root.trie = fresh;
+
+        return fresh;
+}
+
+
+/*
+ * Return the child of @node reached through byte @id, creating and linking
+ * it on first use.
+ *
+ * @id is typically a plain char widened to int, so it is negative for bytes
+ * >= 0x80 wherever char is signed.  It is folded onto 0..255 before being
+ * used as an index, and rejected if the child table has no slot for it.
+ *
+ * Returns the child node, or NULL if the byte has no slot or the allocation
+ * of a new node fails.
+ */
+static trienode_t *
+trie_subnode (trienode_t *node, int id)
+{
+        trienode_t     *subnode = NULL;
+        size_t          slots   = 0;
+        size_t          idx     = 0;
+
+        idx   = (unsigned char) id;
+        slots = sizeof (node->subnodes) / sizeof (node->subnodes[0]);
+        if (idx >= slots)
+                return NULL;
+
+        subnode = node->subnodes[idx];
+        if (!subnode) {
+                subnode = GF_CALLOC (1, sizeof (*subnode), gf_trie_mt_node);
+                if (!subnode)
+                        return NULL;
+
+                subnode->id         = (char) idx;
+                subnode->depth      = node->depth + 1;
+                subnode->parent     = node;
+                subnode->trie       = node->trie;
+                node->subnodes[idx] = subnode;
+                node->trie->nodecnt++;
+        }
+
+        return subnode;
+}
+
+
+/*
+ * Insert the NUL-terminated word @dword into @trie, creating nodes as needed
+ * and flagging the final node as end-of-word.  An empty word flags the root.
+ *
+ * Each character is converted to unsigned char before it is used as a child
+ * index, so bytes >= 0x80 do not turn into negative indices where char is
+ * signed.  The string is scanned once rather than calling strlen() on every
+ * iteration.
+ *
+ * Returns 0 on success; -1 if an argument is NULL, a byte has no slot in the
+ * child table, or a node cannot be allocated.  Nodes created before a
+ * failure stay in the trie.
+ */
+int
+trie_add (trie_t *trie, const char *dword)
+{
+        trienode_t      *node  = NULL;
+        const char      *p     = NULL;
+        size_t           slots = 0;
+        unsigned char    byte  = 0;
+
+        if (!trie || !dword)
+                return -1;
+
+        node  = &trie->root;
+        slots = sizeof (node->subnodes) / sizeof (node->subnodes[0]);
+
+        for (p = dword; *p; p++) {
+                byte = (unsigned char) *p;
+                if (byte >= slots)
+                        return -1;
+
+                node = trie_subnode (node, byte);
+                if (!node)
+                        return -1;
+        }
+
+        node->eow = 1;
+
+        return 0;
+}
+
+/*
+ * Recursively release @node: first every child subtree, then the node's
+ * search row (if any), then the node itself.
+ *
+ * The number of child slots is taken from the array itself, so the loop
+ * cannot drift out of step with the declaration in struct trienode.
+ *
+ * The root node is embedded at the start of struct trie, so calling this on
+ * the root releases the trie allocation as well.
+ */
+static void
+trienode_free (trienode_t *node)
+{
+        size_t  slots = sizeof (node->subnodes) / sizeof (node->subnodes[0]);
+        size_t  i     = 0;
+
+        for (i = 0; i < slots; i++) {
+                if (node->subnodes[i])
+                        trienode_free (node->subnodes[i]);
+        }
+
+        if (node->data)
+                GF_FREE (node->data);
+        GF_FREE (node);
+}
+
+
+/*
+ * Release @trie and every node in it.  A NULL @trie is ignored.
+ *
+ * The root node is the first member of struct trie, so freeing the tree from
+ * the root also releases the trie allocation; no separate free is needed.
+ */
+void
+trie_destroy (trie_t *trie)
+{
+        if (!trie)
+                return;
+
+        trienode_free (&trie->root);
+}
+
+
+/*
+ * Destroy the whole trie that @node belongs to, @node included.
+ */
+void
+trie_destroy_bynode (trienode_t *node)
+{
+        trie_t *owner = node->trie;
+
+        trie_destroy (owner);
+}
+
+
+/*
+ * Pre-order walk of the subtree rooted at @node.
+ *
+ * @fn is called with (@node, @data) on every node, or only on end-of-word
+ * nodes when @eowonly is non-zero.  Its return value steers the walk:
+ *   0   - continue into the node's children;
+ *   > 0 - skip this node's children; the value is returned and added to the
+ *         caller's running total;
+ *   < 0 - abort: the value is passed straight up through every level.
+ *
+ * The number of child slots is taken from the array itself instead of a
+ * hard-coded count, so it always matches struct trienode.
+ *
+ * Returns the sum of the positive values collected, or the negative value
+ * that aborted the walk.
+ */
+static int
+trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data),
+               void *data, int eowonly)
+{
+        trienode_t     *trav  = NULL;
+        size_t          slots = sizeof (node->subnodes) / sizeof (node->subnodes[0]);
+        size_t          i     = 0;
+        int             cret  = 0;
+        int             ret   = 0;
+
+        if (!eowonly || node->eow)
+                ret = fn (node, data);
+
+        if (ret)
+                goto out;
+
+        for (i = 0; i < slots; i++) {
+                trav = node->subnodes[i];
+                if (!trav)
+                        continue;
+
+                cret = trienode_walk (trav, fn, data, eowonly);
+                if (cret < 0) {
+                        ret = cret;
+                        goto out;
+                }
+                ret += cret;
+        }
+
+out:
+        return ret;
+}
+
+
+/*
+ * Walk all of @trie, starting at its root node.  The arguments and the
+ * return value have the same meaning as for trienode_walk().
+ */
+static int
+trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data),
+           void *data, int eowonly)
+{
+        trienode_t *start = &trie->root;
+
+        return trienode_walk (start, fn, data, eowonly);
+}
+
+
+/*
+ * Write the characters on the path from the root to @node at *@buf, in
+ * root-to-node order, advancing *@buf past each character written.
+ *
+ * The root has no parent and contributes nothing.  No terminator is
+ * written and no bounds are checked.
+ */
+static void
+print_node (trienode_t *node, char **buf)
+{
+        if (node->parent) {
+                /* ancestors first, so that the text comes out in order */
+                print_node (node->parent, buf);
+
+                **buf = node->id;
+                (*buf)++;
+        }
+}
+
+
+/*
+ * Build the string spelled by the path from the root to @node.
+ *
+ * A zero-filled buffer of depth + 1 bytes is allocated, which leaves room
+ * for the terminating NUL, and is stored in *@bufp.  This function does not
+ * release the buffer; presumably the caller frees it with GF_FREE.
+ *
+ * Returns 0 on success, or -1 if the allocation fails, in which case *@bufp
+ * is left untouched.
+ */
+int
+trienode_get_word (trienode_t *node, char **bufp)
+{
+        char *word   = NULL;
+        char *cursor = NULL;
+
+        word = GF_CALLOC (1, node->depth + 1, gf_trie_mt_buf);
+        if (word) {
+                *bufp  = word;
+                cursor = word;
+                print_node (node, &cursor);
+
+                return 0;
+        }
+
+        return -1;
+}
+
+
+/*
+ * Walk callback: compute this node's row of the edit-distance matrix
+ * between the searched word (@data, a NUL-terminated string) and the
+ * prefix spelled by @node.
+ *
+ * The row has trie->len ints; entry [c] is the distance between the node's
+ * prefix and the first c + 1 characters of the word.  It is built from the
+ * parent's row, so parents must be visited before their children, which is
+ * the order trienode_walk() uses.
+ *
+ * The freshly allocated row replaces whatever node->data held; an earlier
+ * row is not released here.
+ *
+ * Returns 0 on success, -1 if the row cannot be allocated.
+ */
+static int
+calc_dist (trienode_t *node, void *data)
+{
+        const char     *target = data;
+        int            *cur    = NULL;  /* row being filled in */
+        int            *above  = NULL;  /* parent's finished row */
+        int             col    = 0;
+        int             prev   = 0;     /* this row, previous column */
+        int             diag   = 0;     /* parent row, previous column */
+        int             up     = 0;     /* parent row, same column */
+
+        cur = GF_CALLOC (node->trie->len, sizeof (int), gf_trie_mt_data);
+        node->data = cur;
+        if (!cur)
+                return -1;
+
+        if (!node->parent) {
+                /* root = empty prefix: distance is the word prefix length */
+                for (col = 0; col < node->trie->len; col++)
+                        cur[col] = col + 1;
+
+                return 0;
+        }
+
+        above = node->parent->data;
+
+        /* Values of the implicit column for the empty word prefix: the
+         * distance there is simply the length of the trie prefix. */
+        prev = node->depth;
+        diag = node->parent->depth;
+
+        for (col = 0; col < node->trie->len; col++) {
+                up = above[col];
+
+                if (target[col] == node->id) {
+                        cur[col] = diag;
+                } else {
+                        cur[col] = min ((diag + DISTANCE_EDIT),
+                                        min ((prev + DISTANCE_DEL),
+                                             (up + DISTANCE_INS)));
+                }
+
+                prev = cur[col];
+                diag = up;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Return the edit distance between the word being measured and the prefix
+ * spelled by @node, i.e. the last entry of the row stored by calc_dist().
+ *
+ * When the measured word is empty (trie->len == 0) the row has no entries;
+ * the distance is then the node's depth, since the whole prefix differs
+ * from the empty word.  This avoids indexing the row at len - 1, which
+ * would wrap around for len == 0.
+ *
+ * Returns the distance, or -1 if the node carries no row.
+ */
+int
+trienode_get_dist (trienode_t *node)
+{
+        int    *row = NULL;
+        size_t  len = 0;
+
+        len = node->trie->len;
+        if (len == 0)
+                return node->depth;
+
+        row = node->data;
+        if (!row)
+                return -1;
+
+        return row[len - 1];
+}
+
+
+/*
+ * Context handed to collect_closest() through the walk's opaque data
+ * pointer: the vector receiving the results and the word being matched.
+ */
+struct trienodevec_w {
+ /* result vector that collect_closest() fills */
+ struct trienodevec *vec;
+ /* word passed on to calc_dist() */
+ const char *word;
+};
+
+
+/*
+ * Set every slot of @nodevec to all-zero bytes, i.e. mark it empty.
+ *
+ * A vector with no storage or no slots is left alone: handing memset() a
+ * null pointer is undefined even when the length is zero.
+ */
+static void
+trienodevec_clear (struct trienodevec *nodevec)
+{
+        if (!nodevec->nodes || !nodevec->cnt)
+                return;
+
+        memset (nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt);
+}
+
+
+/*
+ * Walk callback: compute @node's distance row and, if @node ends a
+ * dictionary word, consider it for the set of closest matches.
+ *
+ * @data is a struct trienodevec_w.  Slot 0 of its vector always holds a
+ * node at the best distance seen so far; the remaining slots hold further
+ * nodes at that same distance, as long as space lasts.
+ *
+ * Returns 0 to continue the walk, -1 if the distance row cannot be
+ * allocated.
+ *
+ * Note on pruning: it is tempting to stop descending (return 1) as soon as
+ * a child of a dictionary word scores worse than that word, on the theory
+ * that the branch can only get worse.  That conjecture is false:
+ *
+ *   d("AB", "B")   = 1;
+ *   d("AB", "BA")  = 2;
+ *   d("AB", "BAB") = 1;
+ *
+ * -- if both "B" and "BAB" are in the dictionary, pruning at "BA" would
+ * miss "BAB".  Hence every node is visited.
+ *
+ * (example courtesy of Richard Bann <richardbann at gmail.com>)
+ */
+static int
+collect_closest (trienode_t *node, void *data)
+{
+        struct trienodevec_w *ctx      = data;
+        struct trienodevec   *found    = ctx->vec;
+        trienode_t           *best     = NULL;
+        int                   bestdist = 0;
+        int                   dist     = 0;
+        int                   slot     = 0;
+
+        /* every node needs its row, since the children build on it */
+        if (calc_dist (node, (void *)ctx->word) != 0)
+                return -1;
+
+        /* only whole words are candidates, and only if there is room */
+        if (!(node->eow && found->cnt))
+                return 0;
+
+        dist = trienode_get_dist (node);
+        best = found->nodes[0];
+
+        if (best) {
+                bestdist = trienode_get_dist (best);
+
+                if (dist < bestdist) {
+                        /* strictly better: start over with this node */
+                        trienodevec_clear (found);
+                        found->nodes[0] = node;
+
+                        return 0;
+                }
+
+                if (dist != bestdist)
+                        return 0;       /* worse than what we have */
+        }
+
+        /* first candidate, or a tie: take the first free slot, if any */
+        for (slot = 0; slot < found->cnt; slot++) {
+                if (found->nodes[slot])
+                        continue;
+
+                found->nodes[slot] = node;
+                break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Convenience front end to trie_measure_vec(): wraps the caller's array
+ * @nodes of @nodecnt slots in a struct trienodevec and runs the search
+ * for @word on @trie.
+ *
+ * Returns whatever trie_measure_vec() returns.
+ */
+int
+trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
+              int nodecnt)
+{
+        struct trienodevec vec = {0,};
+        int                ret = 0;
+
+        vec.cnt   = nodecnt;
+        vec.nodes = nodes;
+
+        ret = trie_measure_vec (trie, word, &vec);
+
+        return ret;
+}
+
+
+/*
+ * Find the dictionary words in @trie closest to @word by edit distance.
+ *
+ * @nodevec is cleared and then filled with the end-of-word nodes at the
+ * smallest distance found, up to its capacity; unused slots stay NULL.
+ * Every node visited gets a distance row allocated by calc_dist(), which
+ * does not release an earlier row, so call trie_reset_search() before
+ * measuring again on the same trie.
+ *
+ * Returns 0 on success, or a negative value if an argument is NULL or a
+ * distance row cannot be allocated.
+ */
+int
+trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec)
+{
+        struct trienodevec_w nodevec_w = {0,};
+        int                  ret       = 0;
+
+        if (!trie || !word || !nodevec)
+                return -1;
+
+        trie->len = strlen (word);
+
+        trienodevec_clear (nodevec);
+        nodevec_w.vec  = nodevec;
+        nodevec_w.word = word;
+
+        ret = trie_walk (trie, collect_closest, &nodevec_w, 0);
+        /* positive values are only walk bookkeeping, not errors */
+        if (ret > 0)
+                ret = 0;
+
+        return ret;
+}
+
+
+/*
+ * Walk callback: drop the distance row attached to @node.  @data is unused.
+ *
+ * The pointer is cleared after the free.  Otherwise it would dangle, and a
+ * later trienode_free() on the same node would free the row a second time.
+ *
+ * Always returns 0 so that the walk visits every node.
+ */
+static int
+trienode_reset (trienode_t *node, void *data)
+{
+        if (node->data) {
+                GF_FREE (node->data);
+                node->data = NULL;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Discard the state left behind by a search: run trienode_reset() on every
+ * node of @trie and set the recorded word length back to zero.
+ */
+void
+trie_reset_search (trie_t *trie)
+{
+        /* the reset callback never fails, so the result is not needed */
+        (void) trie_walk (trie, trienode_reset, NULL, 0);
+
+        trie->len = 0;
+}