summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-helpers.c
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2014-05-05 12:57:34 +0200
committerVijay Bellur <vbellur@redhat.com>2014-07-11 10:33:40 -0700
commitad112305a1c7452b13c92238b40ded80361838f3 (patch)
tree82dbf9aa0b77eb76d43c8b1ccb3ba58e61bc4e2a /xlators/cluster/ec/src/ec-helpers.c
parent6b4702897bd56e29db4db06f8cf896f89df1133c (diff)
cluster/ec: Added erasure code translator
Change-Id: I293917501d5c2ca4cdc6303df30cf0b568cea361 BUG: 1118629 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/7749 Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-helpers.c')
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c594
1 files changed, 594 insertions, 0 deletions
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
new file mode 100644
index 00000000000..771faf5b013
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -0,0 +1,594 @@
+/*
+ Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
+
+ This file is part of the cluster/ec translator for GlusterFS.
+
+ The cluster/ec translator for GlusterFS is free software: you can
+ redistribute it and/or modify it under the terms of the GNU General
+ Public License as published by the Free Software Foundation, either
+ version 3 of the License, or (at your option) any later version.
+
+ The cluster/ec translator for GlusterFS is distributed in the hope
+ that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with the cluster/ec translator for GlusterFS. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <libgen.h>
+
+#include "byte-order.h"
+
+#include "ec-mem-types.h"
+#include "ec-fops.h"
+#include "ec-helpers.h"
+
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
+
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
+
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
+
+static const char * ec_fop_list[] =
+{
+ [-EC_FOP_HEAL] = "HEAL"
+};
+
+const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits)
+{
+ str += size;
+
+ if (size-- < 1)
+ {
+ goto failed;
+ }
+ *--str = 0;
+
+ while ((value != 0) || (digits > 0))
+ {
+ if (size-- < 1)
+ {
+ goto failed;
+ }
+ *--str = '0' + (value & 1);
+ digits--;
+ value >>= 1;
+ }
+
+ return str;
+
+failed:
+ return "<buffer too small>";
+}
+
+const char * ec_fop_name(int32_t id)
+{
+ if (id >= 0)
+ {
+ return gf_fop_list[id];
+ }
+
+ return ec_fop_list[-id];
+}
+
+void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)
+{
+ char str1[32], str2[32], str3[32];
+ char * msg;
+ ec_t * ec = fop->xl->private;
+ va_list args;
+ int32_t ret;
+
+ va_start(args, fmt);
+ ret = vasprintf(&msg, fmt, args);
+ va_end(args);
+
+ if (ret < 0)
+ {
+ msg = "<memory allocation error>";
+ }
+
+ gf_log("ec", GF_LOG_TRACE, "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] "
+ "frame=%p/%p, min/exp=%d/%d, err=%d state=%d "
+ "{%s:%s:%s} %s",
+ event, ec_fop_name(fop->id), fop, fop->parent, fop->refs,
+ fop->winds, fop->jobs, fop->req_frame, fop->frame, fop->minimum,
+ fop->expected, fop->error, fop->state,
+ ec_bin(str1, sizeof(str1), fop->mask, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->remaining, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->bad, ec->nodes), msg);
+
+ if (ret >= 0)
+ {
+ free(msg);
+ }
+}
+
+uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset)
+{
+ int32_t bits;
+
+ if (offset == -1ULL)
+ {
+ return -1ULL;
+ }
+
+ bits = ec->bits_for_nodes;
+ if ((offset & ~(PRESENT_MASK >> (bits + 1))) != 0)
+ {
+ return TOP_BIT | ((offset >> SHIFT_BITS) & (MASK << bits)) | idx;
+ }
+
+ return (offset * ec->nodes) + idx;
+}
+
+uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset)
+{
+ uint64_t mask = 0;
+
+ if ((offset & TOP_BIT) != 0)
+ {
+ mask = MASK << ec->bits_for_nodes;
+
+ *idx = offset & ~mask;
+ return ((offset & ~TOP_BIT) & mask) << SHIFT_BITS;
+ }
+
+ *idx = offset % ec->nodes;
+
+ return offset / ec->nodes;
+}
+
+int32_t ec_bits_count(uint64_t n)
+{
+ n -= (n >> 1) & 0x5555555555555555ULL;
+ n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL);
+ n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
+ n += n >> 8;
+ n += n >> 16;
+ n += n >> 32;
+
+ return n & 0xFF;
+}
+
+int32_t ec_bits_index(uint64_t n)
+{
+ return ffsll(n) - 1;
+}
+
+int32_t ec_bits_consume(uint64_t * n)
+{
+ uint64_t tmp;
+
+ tmp = *n;
+ tmp &= -tmp;
+ *n ^= tmp;
+
+ return ffsll(tmp) - 1;
+}
+
+size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
+ off_t offset, size_t size)
+{
+ int32_t i = 0;
+ size_t total = 0, len = 0;
+
+ while (i < count)
+ {
+ if (offset < vector[i].iov_len)
+ {
+ while ((i < count) && (size > 0))
+ {
+ len = size;
+ if (len > vector[i].iov_len - offset)
+ {
+ len = vector[i].iov_len - offset;
+ }
+ memcpy(dst, vector[i++].iov_base + offset, len);
+ offset = 0;
+ dst += len;
+ total += len;
+ size -= len;
+ }
+
+ break;
+ }
+
+ offset -= vector[i].iov_len;
+ i++;
+ }
+
+ return total;
+}
+
+int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value)
+{
+ uint64_t * ptr;
+
+ ptr = GF_MALLOC(sizeof(value), gf_common_mt_char);
+ if (ptr == NULL)
+ {
+ return -1;
+ }
+
+ *ptr = hton64(value);
+
+ return dict_set_bin(dict, key, ptr, sizeof(value));
+}
+
+int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value)
+{
+ void * ptr;
+ int32_t len;
+
+ if ((dict == NULL) || (dict_get_ptr_and_len(dict, key, &ptr, &len) != 0) ||
+ (len != sizeof(uint64_t)))
+ {
+ return -1;
+ }
+
+ *value = ntoh64(*(uint64_t *)ptr);
+
+ dict_del(dict, key);
+
+ return 0;
+}
+
+int32_t ec_loc_gfid_check(xlator_t * xl, uuid_t dst, uuid_t src)
+{
+ if (uuid_is_null(src))
+ {
+ return 1;
+ }
+
+ if (uuid_is_null(dst))
+ {
+ uuid_copy(dst, src);
+
+ return 1;
+ }
+
+ if (uuid_compare(dst, src) != 0)
+ {
+ gf_log(xl->name, GF_LOG_WARNING, "Mismatching GFID's in loc");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent, char ** name)
+{
+ char * str = NULL;
+ int32_t error = 0;
+
+ memset(parent, 0, sizeof(loc_t));
+
+ if (loc->path == NULL)
+ {
+ gf_log(xl->name, GF_LOG_ERROR, "inode path missing in loc_t: %p", loc->parent);
+
+ return EINVAL;
+ }
+
+ if (loc->parent == NULL)
+ {
+ if ((loc->inode == NULL) || !__is_root_gfid(loc->inode->gfid) ||
+ (strcmp(loc->path, "/") != 0))
+ {
+ gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for "
+ "loc_t (path=%s, name=%s)",
+ loc->path, loc->name);
+
+ return EINVAL;
+ }
+
+ if (loc_copy(parent, loc) != 0)
+ {
+ return ENOMEM;
+ }
+
+ parent->name = NULL;
+
+ if (name != NULL)
+ {
+ *name = NULL;
+ }
+ }
+ else
+ {
+ if (uuid_is_null(loc->parent->gfid) && (uuid_is_null(loc->pargfid)))
+ {
+ gf_log(xl->name, GF_LOG_ERROR, "Invalid parent inode "
+ "(path=%s, name=%s)",
+ loc->path, loc->name);
+
+ return EINVAL;
+ }
+ uuid_copy(parent->gfid, loc->pargfid);
+
+ str = gf_strdup(loc->path);
+ if (str == NULL)
+ {
+ gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path "
+ "'%s'", str);
+
+ return ENOMEM;
+ }
+ if (name != NULL)
+ {
+ *name = gf_strdup(basename(str));
+ if (*name == NULL)
+ {
+ gf_log(xl->name, GF_LOG_ERROR, "Unable to get basename "
+ "of '%s'", str);
+
+ error = ENOMEM;
+
+ goto out;
+ }
+ strcpy(str, loc->path);
+ }
+ parent->path = gf_strdup(dirname(str));
+ if (parent->path == NULL)
+ {
+ gf_log(xl->name, GF_LOG_ERROR, "Unable to get dirname of "
+ "'%s'", str);
+
+ error = ENOMEM;
+
+ goto out;
+ }
+ parent->name = strrchr(parent->path, '/');
+ if (parent->name == NULL)
+ {
+ gf_log(xl->name, GF_LOG_ERROR, "Invalid path name (%s)",
+ parent->path);
+
+ error = EINVAL;
+
+ goto out;
+ }
+ parent->name++;
+ parent->inode = inode_ref(loc->parent);
+ }
+
+ if ((loc->inode == NULL) ||
+ ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid))
+ {
+ parent = NULL;
+ }
+
+out:
+ GF_FREE(str);
+
+ if (parent != NULL)
+ {
+ loc_wipe(parent);
+ }
+
+ return error;
+}
+
+int32_t ec_loc_prepare(xlator_t * xl, loc_t * loc, inode_t * inode,
+ struct iatt * iatt)
+{
+ if ((inode != NULL) && (loc->inode != inode))
+ {
+ if (loc->inode != NULL)
+ {
+ inode_unref(loc->inode);
+ }
+ loc->inode = inode_ref(inode);
+
+ uuid_copy(loc->gfid, inode->gfid);
+ }
+ else if (loc->inode != NULL)
+ {
+ if (!ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid))
+ {
+ return 0;
+ }
+ }
+
+ if (iatt != NULL)
+ {
+ if (!ec_loc_gfid_check(xl, loc->gfid, iatt->ia_gfid))
+ {
+ return 0;
+ }
+ }
+
+ if (loc->parent != NULL)
+ {
+ if (!ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid))
+ {
+ return 0;
+ }
+
+ }
+
+ if (uuid_is_null(loc->gfid))
+ {
+ gf_log(xl->name, GF_LOG_WARNING, "GFID not available for inode");
+ }
+
+ return 1;
+}
+
+int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd)
+{
+ ec_fd_t * ctx;
+
+ memset(loc, 0, sizeof(*loc));
+
+ ctx = ec_fd_get(fd, xl);
+ if (ctx != NULL)
+ {
+ if (loc_copy(loc, &ctx->loc) != 0)
+ {
+ return 0;
+ }
+ }
+
+ if (ec_loc_prepare(xl, loc, fd->inode, NULL))
+ {
+ return 1;
+ }
+
+ loc_wipe(loc);
+
+ return 0;
+}
+
+int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src)
+{
+ memset(dst, 0, sizeof(*dst));
+
+ if (loc_copy(dst, src) != 0)
+ {
+ return 0;
+ }
+
+ if (ec_loc_prepare(xl, dst, NULL, NULL))
+ {
+ return 1;
+ }
+
+ loc_wipe(dst);
+
+ return 0;
+}
+
+void ec_owner_set(call_frame_t * frame, void * owner)
+{
+ set_lk_owner_from_ptr(&frame->root->lk_owner, owner);
+}
+
+void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner)
+{
+ frame->root->lk_owner.len = owner->len;
+ memcpy(frame->root->lk_owner.data, owner->data, owner->len);
+}
+
+ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl)
+{
+ ec_inode_t * ctx = NULL;
+ uint64_t value = 0;
+
+ if ((__inode_ctx_get(inode, xl, &value) != 0) || (value == 0))
+ {
+ ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_inode_t);
+ if (ctx != NULL)
+ {
+ memset(ctx, 0, sizeof(*ctx));
+
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__inode_ctx_set(inode, xl, &value) != 0)
+ {
+ GF_FREE(ctx);
+
+ return NULL;
+ }
+ }
+ }
+ else
+ {
+ ctx = (ec_inode_t *)(uintptr_t)value;
+ }
+
+ return ctx;
+}
+
+ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
+{
+ ec_inode_t * ctx = NULL;
+
+ LOCK(&inode->lock);
+
+ ctx = __ec_inode_get(inode, xl);
+
+ UNLOCK(&inode->lock);
+
+ return ctx;
+}
+
+ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
+{
+ ec_fd_t * ctx = NULL;
+ uint64_t value = 0;
+
+ if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0))
+ {
+ ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t);
+ if (ctx != NULL)
+ {
+ memset(ctx, 0, sizeof(*ctx));
+
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__fd_ctx_set(fd, xl, value) != 0)
+ {
+ GF_FREE(ctx);
+
+ return NULL;
+ }
+ }
+ }
+ else
+ {
+ ctx = (ec_fd_t *)(uintptr_t)value;
+ }
+
+ return ctx;
+}
+
+ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl)
+{
+ ec_fd_t * ctx = NULL;
+
+ LOCK(&fd->lock);
+
+ ctx = __ec_fd_get(fd, xl);
+
+ UNLOCK(&fd->lock);
+
+ return ctx;
+}
+
+size_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale)
+{
+ size_t head, tmp;
+
+ tmp = *offset;
+ head = tmp % ec->stripe_size;
+ tmp -= head;
+ if (scale)
+ {
+ tmp /= ec->fragments;
+ }
+
+ *offset = tmp;
+
+ return head;
+}
+
+size_t ec_adjust_size(ec_t * ec, size_t size, int32_t scale)
+{
+ size += ec->stripe_size - 1;
+ size -= size % ec->stripe_size;
+ if (scale)
+ {
+ size /= ec->fragments;
+ }
+
+ return size;
+}