diff options
Diffstat (limited to 'xlators/cluster/ec/src/ec-helpers.c')
| -rw-r--r-- | xlators/cluster/ec/src/ec-helpers.c | 950 |
1 files changed, 604 insertions, 346 deletions
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 181a6f7b41a..48f54475e01 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -1,65 +1,38 @@ /* - Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es> + Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es> + This file is part of GlusterFS. - This file is part of the cluster/ec translator for GlusterFS. - - The cluster/ec translator for GlusterFS is free software: you can - redistribute it and/or modify it under the terms of the GNU General - Public License as published by the Free Software Foundation, either - version 3 of the License, or (at your option) any later version. - - The cluster/ec translator for GlusterFS is distributed in the hope - that it will be useful, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - PURPOSE. See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the cluster/ec translator for GlusterFS. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #include <libgen.h> -#include "byte-order.h" +#include <glusterfs/byte-order.h> +#include "ec.h" #include "ec-mem-types.h" +#include "ec-messages.h" #include "ec-fops.h" +#include "ec-method.h" #include "ec-helpers.h" -#define BACKEND_D_OFF_BITS 63 -#define PRESENT_D_OFF_BITS 63 - -#define ONE 1ULL -#define MASK (~0ULL) -#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) -#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) +static const char *ec_fop_list[] = {[-EC_FOP_HEAL] = "HEAL"}; -#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) -#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) - -#ifndef ffsll -#define ffsll(x) __builtin_ffsll(x) -#endif - -static const char * ec_fop_list[] = -{ - [-EC_FOP_HEAL] = "HEAL" -}; - -const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits) +const char * +ec_bin(char *str, size_t size, uint64_t value, int32_t digits) { str += size; - if (size-- < 1) - { + if (size-- < 1) { goto failed; } *--str = 0; - while ((value != 0) || (digits > 0)) - { - if (size-- < 1) - { + while ((value != 0) || (digits > 0)) { + if (size-- < 1) { goto failed; } *--str = '0' + (value & 1); @@ -73,21 +46,22 @@ failed: return "<buffer too small>"; } -const char * ec_fop_name(int32_t id) +const char * +ec_fop_name(int32_t id) { - if (id >= 0) - { + if (id >= 0) { return gf_fop_list[id]; } return ec_fop_list[-id]; } -void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...) +void +ec_trace(const char *event, ec_fop_data_t *fop, const char *fmt, ...) { char str1[32], str2[32], str3[32]; - char * msg; - ec_t * ec = fop->xl->private; + char *msg; + ec_t *ec = fop->xl->private; va_list args; int32_t ret; @@ -95,80 +69,28 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...) ret = vasprintf(&msg, fmt, args); va_end(args); - if (ret < 0) - { + if (ret < 0) { msg = "<memory allocation error>"; } - gf_log("ec", GF_LOG_TRACE, "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] " - "frame=%p/%p, min/exp=%d/%d, err=%d state=%d " - "{%s:%s:%s} %s", - event, ec_fop_name(fop->id), fop, fop->parent, fop->refs, - fop->winds, fop->jobs, fop->req_frame, fop->frame, fop->minimum, - fop->expected, fop->error, fop->state, - ec_bin(str1, sizeof(str1), fop->mask, ec->nodes), - ec_bin(str2, sizeof(str2), fop->remaining, ec->nodes), - ec_bin(str3, sizeof(str3), fop->bad, ec->nodes), msg); + gf_msg_trace("ec", 0, + "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] " + "frame=%p/%p, min/exp=%d/%d, err=%d state=%d " + "{%s:%s:%s} %s", + event, ec_fop_name(fop->id), fop, fop->parent, fop->refs, + fop->winds, fop->jobs, fop->req_frame, fop->frame, + fop->minimum, fop->expected, fop->error, fop->state, + ec_bin(str1, sizeof(str1), fop->mask, ec->nodes), + ec_bin(str2, sizeof(str2), fop->remaining, ec->nodes), + ec_bin(str3, sizeof(str3), fop->good, ec->nodes), msg); - if (ret >= 0) - { + if (ret >= 0) { free(msg); } } -uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset) -{ - int32_t bits; - - if (offset == -1ULL) - { - return -1ULL; - } - - bits = ec->bits_for_nodes; - if ((offset & ~(PRESENT_MASK >> (bits + 1))) != 0) - { - return TOP_BIT | ((offset >> SHIFT_BITS) & (MASK << bits)) | idx; - } - - return (offset * ec->nodes) + idx; -} - -uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset) -{ - uint64_t mask = 0; - - if ((offset & TOP_BIT) != 0) - { - mask = MASK << ec->bits_for_nodes; - - *idx = offset & ~mask; - return ((offset & ~TOP_BIT) & mask) << SHIFT_BITS; - } - - *idx = offset % ec->nodes; - - return offset / ec->nodes; -} - -int32_t ec_bits_count(uint64_t n) -{ - n -= (n >> 1) & 0x5555555555555555ULL; - n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL); - n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL; - n += n >> 8; - n += n >> 16; - n += n >> 32; - - return n & 0xFF; -} - -int32_t ec_bits_index(uint64_t n) -{ - return ffsll(n) - 1; -} - -int32_t ec_bits_consume(uint64_t * n) +int32_t +ec_bits_consume(uint64_t *n) { uint64_t tmp; @@ -176,24 +98,21 @@ int32_t ec_bits_consume(uint64_t * n) tmp &= -tmp; *n ^= tmp; - return ffsll(tmp) - 1; + return gf_bits_index(tmp); } -size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count, - off_t offset, size_t size) +size_t +ec_iov_copy_to(void *dst, struct iovec *vector, int32_t count, off_t offset, + size_t size) { int32_t i = 0; size_t total = 0, len = 0; - while (i < count) - { - if (offset < vector[i].iov_len) - { - while ((i < count) && (size > 0)) - { + while (i < count) { + if (offset < vector[i].iov_len) { + while ((i < count) && (size > 0)) { len = size; - if (len > vector[i].iov_len - offset) - { + if (len > vector[i].iov_len - offset) { len = vector[i].iov_len - offset; } memcpy(dst, vector[i++].iov_base + offset, len); @@ -213,30 +132,161 @@ size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count, return total; } -int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value) +int32_t +ec_buffer_alloc(xlator_t *xl, size_t size, struct iobref **piobref, void **ptr) +{ + struct iobref *iobref = NULL; + struct iobuf *iobuf = NULL; + int32_t ret = -ENOMEM; + + iobuf = iobuf_get_page_aligned(xl->ctx->iobuf_pool, size, + EC_METHOD_WORD_SIZE); + if (iobuf == NULL) { + goto out; + } + + iobref = *piobref; + if (iobref == NULL) { + iobref = iobref_new(); + if (iobref == NULL) { + goto out; + } + } + + ret = iobref_add(iobref, iobuf); + if (ret != 0) { + if (iobref != *piobref) { + iobref_unref(iobref); + } + iobref = NULL; + + goto out; + } + + GF_ASSERT(EC_ALIGN_CHECK(iobuf->ptr, EC_METHOD_WORD_SIZE)); + + *ptr = iobuf->ptr; + +out: + if (iobuf != NULL) { + iobuf_unref(iobuf); + } + + if (iobref != NULL) { + *piobref = iobref; + } + + return ret; +} + +int32_t +ec_dict_set_array(dict_t *dict, char *key, uint64_t value[], int32_t size) { - uint64_t * ptr; + int ret = -1; + uint64_t *ptr = NULL; + int32_t vindex; + + if (value == NULL) { + return -EINVAL; + } + + ptr = GF_MALLOC(sizeof(uint64_t) * size, gf_common_mt_char); + if (ptr == NULL) { + return -ENOMEM; + } + for (vindex = 0; vindex < size; vindex++) { + ptr[vindex] = hton64(value[vindex]); + } + ret = dict_set_bin(dict, key, ptr, sizeof(uint64_t) * size); + if (ret) + GF_FREE(ptr); + return ret; +} + +int32_t +ec_dict_get_array(dict_t *dict, char *key, uint64_t value[], int32_t size) +{ + void *ptr; + int32_t len; + int32_t vindex; + int32_t old_size = 0; + int32_t err; + + if (dict == NULL) { + return -EINVAL; + } + err = dict_get_ptr_and_len(dict, key, &ptr, &len); + if (err != 0) { + return err; + } + + if (len > (size * sizeof(uint64_t)) || (len % sizeof(uint64_t))) { + return -EINVAL; + } + + /* 3.6 version ec would have stored version in 64 bit. In that case treat + * metadata versions same as data*/ + old_size = min(size, len / sizeof(uint64_t)); + for (vindex = 0; vindex < old_size; vindex++) { + value[vindex] = ntoh64(*((uint64_t *)ptr + vindex)); + } + + if (old_size < size) { + for (vindex = old_size; vindex < size; vindex++) { + value[vindex] = value[old_size - 1]; + } + } + + return 0; +} + +int32_t +ec_dict_del_array(dict_t *dict, char *key, uint64_t value[], int32_t size) +{ + int ret = 0; + + ret = ec_dict_get_array(dict, key, value, size); + if (ret == 0) + dict_del(dict, key); + + return ret; +} + +int32_t +ec_dict_set_number(dict_t *dict, char *key, uint64_t value) +{ + int ret = -1; + uint64_t *ptr; ptr = GF_MALLOC(sizeof(value), gf_common_mt_char); - if (ptr == NULL) - { - return -1; + if (ptr == NULL) { + return -ENOMEM; } *ptr = hton64(value); - return dict_set_bin(dict, key, ptr, sizeof(value)); + ret = dict_set_bin(dict, key, ptr, sizeof(value)); + if (ret) + GF_FREE(ptr); + + return ret; } -int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value) +int32_t +ec_dict_del_number(dict_t *dict, char *key, uint64_t *value) { - void * ptr; - int32_t len; + void *ptr; + int32_t len, err; - if ((dict == NULL) || (dict_get_ptr_and_len(dict, key, &ptr, &len) != 0) || - (len != sizeof(uint64_t))) - { - return -1; + if (dict == NULL) { + return -EINVAL; + } + err = dict_get_ptr_and_len(dict, key, &ptr, &len); + if (err != 0) { + return err; + } + if (len != sizeof(uint64_t)) { + return -EINVAL; } *value = ntoh64(*(uint64_t *)ptr); @@ -246,279 +296,448 @@ int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value) return 0; } -int32_t ec_loc_gfid_check(xlator_t * xl, uuid_t dst, uuid_t src) +int32_t +ec_dict_set_config(dict_t *dict, char *key, ec_config_t *config) { - if (uuid_is_null(src)) - { - return 1; + int ret = -1; + uint64_t *ptr, data; + + if (config->version > EC_CONFIG_VERSION) { + gf_msg("ec", GF_LOG_ERROR, EINVAL, EC_MSG_UNSUPPORTED_VERSION, + "Trying to store an unsupported config " + "version (%u)", + config->version); + + return -EINVAL; + } + + ptr = GF_MALLOC(sizeof(uint64_t), gf_common_mt_char); + if (ptr == NULL) { + return -ENOMEM; } - if (uuid_is_null(dst)) - { - uuid_copy(dst, src); + data = ((uint64_t)config->version) << 56; + data |= ((uint64_t)config->algorithm) << 48; + data |= ((uint64_t)config->gf_word_size) << 40; + data |= ((uint64_t)config->bricks) << 32; + data |= ((uint64_t)config->redundancy) << 24; + data |= config->chunk_size; + + *ptr = hton64(data); + + ret = dict_set_bin(dict, key, ptr, sizeof(uint64_t)); + if (ret) + GF_FREE(ptr); - return 1; + return ret; +} + +int32_t +ec_dict_del_config(dict_t *dict, char *key, ec_config_t *config) +{ + void *ptr; + uint64_t data; + int32_t len, err; + + if (dict == NULL) { + return -EINVAL; + } + err = dict_get_ptr_and_len(dict, key, &ptr, &len); + if (err != 0) { + return err; + } + if (len != sizeof(uint64_t)) { + return -EINVAL; } - if (uuid_compare(dst, src) != 0) - { - gf_log(xl->name, GF_LOG_WARNING, "Mismatching GFID's in loc"); + data = ntoh64(*(uint64_t *)ptr); + /* Currently we need to get the config xattr for entries of type IA_INVAL. + * These entries can later become IA_DIR entries (after inode_link()), + * which don't have a config xattr. However, since the xattr is requested + * using an xattrop() fop, it will always return a config full of 0's + * instead of saying that it doesn't exist. + * + * We need to filter out this case and consider that a config xattr == 0 is + * the same as a non-existent xattr. Otherwise ec_config_check() will fail. + */ + if (data == 0) { + return -ENODATA; + } - return 0; + config->version = (data >> 56) & 0xff; + if (config->version > EC_CONFIG_VERSION) { + gf_msg("ec", GF_LOG_ERROR, EINVAL, EC_MSG_UNSUPPORTED_VERSION, + "Found an unsupported config version (%u)", config->version); + + return -EINVAL; } - return 1; + config->algorithm = (data >> 48) & 0xff; + config->gf_word_size = (data >> 40) & 0xff; + config->bricks = (data >> 32) & 0xff; + config->redundancy = (data >> 24) & 0xff; + config->chunk_size = data & 0xffffff; + + dict_del(dict, key); + + return 0; } -int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent, char ** name) +gf_boolean_t +ec_loc_gfid_check(xlator_t *xl, uuid_t dst, uuid_t src) { - char * str = NULL; - int32_t error = 0; + if (gf_uuid_is_null(src)) { + return _gf_true; + } - memset(parent, 0, sizeof(loc_t)); + if (gf_uuid_is_null(dst)) { + gf_uuid_copy(dst, src); + + return _gf_true; + } - if (loc->path == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "inode path missing in loc_t: %p", loc->parent); + if (gf_uuid_compare(dst, src) != 0) { + gf_msg(xl->name, GF_LOG_WARNING, 0, EC_MSG_GFID_MISMATCH, + "Mismatching GFID's in loc"); - return EINVAL; + return _gf_false; } - if (loc->parent == NULL) - { - if ((loc->inode == NULL) || !__is_root_gfid(loc->inode->gfid) || - (strcmp(loc->path, "/") != 0)) - { - gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for " - "loc_t (path=%s, name=%s)", - loc->path, loc->name); + return _gf_true; +} - return EINVAL; +int32_t +ec_loc_setup_inode(xlator_t *xl, inode_table_t *table, loc_t *loc) +{ + int32_t ret = -EINVAL; + + if (loc->inode != NULL) { + if (!ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) { + goto out; } + } else if (table != NULL) { + if (!gf_uuid_is_null(loc->gfid)) { + loc->inode = inode_find(table, loc->gfid); + } else if (loc->path && strchr(loc->path, '/')) { + loc->inode = inode_resolve(table, (char *)loc->path); + } + } + + ret = 0; + +out: + return ret; +} - if (loc_copy(parent, loc) != 0) - { - return ENOMEM; +int32_t +ec_loc_setup_parent(xlator_t *xl, inode_table_t *table, loc_t *loc) +{ + char *path, *parent; + int32_t ret = -EINVAL; + + if (loc->parent != NULL) { + if (!ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid)) { + goto out; } + } else if (table != NULL) { + if (!gf_uuid_is_null(loc->pargfid)) { + loc->parent = inode_find(table, loc->pargfid); + } else if (loc->path && strchr(loc->path, '/')) { + path = gf_strdup(loc->path); + if (path == NULL) { + gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Unable to duplicate path '%s'", loc->path); - parent->name = NULL; + ret = -ENOMEM; - if (name != NULL) - { - *name = NULL; + goto out; + } + parent = dirname(path); + loc->parent = inode_resolve(table, parent); + if (loc->parent != NULL) { + gf_uuid_copy(loc->pargfid, loc->parent->gfid); + } + GF_FREE(path); } } - else - { - if (uuid_is_null(loc->parent->gfid) && (uuid_is_null(loc->pargfid))) - { - gf_log(xl->name, GF_LOG_ERROR, "Invalid parent inode " - "(path=%s, name=%s)", - loc->path, loc->name); - return EINVAL; - } - uuid_copy(parent->gfid, loc->pargfid); + /* If 'pargfid' has not been determined, clear 'name' to avoid resolutions + based on <gfid:pargfid>/name. */ + if (gf_uuid_is_null(loc->pargfid)) { + loc->name = NULL; + } - str = gf_strdup(loc->path); - if (str == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path " - "'%s'", str); + ret = 0; + +out: + return ret; +} - return ENOMEM; +int32_t +ec_loc_setup_path(xlator_t *xl, loc_t *loc) +{ + static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + char *name; + int32_t ret = -EINVAL; + + if (loc->path != NULL) { + name = strrchr(loc->path, '/'); + if (name == NULL) { + /* Allow gfid paths: <gfid:...> */ + if (strncmp(loc->path, "<gfid:", 6) == 0) { + ret = 0; + } + goto out; } - if (name != NULL) - { - *name = gf_strdup(basename(str)); - if (*name == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Unable to get basename " - "of '%s'", str); + if (name == loc->path) { + if (name[1] == 0) { + if (!ec_loc_gfid_check(xl, loc->gfid, root)) { + goto out; + } + } else { + if (!ec_loc_gfid_check(xl, loc->pargfid, root)) { + goto out; + } + } + } + name++; - error = ENOMEM; + if (loc->name != NULL) { + if (strcmp(loc->name, name) != 0) { + gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_LOC_NAME, + "Invalid name '%s' in loc", loc->name); goto out; } - strcpy(str, loc->path); + } else { + loc->name = name; } - parent->path = gf_strdup(dirname(str)); - if (parent->path == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Unable to get dirname of " - "'%s'", str); + } + + ret = 0; + +out: + return ret; +} + +int32_t +ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent) +{ + inode_table_t *table = NULL; + char *str = NULL; + int32_t ret = -ENOMEM; + + memset(parent, 0, sizeof(loc_t)); - error = ENOMEM; + if (loc->parent != NULL) { + table = loc->parent->table; + parent->inode = inode_ref(loc->parent); + } else if (loc->inode != NULL) { + table = loc->inode->table; + } + if (!gf_uuid_is_null(loc->pargfid)) { + gf_uuid_copy(parent->gfid, loc->pargfid); + } + if (loc->path && strchr(loc->path, '/')) { + str = gf_strdup(loc->path); + if (str == NULL) { + gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Unable to duplicate path '%s'", loc->path); goto out; } - parent->name = strrchr(parent->path, '/'); - if (parent->name == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Invalid path name (%s)", - parent->path); - - error = EINVAL; + parent->path = gf_strdup(dirname(str)); + if (parent->path == NULL) { + gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Unable to duplicate path '%s'", dirname(str)); goto out; } - parent->name++; - parent->inode = inode_ref(loc->parent); } - if ((loc->inode == NULL) || - ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) - { - parent = NULL; + ret = ec_loc_setup_path(xl, parent); + if (ret == 0) { + ret = ec_loc_setup_inode(xl, table, parent); + } + if (ret == 0) { + ret = ec_loc_setup_parent(xl, table, parent); + } + if (ret != 0) { + goto out; + } + + if ((parent->inode == NULL) && (parent->path == NULL) && + gf_uuid_is_null(parent->gfid)) { + gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_LOC_PARENT_INODE_MISSING, + "Parent inode missing for loc_t"); + + ret = -EINVAL; + + goto out; } + ret = 0; + out: GF_FREE(str); - if (parent != NULL) - { + if (ret != 0) { loc_wipe(parent); } - return error; + return ret; } -int32_t ec_loc_prepare(xlator_t * xl, loc_t * loc, inode_t * inode, - struct iatt * iatt) +int32_t +ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, struct iatt *iatt) { - if ((inode != NULL) && (loc->inode != inode)) - { - if (loc->inode != NULL) - { - inode_unref(loc->inode); - } - loc->inode = inode_ref(inode); - - uuid_copy(loc->gfid, inode->gfid); - } - else if (loc->inode != NULL) - { - if (!ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) - { - return 0; + inode_table_t *table = NULL; + int32_t ret = -EINVAL; + + if (inode != NULL) { + table = inode->table; + if (loc->inode != inode) { + if (loc->inode != NULL) { + inode_unref(loc->inode); + } + loc->inode = inode_ref(inode); + gf_uuid_copy(loc->gfid, inode->gfid); } + } else if (loc->inode != NULL) { + table = loc->inode->table; + } else if (loc->parent != NULL) { + table = loc->parent->table; } - if (iatt != NULL) - { - if (!ec_loc_gfid_check(xl, loc->gfid, iatt->ia_gfid)) - { - return 0; + if (iatt != NULL) { + if (!ec_loc_gfid_check(xl, loc->gfid, iatt->ia_gfid)) { + goto out; } } - if (loc->parent != NULL) - { - if (!ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid)) - { - return 0; - } - + ret = ec_loc_setup_path(xl, loc); + if (ret == 0) { + ret = ec_loc_setup_inode(xl, table, loc); } - - if (uuid_is_null(loc->gfid)) - { - gf_log(xl->name, GF_LOG_WARNING, "GFID not available for inode"); + if (ret == 0) { + ret = ec_loc_setup_parent(xl, table, loc); + } + if (ret != 0) { + goto out; } - return 1; +out: + return ret; } -int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd) +int32_t +ec_loc_from_fd(xlator_t *xl, loc_t *loc, fd_t *fd) { - ec_fd_t * ctx; + ec_fd_t *ctx; + int32_t ret = -ENOMEM; memset(loc, 0, sizeof(*loc)); ctx = ec_fd_get(fd, xl); - if (ctx != NULL) - { - if (loc_copy(loc, &ctx->loc) != 0) - { - return 0; + if (ctx != NULL) { + if (loc_copy(loc, &ctx->loc) != 0) { + goto out; } } - if (ec_loc_prepare(xl, loc, fd->inode, NULL)) - { - return 1; + ret = ec_loc_update(xl, loc, fd->inode, NULL); + if (ret != 0) { + goto out; } - loc_wipe(loc); +out: + if (ret != 0) { + loc_wipe(loc); + } - return 0; + return ret; } -int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src) +int32_t +ec_loc_from_loc(xlator_t *xl, loc_t *dst, loc_t *src) { + int32_t ret = -ENOMEM; + memset(dst, 0, sizeof(*dst)); - if (loc_copy(dst, src) != 0) - { - return 0; + if (loc_copy(dst, src) != 0) { + goto out; } - if (ec_loc_prepare(xl, dst, NULL, NULL)) - { - return 1; + ret = ec_loc_update(xl, dst, NULL, NULL); + if (ret != 0) { + goto out; } - loc_wipe(dst); +out: + if (ret != 0) { + loc_wipe(dst); + } - return 0; + return ret; } -void ec_owner_set(call_frame_t * frame, void * owner) +void +ec_owner_set(call_frame_t *frame, void *owner) { set_lk_owner_from_ptr(&frame->root->lk_owner, owner); } -void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner) +void +ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner) +{ + lk_owner_copy(&frame->root->lk_owner, owner); +} + +static void +ec_stripe_cache_init(ec_t *ec, ec_inode_t *ctx) { - frame->root->lk_owner.len = owner->len; - memcpy(frame->root->lk_owner.data, owner->data, owner->len); + ec_stripe_list_t *stripe_cache = NULL; + + stripe_cache = &(ctx->stripe_cache); + if (stripe_cache->max == 0) { + stripe_cache->max = ec->stripe_cache; + } } -ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl) +ec_inode_t * +__ec_inode_get(inode_t *inode, xlator_t *xl) { - ec_inode_t * ctx = NULL; + ec_inode_t *ctx = NULL; uint64_t value = 0; - if ((__inode_ctx_get(inode, xl, &value) != 0) || (value == 0)) - { + if ((__inode_ctx_get(inode, xl, &value) != 0) || (value == 0)) { ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_inode_t); - if (ctx != NULL) - { + if (ctx != NULL) { memset(ctx, 0, sizeof(*ctx)); - + INIT_LIST_HEAD(&ctx->heal); + INIT_LIST_HEAD(&ctx->stripe_cache.lru); + ctx->heal_count = 0; value = (uint64_t)(uintptr_t)ctx; - if (__inode_ctx_set(inode, xl, &value) != 0) - { + if (__inode_ctx_set(inode, xl, &value) != 0) { GF_FREE(ctx); return NULL; } - - INIT_LIST_HEAD(&ctx->entry_locks); - INIT_LIST_HEAD(&ctx->inode_locks); } - } - else - { + } else { ctx = (ec_inode_t *)(uintptr_t)value; } + if (ctx) + ec_stripe_cache_init(xl->private, ctx); return ctx; } -ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl) +ec_inode_t * +ec_inode_get(inode_t *inode, xlator_t *xl) { - ec_inode_t * ctx = NULL; + ec_inode_t *ctx = NULL; LOCK(&inode->lock); @@ -529,81 +748,120 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl) return ctx; } -ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl) +ec_fd_t * +__ec_fd_get(fd_t *fd, xlator_t *xl) { - ec_fd_t * ctx = NULL; + int i = 0; + ec_fd_t *ctx = NULL; + ec_inode_t *ictx = NULL; uint64_t value = 0; + ec_t *ec = xl->private; - if (fd->anonymous) - { - return NULL; - } - - if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) - { - ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t); - if (ctx != NULL) - { + if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) { + ctx = GF_MALLOC(sizeof(*ctx) + (sizeof(ec_fd_status_t) * ec->nodes), + ec_mt_ec_fd_t); + if (ctx != NULL) { memset(ctx, 0, sizeof(*ctx)); + for (i = 0; i < ec->nodes; i++) { + if (fd_is_anonymous(fd)) { + ctx->fd_status[i] = EC_FD_OPENED; + } else { + ctx->fd_status[i] = EC_FD_NOT_OPENED; + } + } + value = (uint64_t)(uintptr_t)ctx; - if (__fd_ctx_set(fd, xl, value) != 0) - { + if (__fd_ctx_set(fd, xl, value) != 0) { GF_FREE(ctx); - return NULL; } + /* Only refering bad-version so no need for lock + * */ + ictx = __ec_inode_get(fd->inode, xl); + if (ictx) { + ctx->bad_version = ictx->bad_version; + } } - } - else - { + } else { ctx = (ec_fd_t *)(uintptr_t)value; } + /* Treat anonymous fd specially */ + if (fd->anonymous && ctx) { + /* Mark the fd open for all subvolumes. */ + ctx->open = -1; + /* Try to populate ctx->loc with fd->inode information. */ + ec_loc_update(xl, &ctx->loc, fd->inode, NULL); + } + return ctx; } -ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl) +ec_fd_t * +ec_fd_get(fd_t *fd, xlator_t *xl) { - ec_fd_t * ctx = NULL; + ec_fd_t *ctx = NULL; - if (!fd->anonymous) - { - LOCK(&fd->lock); + LOCK(&fd->lock); - ctx = __ec_fd_get(fd, xl); + ctx = __ec_fd_get(fd, xl); - UNLOCK(&fd->lock); - } + UNLOCK(&fd->lock); return ctx; } -uint32_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale) +gf_boolean_t +ec_is_internal_xattr(dict_t *dict, char *key, data_t *value, void *data) { - off_t head, tmp; - - tmp = *offset; - head = tmp % ec->stripe_size; - tmp -= head; - if (scale) - { - tmp /= ec->fragments; - } + if (key && (strncmp(key, EC_XATTR_PREFIX, SLEN(EC_XATTR_PREFIX)) == 0)) + return _gf_true; - *offset = tmp; - - return head; + return _gf_false; } -uint64_t ec_adjust_size(ec_t * ec, uint64_t size, int32_t scale) +void +ec_filter_internal_xattrs(dict_t *xattr) { - size += ec->stripe_size - 1; - size -= size % ec->stripe_size; - if (scale) - { - size /= ec->fragments; - } + dict_foreach_match(xattr, ec_is_internal_xattr, NULL, + dict_remove_foreach_fn, NULL); +} - return size; +gf_boolean_t +ec_is_data_fop(glusterfs_fop_t fop) +{ + switch (fop) { + case GF_FOP_WRITE: + case GF_FOP_TRUNCATE: + case GF_FOP_FTRUNCATE: + case GF_FOP_FALLOCATE: + case GF_FOP_DISCARD: + case GF_FOP_ZEROFILL: + return _gf_true; + default: + return _gf_false; + } + return _gf_false; } +/* +gf_boolean_t +ec_is_metadata_fop (int32_t lock_kind, glusterfs_fop_t fop) +{ + if (lock_kind == EC_LOCK_ENTRY) { + return _gf_false; + } + + switch (fop) { + case GF_FOP_SETATTR: + case GF_FOP_FSETATTR: + case GF_FOP_SETXATTR: + case GF_FOP_FSETXATTR: + case GF_FOP_REMOVEXATTR: + case GF_FOP_FREMOVEXATTR: + return _gf_true; + default: + return _gf_false; + } + return _gf_false; +}*/ |
