summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- xlators/cluster/dht/src/dht-common.c 455
-rw-r--r-- xlators/cluster/dht/src/dht-common.h 11
-rw-r--r-- xlators/cluster/dht/src/dht-diskusage.c 25
-rw-r--r-- xlators/cluster/dht/src/dht-linkfile.c 2
-rw-r--r-- xlators/cluster/dht/src/dht-selfheal.c 18
5 files changed, 455 insertions, 56 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index da36a3b33a5..681b74aa2f6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -3503,7 +3503,6 @@ err:
return 0;
}
-
static int
dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
@@ -5463,9 +5462,6 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
dht_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
-
local = frame->local;
if (!local) {
op_ret = -1;
@@ -5473,6 +5469,9 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
+ if (op_ret == -1)
+ goto out;
+
prev = cookie;
if (local->loc.parent) {
@@ -5492,18 +5491,34 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
goto out;
}
+
+ local->op_errno = op_errno;
+
if (local->linked == _gf_true) {
local->stbuf = *stbuf;
dht_linkfile_attr_heal (frame, this);
}
out:
+
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent, xdata);
+
+ if (local && local->lock.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock (frame, this, op_ret);
+
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd,
+ inode, stbuf, preparent, postparent,
+ xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode,
+ stbuf, preparent, postparent, xdata);
+ }
return 0;
}
-
int
dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
@@ -5515,8 +5530,10 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
- if (op_ret == -1)
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
goto err;
+ }
local = frame->local;
cached_subvol = local->cached_subvol;
@@ -5528,25 +5545,327 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
+ if (local->lock.locks)
+ local->refresh_layout_unlock (frame, this, -1);
+
+ return 0;
+}
+
+/*
+ * Wind a create to a subvolume that still has room for the file.
+ *
+ * If dht_is_subvol_filled () does not report the hashed subvolume @subvol
+ * as filled, or if dht_free_disk_available_subvol () hands back @subvol
+ * itself, the create is wound directly to @subvol with dht_create_cbk as
+ * the callback.
+ *
+ * Otherwise the create arguments are stashed in the frame's local, the
+ * alternative subvolume is recorded as cached_subvol and @subvol as
+ * hashed_subvol, and dht_linkfile_create () is called with
+ * dht_create_linkfile_create_cbk as its callback.
+ *
+ * The function itself always returns 0.
+ */
+int
+dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
+                                 xlator_t *subvol, loc_t *loc, int32_t flags,
+                                 mode_t mode, mode_t umask, fd_t *fd,
+                                 dict_t *params)
+{
+        dht_local_t *local      = frame->local;
+        xlator_t    *alt_subvol = NULL;
+
+        if (dht_is_subvol_filled (this, subvol)) {
+                alt_subvol = dht_free_disk_available_subvol (this, subvol,
+                                                             local);
+
+                if (alt_subvol != subvol) {
+                        /* Remember the create arguments; they are read
+                         * back from local once the linkfile exists. */
+                        local->params        = dict_ref (params);
+                        local->flags         = flags;
+                        local->mode          = mode;
+                        local->umask         = umask;
+                        local->cached_subvol = alt_subvol;
+                        local->hashed_subvol = subvol;
+
+                        gf_msg_debug (this->name, 0,
+                                      "creating %s on %s (link at %s)",
+                                      loc->path, alt_subvol->name,
+                                      subvol->name);
+
+                        dht_linkfile_create (frame,
+                                             dht_create_linkfile_create_cbk,
+                                             this, alt_subvol, subvol, loc);
+
+                        return 0;
+                }
+        }
+
+        /* Either the hashed subvolume has room, or nothing better exists. */
+        gf_msg_debug (this->name, 0,
+                      "creating %s on %s", loc->path, subvol->name);
+
+        STACK_WIND (frame, dht_create_cbk,
+                    subvol, subvol->fops->create,
+                    loc, flags, mode, umask, fd, params);
+
        return 0;
}
int
+/*
+ * Fill @parent with the inode and gfid of @child's parent directory.
+ *
+ * The parent inode is taken from child->parent when that is set;
+ * otherwise it is looked up by child->pargfid in this->itable using
+ * inode_find ().  On success parent->inode is set and parent->gfid is a
+ * copy of child->pargfid.
+ *
+ * @op_errno is optional.  When it is non-NULL it receives EINVAL on every
+ * failure.  A NULL @op_errno never changes the outcome: every failed check
+ * returns -1.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child,
+                      int32_t *op_errno)
+{
+        inode_table_t *table = NULL;
+
+        if (!parent || !child) {
+                goto err;
+        }
+
+        if (child->parent) {
+                parent->inode = inode_ref (child->parent);
+        } else {
+                if (gf_uuid_is_null (child->pargfid)) {
+                        goto err;
+                }
+
+                table = this->itable;
+                if (!table) {
+                        /* Must bail out even when op_errno is NULL;
+                         * inode_find () cannot work without a table. */
+                        goto err;
+                }
+
+                parent->inode = inode_find (table, child->pargfid);
+        }
+
+        if (!parent->inode) {
+                /* Never report success with an unresolved parent inode. */
+                goto err;
+        }
+
+        gf_uuid_copy (parent->gfid, child->pargfid);
+
+        return 0;
+
+err:
+        if (op_errno) {
+                *op_errno = EINVAL;
+        }
+
+        return -1;
+}
+
+
+/*
+ * Continue a create once the parent directory layout has been refreshed.
+ *
+ * Installed as local->refresh_layout_done by dht_create_lock_cbk ().
+ * While the layout was being refreshed, local->loc held the parent
+ * directory and local->loc2 held the loc of the file being created.  This
+ * function moves loc2 back into loc, picks the hashed subvolume from
+ * local->selfheal.refreshed_layout and winds the create through
+ * dht_create_wind_to_avail_subvol ().
+ *
+ * On any failure local->refresh_layout_unlock is called with -1.
+ * Always returns 0.
+ */
+int32_t
+dht_create_do (call_frame_t *frame)
+{
+        dht_local_t   *local     = NULL;
+        dht_layout_t  *refreshed = NULL;
+        xlator_t      *subvol    = NULL;
+        xlator_t      *this      = NULL;
+        dht_conf_t    *conf      = NULL;
+        dht_methods_t *methods   = NULL;
+
+        local = frame->local;
+
+        this = THIS;
+
+        conf = this->private;
+
+        GF_VALIDATE_OR_GOTO (this->name, conf, err);
+
+        methods = conf->methods;
+
+        GF_VALIDATE_OR_GOTO (this->name, conf->methods, err);
+
+        /* We don't need parent_loc anymore */
+        loc_wipe (&local->loc);
+
+        /* Restore the loc of the file being created.  The copy can fail;
+         * without this check the layout search below would run on a loc
+         * with no name. */
+        if (loc_copy (&local->loc, &local->loc2)) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        DHT_MSG_NO_MEMORY,
+                        "loc_copy failed %s", local->loc2.path);
+                local->op_errno = ENOMEM;
+                goto err;
+        }
+
+        loc_wipe (&local->loc2);
+
+        refreshed = local->selfheal.refreshed_layout;
+
+        subvol = methods->layout_search (this, refreshed, local->loc.name);
+
+        if (!subvol) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_HASHED_SUBVOL_GET_FAILED, "no subvolume in "
+                        "layout for path=%s", local->loc.path);
+                local->op_errno = ENOENT;
+                goto err;
+        }
+
+        dht_create_wind_to_avail_subvol (frame, this, subvol, &local->loc,
+                                         local->flags, local->mode,
+                                         local->umask, local->fd,
+                                         local->params);
+        return 0;
+err:
+        local->refresh_layout_unlock (frame, this, -1);
+
+        return 0;
+}
+
+/*
+ * Callback passed to dht_unlock_inodelk () by dht_create_finish ().
+ *
+ * @frame is the copy made for the unlock in dht_create_finish (); it is
+ * destroyed here.  op_ret and op_errno are not inspected.
+ * Always returns 0.
+ */
+int32_t
+dht_create_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+/*
+ * Release the lock taken for a create and, on failure, unwind the create.
+ *
+ * Installed as local->refresh_layout_unlock by dht_create_lock_cbk ().
+ *
+ * If dht_lock_count () reports held locks, they are handed over to a
+ * copy of @frame and released with dht_unlock_inodelk (); the copy is
+ * destroyed in dht_create_unlock_cbk ().  If the frame copy or its local
+ * cannot be set up, no unlock is issued.
+ *
+ * When @op_ret is 0 nothing is unwound here.  For any other @op_ret the
+ * create is unwound with that value and local->op_errno.
+ * Always returns 0.
+ */
+int32_t
+dht_create_finish (call_frame_t *frame, xlator_t *this, int op_ret)
+{
+        dht_local_t  *local        = frame->local;
+        dht_local_t  *unlock_local = NULL;
+        call_frame_t *unlock_frame = NULL;
+
+        if (dht_lock_count (local->lock.locks, local->lock.lk_count) != 0) {
+                unlock_frame = copy_frame (frame);
+        }
+
+        if (unlock_frame != NULL) {
+                unlock_local = dht_local_init (unlock_frame, &local->loc,
+                                               NULL, unlock_frame->root->op);
+
+                if (unlock_local != NULL) {
+                        /* Hand the locks over to the unlock frame so the
+                         * original local no longer refers to them. */
+                        unlock_local->lock.locks    = local->lock.locks;
+                        unlock_local->lock.lk_count = local->lock.lk_count;
+
+                        local->lock.locks    = NULL;
+                        local->lock.lk_count = 0;
+
+                        dht_unlock_inodelk (unlock_frame,
+                                            unlock_local->lock.locks,
+                                            unlock_local->lock.lk_count,
+                                            dht_create_unlock_cbk);
+                } else {
+                        DHT_STACK_DESTROY (unlock_frame);
+                }
+        }
+
+        if (op_ret != 0) {
+                DHT_STACK_UNWIND (create, frame, op_ret, local->op_errno,
+                                  NULL, NULL, NULL, NULL, NULL, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * Callback for the blocking inodelk requested by dht_create_lock ().
+ *
+ * On success it installs dht_create_finish () and dht_create_do () as the
+ * refresh_layout_unlock and refresh_layout_done hooks and starts
+ * dht_refresh_layout ().  On lock failure it records @op_errno in the
+ * local and fails the create through dht_create_finish ().
+ *
+ * Always returns 0.
+ */
+int32_t
+dht_create_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *local = NULL;
+
+        local = frame->local;
+
+        if (!local) {
+                /* dht_create_finish () dereferences frame->local, so it
+                 * cannot be used without one; fail the create directly. */
+                DHT_STACK_UNWIND (create, frame, -1,
+                                  (op_ret < 0) ? op_errno : EINVAL,
+                                  NULL, NULL, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        if (op_ret < 0) {
+                gf_msg ("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+                        "Create lock failed for file: %s", local->loc2.name);
+
+                local->op_errno = op_errno;
+
+                goto err;
+        }
+
+        local->refresh_layout_unlock = dht_create_finish;
+
+        local->refresh_layout_done = dht_create_do;
+
+        dht_refresh_layout (frame);
+
+        return 0;
+err:
+        dht_create_finish (frame, this, -1);
+        return 0;
+}
+
+/*
+ * Request a blocking read inodelk in DHT_LAYOUT_HEAL_DOMAIN on
+ * local->loc of @frame, against @subvol.
+ *
+ * The one-element lock array is stored in local->lock before the request
+ * is issued; dht_create_lock_cbk () is the completion callback.
+ *
+ * Returns 0 when the lock request was issued.  Returns -1 on invalid
+ * arguments, allocation failure, or when dht_blocking_inodelk () fails;
+ * in that case the lock array is freed and local->lock is left empty.
+ */
+int32_t
+dht_create_lock (call_frame_t *frame, xlator_t *subvol)
+{
+        dht_local_t  *local    = NULL;
+        dht_lock_t  **lk_array = NULL;
+        int           count    = 1;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
+
+        local = frame->local;
+
+        lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+        if (!lk_array) {
+                goto err;
+        }
+
+        lk_array[0] = dht_lock_new (frame->this, subvol, &local->loc,
+                                    F_RDLCK, DHT_LAYOUT_HEAL_DOMAIN);
+        if (!lk_array[0]) {
+                goto err;
+        }
+
+        local->lock.locks    = lk_array;
+        local->lock.lk_count = count;
+
+        if (dht_blocking_inodelk (frame, lk_array, count,
+                                  dht_create_lock_cbk) >= 0) {
+                return 0;
+        }
+
+        /* The request was not issued: detach the array from local before
+         * it is freed below. */
+        local->lock.locks    = NULL;
+        local->lock.lk_count = 0;
+
+err:
+        if (lk_array) {
+                dht_lock_array_free (lk_array, count);
+                GF_FREE (lk_array);
+        }
+
+        return -1;
+}
+
+int
dht_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
mode_t umask, fd_t *fd, dict_t *params)
{
- int op_errno = -1;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
+ conf = this->private;
+
dht_get_du_info (frame, this, loc);
local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
@@ -5569,48 +5888,90 @@ dht_create (call_frame_t *frame, xlator_t *this,
subvol = dht_subvol_get_hashed (this, loc);
if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in layout for path=%s",
+ loc->path);
+
op_errno = ENOENT;
goto err;
}
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path,
- subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
- goto done;
- }
- /* Choose the minimum filled volume, and create the
- files there */
- avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
- if (avail_subvol != subvol) {
- local->params = dict_ref (params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- local->hashed_subvol = subvol;
- gf_msg_trace (this->name, 0,
- "creating %s on %s (link at %s)", loc->path,
- avail_subvol->name, subvol->name);
- dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
- this, avail_subvol, subvol, loc);
- goto done;
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence, a create call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still pending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+
+ gf_msg_debug (this->name, 0, "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s", subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_create_do */
+
+ ret = loc_copy (&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
+
+ goto err;
+ }
+
+ local->params = dict_ref (params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
+ loc_wipe (&local->loc);
+
+ ret = dht_build_parent_loc (this, &local->loc, loc,
+ &op_errno);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "parent loc build failed");
+ goto err;
+ }
+
+ ret = dht_create_lock (frame, subvol);
+
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
+ }
+
+ goto done;
+ }
+ }
}
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
+
+
+ dht_create_wind_to_avail_subvol (frame, this, subvol, loc, flags, mode,
+ umask, fd, params);
done:
return 0;
err:
+
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
NULL, NULL, NULL);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 5e86b32263c..f583d30599d 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -44,6 +44,10 @@ typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, xlator_t *dst_node,
call_frame_t *frame);
+/* Hook type for dht_local_t.refresh_layout_unlock.  It is called with
+ * op_ret -1 when a layout refresh fails, and by dht_create_cbk with the
+ * create's op_ret.  dht_create_finish and dht_selfheal_dir_finish are the
+ * functions assigned to it. */
+typedef int (*dht_refresh_layout_unlock) (call_frame_t *frame, xlator_t *this,
+ int op_ret);
+
+/* Hook type for dht_local_t.refresh_layout_done.  It is called once the
+ * last layout refresh reply arrives and local->op_ret is 0.  dht_create_do
+ * and dht_refresh_layout_done are the functions assigned to it. */
+typedef int (*dht_refresh_layout_done_handle) (call_frame_t *frame);
struct dht_layout {
int spread_cnt; /* layout spread count per directory,
@@ -212,6 +216,10 @@ struct dht_local {
gf_boolean_t force_mkdir;
dht_layout_t *layout, *refreshed_layout;
} selfheal;
+
+ dht_refresh_layout_unlock refresh_layout_unlock;
+ dht_refresh_layout_done_handle refresh_layout_done;
+
uint32_t uid;
uint32_t gid;
@@ -510,6 +518,7 @@ typedef struct dht_migrate_info {
GF_REF_DECL;
} dht_migrate_info_t;
+
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
@@ -1067,4 +1076,6 @@ dht_layout_sort (dht_layout_t *layout);
int
dht_layout_missing_dirs (dht_layout_t *layout);
+/* Defined in dht-selfheal.c; declared here so that dht-common.c can call
+ * it.  Callers set local->refresh_layout_unlock and
+ * local->refresh_layout_done on the frame's local before calling. */
+int
+dht_refresh_layout (call_frame_t *frame);
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 000494c812e..2a9ad37f452 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -339,7 +339,8 @@ out:
}
static inline
-int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout)
+int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this,
+ dht_layout_t *layout)
{
int ret = -1;
int i = 0;
@@ -355,6 +356,17 @@ int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout)
goto out;
}
}
+
+ /* discard decommissioned subvol */
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == this)
+ ret = -1;
+ goto out;
+ }
+ }
+
ret = 0;
out:
return ret;
@@ -376,8 +388,9 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
conf = this->private;
for(i=0; i < conf->subvolume_cnt; i++) {
- /* check if subvol has layout errors, before selecting it */
- ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i],
layout);
if (ignore_subvol)
continue;
@@ -424,8 +437,10 @@ dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
conf = this->private;
for (i = 0; i < conf->subvolume_cnt; i++) {
- /* check if subvol has layout errors, before selecting it */
- ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it*/
+
+ ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i],
layout);
if (ignore_subvol)
continue;
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index a2471701a0b..a3e6d99de14 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -120,7 +120,7 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
int need_unref = 0;
int ret = 0;
dht_conf_t *conf = this->private;
- char gfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
local = frame->local;
local->linkfile.linkfile_cbk = linkfile_cbk;
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index cfe7e5a63ce..cd1d97f7fc0 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -214,7 +214,7 @@ unlock:
if (is_last_call (this_call_cnt)) {
if (local->op_ret == 0) {
- dht_refresh_layout_done (frame);
+ local->refresh_layout_done (frame);
} else {
goto err;
}
@@ -224,7 +224,8 @@ unlock:
return 0;
err:
- dht_selfheal_dir_finish (frame, this, -1);
+ local->refresh_layout_unlock (frame, this, -1);
+
return 0;
}
@@ -290,7 +291,7 @@ dht_refresh_layout (call_frame_t *frame)
return 0;
out:
- dht_selfheal_dir_finish (frame, this, -1);
+ local->refresh_layout_unlock (frame, this, -1);
return 0;
}
@@ -299,10 +300,21 @@ int32_t
dht_selfheal_layout_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local) {
+ goto err;
+ }
+
if (op_ret < 0) {
goto err;
}
+ local->refresh_layout_unlock = dht_selfheal_dir_finish;
+ local->refresh_layout_done = dht_refresh_layout_done;
+
dht_refresh_layout (frame);
return 0;