summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/dht-common.c
diff options
context:
space:
mode:
authorMohit Agrawal <moagrawa@redhat.com>2017-05-12 21:12:47 +0530
committerRaghavendra G <rgowdapp@redhat.com>2017-10-04 09:55:35 +0000
commit9b4de61a136b8e5ba7bf0e48690cdb1292d0dee8 (patch)
tree1a02abcc647793c1caf9793f5e095dc30244482a /xlators/cluster/dht/src/dht-common.c
parentd9cd579a38abb507726ecece8ae4447077709747 (diff)
cluster/dht : User xattrs are not healed after brick stop/start
Problem: In a distributed volume custom extended attribute value for a directory does not display correct value after stop/start or added newly brick. If any extended(acl) attribute value is set for a directory after stop/added the brick the attribute(user|acl|quota) value is not updated on brick after start the brick. Solution: First store hashed subvol or subvol(has internal xattr) on inode ctx and consider it as a MDS subvol.At the time of update custom xattr (user,quota,acl, selinux) on directory first check the mds from inode ctx, if mds is not present on inode ctx then throw EINVAL error to application otherwise set xattr on MDS subvol with internal xattr value of -1 and then try to update the attribute on other non MDS volumes also.If mds subvol is down in that case throw an error "Transport endpoint is not connected". In dht_dir_lookup_cbk| dht_revalidate_cbk|dht_discover_complete call dht_call_dir_xattr_heal to heal custom extended attribute. In case of gnfs server if hashed subvol has not found based on loc then wind a call on all subvol to update xattr. Fix: 1) Save MDS subvol on inode ctx 2) Check if mds subvol is present on inode ctx 3) If mds subvol is down then call unwind with error ENOTCONN and if it is up then set new xattr "GF_DHT_XATTR_MDS" to -1 and wind a call on other subvol. 4) If setxattr fop is successful on non-mds subvol then increment the value of internal xattr to +1 5) At the time of directory_lookup check the value of new xattr GF_DHT_XATTR_MDS 6) If value is not 0 in dht_lookup_dir_cbk(other cbk) functions then call heal function to heal user xattr 7) syncop_setxattr on hashed_subvol to reset the value of xattr to 0 if heal is successful on all subvol. Test : To reproduce the issue followed below steps 1) Create a distributed volume and create mount point 2) Create some directory from mount point mkdir tmp{1..5} 3) Kill any one brick from the volume 4) Set extended attribute from mount point on directory setfattr -n user.foo -v "abc" ./tmp{1..5} It will throw error " Transport End point is not connected " for those hashed subvol is down 5) Start volume with force option to start brick process 6) Execute getfattr command on mount point for directory 7) Check extended attribute on brick getfattr -n user.foo <volume-location>/tmp{1..5} It shows correct value for directories for those xattr fop were executed successfully. Note: The patch will resolve xattr healing problem only for fuse mount not for nfs mount. BUG: 1371806 Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> Change-Id: I4eb137eace24a8cb796712b742f1d177a65343d5
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
-rw-r--r--xlators/cluster/dht/src/dht-common.c1338
1 files changed, 1275 insertions, 63 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 185eeb25f5d..1cc2f08abfb 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -18,7 +18,6 @@
#include "dht-lock.h"
#include "defaults.h"
#include "byte-order.h"
-#include "glusterfs-acl.h"
#include "quota-common-utils.h"
#include "upcall-utils.h"
@@ -64,6 +63,24 @@ int32_t dht_set_fixed_dir_stat (struct iatt *stat)
int
dht_rmdir_unlock (call_frame_t *frame, xlator_t *this);
+char *xattrs_to_heal[] = {
+ "user.",
+ POSIX_ACL_ACCESS_XATTR,
+ POSIX_ACL_DEFAULT_XATTR,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ GF_SELINUX_XATTR_KEY,
+ NULL
+};
+
+/* Return true if key exists in array
+*/
+static gf_boolean_t
+dht_match_xattr (const char *key)
+{
+ return gf_get_index_by_elem (xattrs_to_heal, (char *)key) >= 0;
+}
+
int
dht_aggregate_quota_xattr (dict_t *dst, char *key, data_t *value)
{
@@ -159,7 +176,7 @@ int add_opt(char **optsp, const char *opt)
}
/* Return Choice list from Split brain status */
-char *
+static char *
getChoices (const char *value)
{
int i = 0;
@@ -382,6 +399,74 @@ out:
return;
}
+/* Code to save hashed subvol on inode ctx as a mds subvol
+*/
+int
+dht_inode_ctx_mdsvol_set (inode_t *inode, xlator_t *this, xlator_t *mds_subvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ uint64_t ctx_int = 0;
+ gf_boolean_t ctx_free = _gf_false;
+
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this , &ctx_int);
+ if (ctx_int) {
+ ctx = (dht_inode_ctx_t *)ctx_int;
+ ctx->mds_subvol = mds_subvol;
+ } else {
+ ctx = GF_CALLOC (1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ goto unlock;
+ ctx->mds_subvol = mds_subvol;
+ ctx_free = _gf_true;
+ ctx_int = (long) ctx;
+ ret = __inode_ctx_set (inode, this, &ctx_int);
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+ if (ret && ctx_free)
+ GF_FREE (ctx);
+ return ret;
+}
+
+/*Code to get mds subvol from inode ctx */
+
+int
+dht_inode_ctx_mdsvol_get (inode_t *inode, xlator_t *this, xlator_t **mdsvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ if (!mdsvol)
+ return ret;
+
+ if (__is_root_gfid(inode->gfid)) {
+ (*mdsvol) = FIRST_CHILD (this);
+ return 0;
+ }
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->mds_subvol) {
+ *mdsvol = ctx->mds_subvol;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
+
+
+
+
+
/* TODO:
- use volumename in xattr instead of "dht"
- use NS locks
@@ -397,6 +482,7 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
GF_VALIDATE_OR_GOTO ("dht", frame, out);
@@ -404,6 +490,7 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
local = frame->local;
+ conf = this->private;
ret = op_ret;
FRAME_SU_UNDO (frame, dht_local_t);
@@ -421,6 +508,8 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
dht_set_fixed_dir_stat (&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr);
DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
&local->stbuf, local->xattr, &local->postparent);
@@ -446,10 +535,12 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
int i = 0;
loc_t loc = {0 };
int8_t is_read_only = 0, layout_anomalies = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
local = discover_frame->local;
layout = local->layout;
conf = this->private;
+ gf_uuid_unparse(local->gfid, gfid_local);
LOCK(&discover_frame->lock);
{
@@ -461,6 +552,18 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
if (!main_frame)
return 0;
+ /* Code to update all extended attributed from
+ subvol to local->xattr on that internal xattr has found
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr (this, local, local->xattr,
+ local->mds_xattr, NULL, NULL);
+ dict_unref (local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
ret = dict_get_int8 (local->xattr_req, QUOTA_READ_ONLY_KEY,
&is_read_only);
if (ret < 0)
@@ -529,6 +632,26 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
}
}
+ if (IA_ISDIR (local->stbuf.ia_type)) {
+ /* Call function to save hashed subvol on inode ctx if
+ internal mds xattr is not present and all subvols are up
+ */
+ if (!local->op_ret && !__is_root_gfid (local->stbuf.ia_gfid))
+ (void) dht_mark_mds_subvolume (discover_frame, this);
+
+ if (local->need_xattr_heal && !heal_path) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal (this, local);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR,
+ ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "xattr heal failed for "
+ "directory gfid is %s ",
+ gfid_local);
+ }
+ }
+
if (source && (heal_path || layout_anomalies)) {
gf_uuid_copy (loc.gfid, local->gfid);
if (gf_uuid_is_null (loc.gfid)) {
@@ -575,10 +698,14 @@ cleanup:
}
done:
dht_set_fixed_dir_stat (&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr);
+
DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
&local->postparent);
return 0;
+
out:
DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
NULL);
@@ -587,6 +714,170 @@ out:
}
int
+dht_mds_internal_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+
+ local = frame->local;
+ hashed_subvol = cookie;
+ conf = this->private;
+
+ if (op_ret) {
+ gf_msg_debug (this->name, op_ret,
+ "Failed to set %s on the MDS for path %s. ",
+ conf->mds_xattr_key, local->loc.path);
+ } else {
+ /* Save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set (local->inode, this,
+ hashed_subvol);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set mds subvol on inode ctx"
+ " %s for %s", hashed_subvol->name,
+ local->loc.path);
+ }
+ }
+out:
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+
+
+/* Code to save hashed subvol on inode ctx only while no
+ mds xattr is availble and all subvols are up for fresh
+*/
+int
+dht_mark_mds_subvolume (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int i = 0;
+ gf_boolean_t vol_down = _gf_false;
+ dht_conf_t *conf = 0;
+ int ret = -1;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ dict_t *xattrs = NULL;
+ dht_local_t *copy_local = NULL;
+ call_frame_t *xattr_frame = NULL;
+ int32_t zero[1] = {0};
+
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+ local = frame->local;
+ conf = this->private;
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+
+ /* Code to update hashed subvol consider as a mds subvol
+ and save on inode ctx if all subvols are up and no internal
+ xattr has been set yet
+ */
+ if (!dict_get (local->xattr, conf->mds_xattr_key)) {
+ /* It means no internal MDS xattr has been set yet
+ */
+ /* Check the status of all subvol are up
+ */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ vol_down = _gf_true;
+ break;
+ }
+ }
+ if (vol_down) {
+ ret = 0;
+ gf_msg_debug (this->name, 0,
+ "subvol %s is down. Unable to "
+ " save mds subvol on inode for "
+ " path %s gfid is %s " ,
+ conf->subvolumes[i]->name, local->loc.path,
+ gfid_local);
+ goto out;
+ }
+ /* Calculate hashed subvol based on inode and
+ parent inode
+ */
+ hashed_subvol = dht_inode_get_hashed_subvol (local->inode,
+ this, &local->loc);
+ if (!hashed_subvol) {
+ gf_msg (this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for path %s"
+ " gfid is %s ",
+ local->loc.path, gfid_local);
+ } else {
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+ /* Add internal MDS xattr on disk for hashed subvol
+ */
+ ret = dht_dict_set_array (xattrs, conf->mds_xattr_key, zero, 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary"
+ " value:key = %s for "
+ "path %s", conf->mds_xattr_key,
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+ xattr_frame = create_frame (this, this->ctx->pool);
+ if (!xattr_frame) {
+ ret = -1;
+ goto out;
+ }
+ copy_local = dht_local_init (xattr_frame, &(local->loc),
+ NULL, 0);
+ if (!copy_local) {
+ ret = -1;
+ DHT_STACK_DESTROY (xattr_frame);
+ goto out;
+ }
+ copy_local->stbuf = local->stbuf;
+ if (!copy_local->inode)
+ copy_local->inode = inode_ref (local->inode);
+ gf_uuid_copy (copy_local->loc.gfid, local->gfid);
+ STACK_WIND_COOKIE (xattr_frame, dht_mds_internal_setxattr_cbk,
+ hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->setxattr,
+ &local->loc, xattrs, 0, NULL);
+ ret = 0;
+ }
+ } else {
+ ret = 0;
+ gf_msg_debug (this->name, 0,
+ "internal xattr %s is present on subvol"
+ "on path %s gfid is %s " , conf->mds_xattr_key,
+ local->loc.path, gfid_local);
+ }
+
+
+out:
+ if (xattrs)
+ dict_unref (xattrs);
+ return ret;
+}
+
+
+
+int
dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, dict_t *xattr,
@@ -598,11 +889,15 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_layout_t *layout = NULL;
int ret = -1;
int is_dir = 0;
+ int32_t check_mds = 0;
int is_linkfile = 0;
int attempt_unwind = 0;
dht_conf_t *conf = 0;
- char gfid_local[GF_UUID_BUF_SIZE] = {0};
- char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
+ int errst = 0;
+
GF_VALIDATE_OR_GOTO ("dht", frame, out);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -697,6 +992,41 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_iatt_merge (this, &local->stbuf, stbuf, prev);
dht_iatt_merge (this, &local->postparent, postparent,
prev);
+ if (!dict_get (xattr, conf->mds_xattr_key)) {
+ goto unlock;
+ } else {
+ gf_msg_debug (this->name, 0,
+ "internal xattr %s is present on subvol"
+ "on path %s gfid is %s " ,
+ conf->mds_xattr_key,
+ local->loc.path, gfid_local);
+ }
+ check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ /* save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set (local->inode, this,
+ prev);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+
+ if ((check_mds < 0) && !errst) {
+ local->mds_xattr = dict_ref (xattr);
+ gf_msg_debug (this->name, 0,
+ "Value of %s is not zero on mds subvol"
+ "so xattr needs to be healed on non mds"
+ " path is %s and vol name is %s "
+ " gfid is %s" ,
+ conf->mds_xattr_key,
+ local->loc.path,
+ prev->name, gfid_local);
+ local->need_xattr_heal = 1;
+ local->mds_subvol = prev;
+ }
+
}
unlock:
UNLOCK (&frame->lock);
@@ -795,6 +1125,99 @@ err:
return 0;
}
+/* Get the value of key from dict in the bytewise and save in array after
+ convert from network byte order to host byte order
+*/
+int32_t
+dht_dict_get_array (dict_t *dict, char *key, int32_t value[], int32_t size, int *errst)
+{
+ void *ptr = NULL;
+ int32_t len = -1;
+ int32_t vindex = -1;
+ int32_t err = -1;
+ int ret = 0;
+
+ if (dict == NULL) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ (*errst) = -1;
+ return err;
+ }
+
+ if (len != (size * sizeof (int32_t))) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+
+ memset (value, 0, size * sizeof(int32_t));
+ for (vindex = 0; vindex < size; vindex++) {
+ value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
+ if (value[vindex] < 0)
+ ret = -1;
+ }
+
+ return ret;
+}
+
+
+/* Code to call syntask to heal custom xattr from hashed subvol
+ to non hashed subvol
+*/
+int
+dht_dir_xattr_heal (xlator_t *this, dht_local_t *local)
+{
+ dht_local_t *copy_local = NULL;
+ call_frame_t *copy = NULL;
+ int ret = -1;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+
+ if (local->gfid) {
+ gf_uuid_unparse(local->gfid, gfid_local);
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "No gfid exists for path %s "
+ "so healing xattr is not possible",
+ local->loc.path);
+ goto out;
+ }
+
+ copy = create_frame (this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init (copy, &(local->loc), NULL, 0);
+ if (!copy_local) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "Memory allocation failed "
+ "for path %s gfid %s ",
+ local->loc.path, gfid_local);
+ DHT_STACK_DESTROY (copy);
+ } else {
+ copy_local->stbuf = local->stbuf;
+ gf_uuid_copy (copy_local->loc.gfid, local->gfid);
+ copy_local->mds_subvol = local->mds_subvol;
+ FRAME_SU_DO (copy, dht_local_t);
+ ret = synctask_new (this->ctx->env, dht_dir_heal_xattrs,
+ dht_dir_heal_xattrs_done,
+ copy, copy);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "Synctask creation failed to heal xattr "
+ "for path %s gfid %s ",
+ local->loc.path, gfid_local);
+ DHT_STACK_DESTROY (copy);
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+
int
dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -803,13 +1226,17 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postparent)
{
dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int this_call_cnt = 0;
xlator_t *prev = NULL;
dht_layout_t *layout = NULL;
int ret = -1;
int is_dir = 0;
- char gfid_local[GF_UUID_BUF_SIZE] = {0};
- char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t check_mds = 0;
+ int errst = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
GF_VALIDATE_OR_GOTO ("dht", frame, out);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -819,17 +1246,20 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
+ conf = this->private;
layout = local->layout;
- if (!op_ret && gf_uuid_is_null (local->gfid))
+ if (!op_ret && gf_uuid_is_null (local->gfid)) {
memcpy (local->gfid, stbuf->ia_gfid, 16);
+ }
+ if (local->gfid)
+ gf_uuid_unparse(local->gfid, gfid_local);
/* Check if the gfid is different for file from other node */
if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) {
gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
- gf_uuid_unparse(local->gfid, gfid_local);
gf_msg (this->name, GF_LOG_WARNING, 0,
DHT_MSG_GFID_MISMATCH,
@@ -884,6 +1314,41 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_iatt_merge (this, &local->stbuf, stbuf, prev);
dht_iatt_merge (this, &local->postparent, postparent, prev);
+
+ if (!dict_get (xattr, conf->mds_xattr_key)) {
+ gf_msg_debug (this->name, 0,
+ "Internal xattr %s is not present "
+ " on path %s gfid is %s " ,
+ conf->mds_xattr_key,
+ local->loc.path, gfid_local);
+ goto unlock;
+ } else {
+ /* Save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set (local->inode, this,
+ prev);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+ }
+ check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ if ((check_mds < 0) && !errst) {
+ local->mds_xattr = dict_ref (xattr);
+ gf_msg_debug (this->name, 0,
+ "Value of %s is not zero on hashed subvol "
+ "so xattr needs to be heal on non hashed"
+ " path is %s and vol name is %s "
+ " gfid is %s" ,
+ conf->mds_xattr_key,
+ local->loc.path,
+ prev->name, gfid_local);
+ local->need_xattr_heal = 1;
+ local->mds_subvol = prev;
+ }
+
}
unlock:
UNLOCK (&frame->lock);
@@ -892,7 +1357,20 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- gf_uuid_copy (local->loc.gfid, local->gfid);
+ /* No need to call xattr heal code if volume count is 1
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+
+ /* Code to update all extended attributed from hashed subvol
+ to local->xattr
+ */
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr (this, local, local->xattr,
+ local->mds_xattr, NULL, NULL);
+ dict_unref (local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
if (local->need_selfheal) {
local->need_selfheal = 0;
@@ -911,6 +1389,9 @@ unlock:
}
dht_layout_set (this, local->inode, layout);
+ if (!dict_get (local->xattr, conf->mds_xattr_key) ||
+ local->need_xattr_heal)
+ goto selfheal;
}
if (local->inode) {
@@ -925,6 +1406,8 @@ unlock:
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
dht_set_fixed_dir_stat (&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
&local->postparent);
@@ -981,6 +1464,9 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
char gfid[GF_UUID_BUF_SIZE] = {0};
uint32_t vol_commit_hash = 0;
xlator_t *subvol = NULL;
+ int32_t check_mds = 0;
+ int errst = 0;
+ int32_t mds_xattr_val[1] = {0};
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, err);
@@ -1005,6 +1491,9 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
+ if (gf_uuid_is_null (local->gfid)) {
+ memcpy (local->gfid, local->loc.gfid, 16);
+ }
gf_msg_debug (this->name, op_errno,
"revalidate lookup of %s "
@@ -1090,6 +1579,7 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf.ia_prot = stbuf->ia_prot;
}
}
+
if (local->stbuf.ia_type != IA_INVAL)
{
if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
@@ -1100,6 +1590,44 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->need_selfheal = 1;
}
}
+ if (!dict_get (xattr, conf->mds_xattr_key)) {
+ gf_msg_debug (this->name, 0,
+ "internal xattr %s is not present"
+ " on path %s gfid is %s " ,
+ conf->mds_xattr_key,
+ local->loc.path, gfid);
+ } else {
+ check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ if (local->mds_subvol == prev) {
+ local->mds_stbuf.ia_gid = stbuf->ia_gid;
+ local->mds_stbuf.ia_uid = stbuf->ia_uid;
+ local->mds_stbuf.ia_prot = stbuf->ia_prot;
+ }
+ /* save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set (local->inode, this,
+ prev);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set MDS subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+ if ((check_mds < 0) && !errst) {
+ local->mds_xattr = dict_ref (xattr);
+ gf_msg_debug (this->name, 0,
+ "Value of %s is not zero on "
+ "hashed subvol so xattr needs to"
+ " be healed on non hashed"
+ " path is %s and vol name is %s "
+ " gfid is %s" ,
+ conf->mds_xattr_key,
+ local->loc.path,
+ prev->name, gfid);
+ local->need_xattr_heal = 1;
+ local->mds_subvol = prev;
+ }
+ }
ret = dht_layout_dir_mismatch (this, layout,
prev, &local->loc,
xattr);
@@ -1169,13 +1697,52 @@ out:
&& (conf && conf->unhashed_sticky_bit)) {
local->stbuf.ia_prot.sticky = 1;
}
+ /* No need to call heal code if volume count is 1
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+
+ /* Code to update all extended attributed from hashed subvol
+ to local->xattr
+ */
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr (this, local, local->xattr,
+ local->mds_xattr, NULL, NULL);
+ dict_unref (local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+ /* Call function to save hashed subvol on inode ctx if
+ internal mds xattr is not present and all subvols are up
+ */
+ if (inode && !__is_root_gfid (inode->gfid) &&
+ (!local->op_ret) && (IA_ISDIR (local->stbuf.ia_type)))
+ (void) dht_mark_mds_subvolume (frame, this);
+
+ if (local->need_xattr_heal) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal (this, local);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR,
+ ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "xattr heal failed for directory %s "
+ " gfid %s ", local->loc.path,
+ gfid);
+ }
if (local->need_selfheal) {
local->need_selfheal = 0;
- gf_uuid_copy (local->gfid, local->stbuf.ia_gfid);
- local->stbuf.ia_gid = local->prebuf.ia_gid;
- local->stbuf.ia_uid = local->prebuf.ia_uid;
- if (__is_root_gfid(local->stbuf.ia_gfid))
+ if (!__is_root_gfid (inode->gfid)) {
+ gf_uuid_copy (local->gfid, local->mds_stbuf.ia_gfid);
+ if (local->mds_stbuf.ia_gid || local->mds_stbuf.ia_uid) {
+ local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+ local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+ }
+ } else {
+ gf_uuid_copy (local->gfid, local->stbuf.ia_gfid);
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
local->stbuf.ia_prot = local->prebuf.ia_prot;
+ }
+
copy = create_frame (this, this->ctx->pool);
if (copy) {
copy_local = dht_local_init (copy, &local->loc,
@@ -1183,6 +1750,8 @@ out:
if (!copy_local)
goto cont;
copy_local->stbuf = local->stbuf;
+ copy_local->mds_stbuf = local->mds_stbuf;
+ copy_local->mds_subvol = local->mds_subvol;
copy->local = copy_local;
FRAME_SU_DO (copy, dht_local_t);
ret = synctask_new (this->ctx->env,
@@ -1237,6 +1806,8 @@ cont:
local->op_ret = -1;
local->op_errno = ESTALE;
}
+ /* Delete mds xattr at the time of STACK UNWIND */
+ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
@@ -2253,6 +2824,62 @@ out:
}
+/* Code to get hashed subvol based on inode and loc
+ First it check if loc->parent and loc->path exist then it get
+ hashed subvol based on loc.
+*/
+
+xlator_t *
+dht_inode_get_hashed_subvol (inode_t *inode, xlator_t *this, loc_t *loc)
+{
+ char *path = NULL;
+ loc_t populate_loc = {0, };
+ char *name = NULL;
+ xlator_t *hash_subvol = NULL;
+
+ if (!inode)
+ return hash_subvol;
+
+ if (loc && loc->parent && loc->path) {
+ if (!loc->name) {
+ name = strrchr (loc->path, '/');
+ if (name) {
+ loc->name = name + 1;
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed (this, loc);
+ goto out;
+ }
+
+ if (!gf_uuid_is_null (inode->gfid)) {
+ populate_loc.inode = inode_ref (inode);
+ populate_loc.parent = inode_parent (populate_loc.inode,
+ NULL, NULL);
+ inode_path (populate_loc.inode, NULL, &path);
+
+ if (!path)
+ goto out;
+
+ populate_loc.path = path;
+ if (!populate_loc.name && populate_loc.path) {
+ name = strrchr (populate_loc.path, '/');
+ if (name) {
+ populate_loc.name = name + 1;
+
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed (this, &populate_loc);
+ }
+out:
+ if (populate_loc.inode)
+ loc_wipe (&populate_loc);
+ return hash_subvol;
+}
+
int
dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -2481,6 +3108,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
{
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
+ xlator_t *mds_subvol = NULL;
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int ret = -1;
@@ -2531,6 +3159,15 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
local->xattr_req = dict_new ();
}
+ ret = dict_set_uint32 (local->xattr_req, conf->mds_xattr_key, 4);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s", conf->mds_xattr_key, loc->path);
+ }
+
if (gf_uuid_is_null (loc->pargfid) && !gf_uuid_is_null (loc->gfid) &&
!__is_root_gfid (loc->inode->gfid)) {
local->cached_subvol = NULL;
@@ -2601,6 +3238,14 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
goto err;
}
if (IA_ISDIR (local->inode->ia_type)) {
+ ret = dht_inode_ctx_mdsvol_get (local->inode, this,
+ &mds_subvol);
+ if (ret || !mds_subvol) {
+ gf_msg_debug (this->name, 0,
+ "Failed to get mds subvol for path %s",
+ local->loc.path);
+ }
+ local->mds_subvol = mds_subvol;
local->call_cnt = call_cnt = conf->subvolume_cnt;
for (i = 0; i < call_cnt; i++) {
STACK_WIND_COOKIE (frame, dht_revalidate_cbk,
@@ -2804,18 +3449,17 @@ unlock:
if (!local->op_ret) {
hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- if (hashed_subvol &&
- hashed_subvol != local->cached_subvol) {
+ if (hashed_subvol && hashed_subvol != local->cached_subvol) {
/*
* If hashed and cached are different, then we need
* to unlink linkfile from hashed subvol if data
* file is deleted successfully
*/
- STACK_WIND_COOKIE (frame, dht_unlink_linkfile_cbk,
- hashed_subvol, hashed_subvol,
- hashed_subvol->fops->unlink, &local->loc,
- local->flags, xdata);
- return 0;
+ STACK_WIND_COOKIE (frame, dht_unlink_linkfile_cbk,
+ hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->unlink,
+ &local->loc, local->flags, xdata);
+ return 0;
}
}
@@ -2827,6 +3471,17 @@ unlock:
return 0;
}
+static int
+dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+
int
dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
@@ -2862,6 +3517,256 @@ unlock:
return 0;
}
+/* Set the value[] of key into dict after convert from
+ host byte order to network byte order
+*/
+int32_t dht_dict_set_array (dict_t *dict, char *key, int32_t value[],
+ int32_t size)
+{
+ int ret = -1;
+ int32_t *ptr = NULL;
+ int32_t vindex;
+
+ if (value == NULL) {
+ return -EINVAL;
+ }
+
+ ptr = GF_MALLOC(sizeof(int32_t) * size, gf_common_mt_char);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+ for (vindex = 0; vindex < size; vindex++) {
+ ptr[vindex] = hton32(value[vindex]);
+ }
+ ret = dict_set_bin(dict, key, ptr, sizeof(int32_t) * size);
+ if (ret)
+ GF_FREE (ptr);
+ return ret;
+}
+
+int
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = cookie;
+
+ local = frame->local;
+
+ if (op_ret)
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+
+ DHT_STACK_UNWIND (setxattr, frame, 0, op_errno, local->xdata);
+ return 0;
+}
+
+int
+dht_common_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = cookie;
+
+ local = frame->local;
+
+ if (op_ret)
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+
+ DHT_STACK_UNWIND (fsetxattr, frame, 0, op_errno, local->xdata);
+ return 0;
+}
+
+/* Code to wind a xattrop call to add 1 on current mds internal xattr
+ value
+*/
+int
+dht_setxattr_non_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = 0;
+ dict_t *xattrop = NULL;
+ int32_t addone[1] = {1};
+ call_frame_t *prev = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ }
+ }
+ UNLOCK (&frame->lock);
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ if (!local->op_ret) {
+ xattrop = dict_new ();
+ if (!xattrop) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ DHT_MSG_NO_MEMORY, 0,
+ "dictionary creation failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dht_dict_set_array (xattrop,
+ conf->mds_xattr_key,
+ addone, 1);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "dictionary set array failed ");
+ ret = -1;
+ goto out;
+ }
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND (frame, dht_common_xattrop_cbk,
+ local->mds_subvol,
+ local->mds_subvol->fops->xattrop,
+ &local->loc, GF_XATTROP_ADD_ARRAY,
+ xattrop, NULL);
+ } else {
+ STACK_WIND (frame, dht_common_fxattrop_cbk,
+ local->mds_subvol,
+ local->mds_subvol->fops->fxattrop,
+ local->fd, GF_XATTROP_ADD_ARRAY,
+ xattrop, NULL);
+ }
+ } else {
+ if (local->fop == GF_FOP_SETXATTR)
+ DHT_STACK_UNWIND (setxattr, frame, 0, 0, local->xdata);
+ else
+ DHT_STACK_UNWIND (fsetxattr, frame, 0, 0, local->xdata);
+ }
+ }
+out:
+ if (xattrop)
+ dict_unref (xattrop);
+ if (ret) {
+ if (local->fop == GF_FOP_SETXATTR)
+ DHT_STACK_UNWIND (setxattr, frame, 0, 0, local->xdata);
+ else
+ DHT_STACK_UNWIND (fsetxattr, frame, 0, 0, local->xdata);
+ }
+ return 0;
+}
+
+
+int
+dht_setxattr_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *mds_subvol = NULL;
+ int i = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+ mds_subvol = local->mds_subvol;
+
+ if (op_ret == -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ local->op_ret = 0;
+ local->call_cnt = conf->subvolume_cnt - 1;
+ local->xdata = dict_ref (xdata);
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol && (mds_subvol == conf->subvolumes[i]))
+ continue;
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND (frame, dht_setxattr_non_mds_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->setxattr,
+ &local->loc, local->xattr,
+ local->flags, local->xattr_req);
+ } else {
+ STACK_WIND (frame, dht_setxattr_non_mds_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsetxattr,
+ local->fd, local->xattr,
+ local->flags, local->xattr_req);
+ }
+ }
+
+ return 0;
+out:
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ } else {
+ DHT_STACK_UNWIND (fsetxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ }
+
+ return 0;
+}
+
+int
+dht_xattrop_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND (frame, dht_setxattr_mds_cbk,
+ local->mds_subvol,
+ local->mds_subvol->fops->setxattr,
+ &local->loc, local->xattr,
+ local->flags, local->xattr_req);
+ } else {
+ STACK_WIND (frame, dht_setxattr_mds_cbk,
+ local->mds_subvol,
+ local->mds_subvol->fops->fsetxattr,
+ local->fd, local->xattr,
+ local->flags, local->xattr_req);
+ }
+ return 0;
+out:
+ if (local->fop == GF_FOP_SETXATTR)
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ else
+ DHT_STACK_UNWIND (fsetxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ return 0;
+}
+
static void
fill_layout_info (dht_layout_t *layout, char *buf)
{
@@ -3303,6 +4208,41 @@ dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+
+int
+dht_mds_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!xattr || (op_ret == -1)) {
+ local->op_ret = op_ret;
+ goto out;
+ }
+ if (dict_get (xattr, conf->xattr_name)) {
+ dict_del (xattr, conf->xattr_name);
+ }
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_copy_with_ref (xattr, NULL);
+ }
+
+out:
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, xdata);
+ return 0;
+}
+
+
int
dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
@@ -3532,6 +4472,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
+ xlator_t *mds_subvol = NULL;
xlator_t *cached_subvol = NULL;
dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
@@ -3574,6 +4515,12 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
if (key &&
+ (strncmp (key, conf->mds_xattr_key, strlen(key)) == 0)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ if (key &&
(strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
&& DHT_IS_DIR(layout)) {
@@ -3703,26 +4650,53 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
- if (key && (!strcmp (QUOTA_LIMIT_KEY, key) ||
- !strcmp (QUOTA_LIMIT_OBJECTS_KEY, key))) {
- /* quota hardlimit and aggregated size of a directory is stored
- * in inode contexts of each brick. Hence its good enough that
- * we send getxattr for this key to any brick.
- */
- local->call_cnt = 1;
- subvol = dht_first_up_subvol (this);
- STACK_WIND (frame, dht_getxattr_cbk, subvol,
- subvol->fops->getxattr, loc, key, xdata);
- return 0;
- }
-
if (cluster_handle_marker_getxattr (frame, loc, key, conf->vol_uuid,
dht_getxattr_unwind,
dht_marker_populate_args) == 0)
return 0;
if (DHT_IS_DIR(layout)) {
- cnt = local->call_cnt = layout->cnt;
+ local->call_cnt = conf->subvolume_cnt;
+ cnt = conf->subvolume_cnt;
+ ret = dht_inode_ctx_mdsvol_get (loc->inode, this, &mds_subvol);
+ if (!mds_subvol) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Cannot determine MDS, fetching xattr %s randomly"
+ " from a subvol for path %s ", key, loc->path);
+ } else {
+ /* TODO need to handle it, As of now we are
+ choosing availability instead of chossing
+ consistencty, in case of mds_subvol is
+ down winding a getxattr call on other subvol
+ and return xattr
+ */
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg (this->name,
+ GF_LOG_INFO, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS %s is down for path"
+ " path %s so fetching xattr "
+ "%s randomly from a subvol ",
+ local->mds_subvol->name,
+ loc->path, key);
+ ret = 1;
+ }
+ }
+ }
+ }
+
+ if (!ret && key && local->mds_subvol && dht_match_xattr (key)) {
+ STACK_WIND (frame, dht_mds_getxattr_cbk,
+ local->mds_subvol,
+ local->mds_subvol->fops->getxattr,
+ loc, key, xdata);
+
+ return 0;
+ }
} else {
cnt = local->call_cnt = 1;
}
@@ -3753,6 +4727,10 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this,
int op_errno = -1;
int i = 0;
int cnt = 0;
+ xlator_t *mds_subvol = NULL;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -3760,6 +4738,8 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd->inode, err);
VALIDATE_OR_GOTO (this->private, err);
+ conf = this->private;
+
local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
if (!local) {
op_errno = ENOMEM;
@@ -3784,15 +4764,63 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this,
}
}
+ if (fd->inode)
+ gf_uuid_unparse(fd->inode->gfid, gfid);
+
if ((fd->inode->ia_type == IA_IFDIR)
&& key
&& (strncmp (key, GF_XATTR_LOCKINFO_KEY,
strlen (GF_XATTR_LOCKINFO_KEY)) != 0)) {
- cnt = local->call_cnt = layout->cnt;
+ local->call_cnt = conf->subvolume_cnt;
+ cnt = conf->subvolume_cnt;
+ ret = dht_inode_ctx_mdsvol_get (fd->inode, this, &mds_subvol);
+
+ if (!mds_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "cannot determine MDS, fetching xattr %s "
+ " randomly from a subvol for gfid %s ",
+ key, gfid);
+ } else {
+ /* TODO need to handle it, As of now we are
+ choosing availability instead of chossing
+ consistencty, in case of hashed_subvol is
+ down winding a getxattr call on other subvol
+ and return xattr
+ */
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg (this->name,
+ GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvolume %s is down"
+ " for gfid %s so fetching xattr "
+ " %s randomly from a subvol ",
+ local->mds_subvol->name,
+ gfid, key);
+ ret = 1;
+ }
+ }
+ }
+ }
+
+ if (!ret && key && local->mds_subvol &&
+ dht_match_xattr (key)) {
+ STACK_WIND (frame, dht_mds_getxattr_cbk,
+ local->mds_subvol,
+ local->mds_subvol->fops->fgetxattr,
+ fd, key, NULL);
+
+ return 0;
+ }
+
} else {
cnt = local->call_cnt = 1;
}
+
for (i = 0; i < cnt; i++) {
subvol = layout->list[i].xlator;
STACK_WIND (frame, dht_getxattr_cbk,
@@ -3888,6 +4916,169 @@ out:
return 0;
}
+/* Function is call by dict_foreach_fnmatch if key is match with
+ user.* and set boolean flag to true
+*/
+static int
+dht_is_user_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *user_xattr_found = data;
+ *user_xattr_found = _gf_true;
+ return 0;
+}
+
+
+/* Common code to wind a (f)setxattr call to set xattr on directory
+*/
+int
+dht_dir_common_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xattr, int flags, dict_t *xdata,
+ int *op_errno)
+
+{
+ dict_t *xattrop = NULL;
+ int32_t subone[1] = {-1};
+ gf_boolean_t uxattr_key_found = _gf_false;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *travvol = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_cnt = 0;
+ dht_local_t *local = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+
+ conf = this->private;
+ local = frame->local;
+ call_cnt = conf->subvolume_cnt;
+ local->flags = flags;
+
+ if (local->gfid)
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ /* Check if any user xattr present in xattr
+ */
+ dict_foreach_fnmatch (xattr, "user*", dht_is_user_xattr,
+ &uxattr_key_found);
+
+ /* Check if any custom key xattr present in dict xattr
+ and start index from 1 because user xattr already
+ checked in previous line
+ */
+ for (i = 1; xattrs_to_heal[i]; i++)
+ if (dict_get (xattr, xattrs_to_heal[i]))
+ uxattr_key_found = _gf_true;
+
+ /* If there is no custom key xattr present or gfid is root
+ or call_cnt is 1 then wind a (f)setxattr call on all subvols
+ */
+ if (!uxattr_key_found || __is_root_gfid (local->gfid) || call_cnt == 1) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ travvol = conf->subvolumes[i];
+ if (fd) {
+ STACK_WIND_COOKIE (frame, dht_err_cbk,
+ travvol, travvol,
+ travvol->fops->fsetxattr,
+ fd, xattr, flags, xdata);
+ } else {
+ STACK_WIND_COOKIE (frame, dht_err_cbk,
+ travvol, travvol,
+ travvol->fops->setxattr,
+ loc, xattr, flags, xdata);
+ }
+ }
+
+ return 0;
+ }
+
+ /* Calculate hash subvol based on inode and parent inode
+ */
+ if (fd) {
+ ret = dht_inode_ctx_mdsvol_get (fd->inode, this, &mds_subvol);
+ } else {
+ ret = dht_inode_ctx_mdsvol_get (loc->inode, this, &mds_subvol);
+ }
+ if (ret || !mds_subvol) {
+ if (fd) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get mds subvol for fd %p"
+ "gfid is %s ", fd, gfid_local);
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get mds subvol for path %s"
+ "gfid is %s ", loc->path, gfid_local);
+ }
+ (*op_errno) = ENOENT;
+ goto err;
+ }
+
+ local->mds_subvol = mds_subvol;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg (this->name, GF_LOG_WARNING,
+ 0, DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvol is down for path "
+ " %s gfid is %s Unable to set xattr " ,
+ local->loc.path, gfid_local);
+ (*op_errno) = ENOTCONN;
+ goto err;
+ }
+ }
+ }
+
+ if (uxattr_key_found) {
+ xattrop = dict_new ();
+ if (!xattrop) {
+ gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY,
+ 0, "dictionary creation failed for path %s "
+ "for gfid is %s ", local->loc.path, gfid_local);
+ (*op_errno) = ENOMEM;
+ goto err;
+ }
+ local->xattr = dict_ref (xattr);
+ /* Subtract current MDS xattr value to -1 , value of MDS
+ xattr represents no. of times xattr modification failed
+ on non MDS subvols.
+ */
+ ret = dht_dict_set_array (xattrop, conf->mds_xattr_key, subone, 1);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "dictionary set array failed for path %s "
+ "for gfid is %s ", local->loc.path, gfid_local);
+ if (xattrop)
+ dict_unref (xattrop);
+ (*op_errno) = ret;
+ goto err;
+ }
+ /* Wind a xattrop call to use ref counting approach
+ update mds xattr to -1 before update xattr on
+ hashed subvol and update mds xattr to +1 after update
+ xattr on all non hashed subvol
+ */
+ if (fd) {
+ STACK_WIND (frame, dht_xattrop_mds_cbk,
+ local->mds_subvol,
+ local->mds_subvol->fops->fxattrop,
+ fd, GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ } else {
+ STACK_WIND (frame, dht_xattrop_mds_cbk,
+ local->mds_subvol,
+ local->mds_subvol->fops->xattrop,
+ loc, GF_XATTROP_ADD_ARRAY,
+ xattrop, NULL);
+ }
+ if (xattrop)
+ dict_unref (xattrop);
+ }
+
+ return 0;
+err:
+ return -1;
+}
int
@@ -3901,7 +5092,6 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
dht_layout_t *layout = NULL;
int ret = -1;
int call_cnt = 0;
- int i = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -3941,14 +5131,11 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
local->call_cnt = call_cnt = layout->cnt;
if (IA_ISDIR (fd->inode->ia_type)) {
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND_COOKIE (frame, dht_err_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fsetxattr,
- fd, xattr, flags, xdata);
- }
-
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_setxattr (frame, this, NULL, fd,
+ xattr, flags, xdata, &op_errno);
+ if (ret)
+ goto err;
} else {
local->call_cnt = 1;
@@ -3975,16 +5162,6 @@ err:
return 0;
}
-static int
-dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
int
dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -4122,6 +5299,7 @@ dht_nuke_dir (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
return 0;
}
+
int
dht_setxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
@@ -4141,6 +5319,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
int call_cnt = 0;
uint32_t new_hash = 0;
+
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
@@ -4180,6 +5359,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
}
local->call_cnt = call_cnt = layout->cnt;
+ tmp = dict_get (xattr, conf->mds_xattr_key);
+ if (tmp) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
tmp = dict_get (xattr, GF_XATTR_FILE_MIGRATE_KEY);
if (tmp) {
@@ -4355,15 +5539,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
if (IA_ISDIR (loc->inode->ia_type)) {
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND_COOKIE (frame, dht_err_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags, xdata);
- }
-
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_setxattr (frame, this, loc, NULL,
+ xattr, flags, xdata, &op_errno);
+ if (ret)
+ goto err;
} else {
local->rebalance.xattr = dict_ref (xattr);
@@ -4602,6 +5782,12 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
local->call_cnt = call_cnt = layout->cnt;
local->key = gf_strdup (key);
+ if (key &&
+ (strncmp (key, conf->mds_xattr_key, strlen(key)) == 0)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
if (IA_ISDIR (loc->inode->ia_type)) {
for (i = 0; i < call_cnt; i++) {
STACK_WIND_COOKIE (frame, dht_removexattr_cbk,
@@ -7553,6 +8739,10 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
dht_iatt_merge (this, &local->postparent, postparent, prev);
local->call_cnt = conf->subvolume_cnt - 1;
+ /* Delete internal mds xattr from params dict to avoid store
+ internal mds xattr on other subvols
+ */
+ dict_del (local->params, conf->mds_xattr_key);
if (gf_uuid_is_null (local->loc.gfid))
gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid);
@@ -7564,6 +8754,14 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
&local->loc, layout);
}
+ /* Set hashed subvol as a mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set (local->inode, this, hashed_subvol);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path, hashed_subvol->name);
+ }
+
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->subvolumes[i] == hashed_subvol)
continue;
@@ -7573,6 +8771,7 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
&local->loc, local->mode, local->umask,
local->params);
}
+
return 0;
err:
if (local->op_ret != 0) {
@@ -7594,9 +8793,13 @@ dht_mkdir_guard_parent_layout_cbk (call_frame_t *frame, xlator_t *this,
dict_t *params)
{
dht_local_t *local = NULL;
+ dht_conf_t *conf = 0;
char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = -1;
+ int32_t zero[1] = {0};
local = frame->local;
+ conf = this->private;
gf_uuid_unparse (loc->parent->gfid, pgfid);
@@ -7610,6 +8813,15 @@ dht_mkdir_guard_parent_layout_cbk (call_frame_t *frame, xlator_t *this,
}
local->op_ret = -1;
+ /* Add internal MDS xattr on disk for hashed subvol
+ */
+ ret = dht_dict_set_array (params, conf->mds_xattr_key, zero, 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s", conf->mds_xattr_key, loc->path);
+ }
STACK_WIND_COOKIE (frame, dht_mkdir_hashed_cbk, local->hashed_subvol,
local->hashed_subvol,