/* Copyright (c) 2008-2012 Red Hat, Inc. This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser General Public License, version 3 or any later version (LGPLv3 or later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ #include "mem-types.h" #include "libxlator.h" int marker_xtime_default_gauge[] = { [MCNT_FOUND] = 1, [MCNT_NOTFOUND] = -1, [MCNT_ENODATA] = -1, [MCNT_ENOTCONN] = -1, [MCNT_ENOENT] = -1, [MCNT_EOTHER] = -1, }; int marker_uuid_default_gauge[] = { [MCNT_FOUND] = 1, [MCNT_NOTFOUND] = 0, [MCNT_ENODATA] = 0, [MCNT_ENOTCONN] = 0, [MCNT_ENOENT] = 0, [MCNT_EOTHER] = 0, }; static int marker_idx_errno_map[] = { [MCNT_FOUND] = EINVAL, [MCNT_NOTFOUND] = EINVAL, [MCNT_ENOENT] = ENOENT, [MCNT_ENOTCONN] = ENOTCONN, [MCNT_ENODATA] = ENODATA, [MCNT_EOTHER] = EINVAL, [MCNT_MAX] = 0, }; /*Copy the contents of oldtimebuf to newtimbuf*/ static void update_timebuf (uint32_t *oldtimbuf, uint32_t *newtimebuf) { newtimebuf[0] = (oldtimbuf[0]); newtimebuf[1] = (oldtimbuf[1]); } /* Convert Timebuf in network order to host order */ static void get_hosttime (uint32_t *oldtimbuf, uint32_t *newtimebuf) { newtimebuf[0] = ntohl (oldtimbuf[0]); newtimebuf[1] = ntohl (oldtimbuf[1]); } /* Match the Incoming trusted.glusterfs..xtime against volume uuid */ int match_uuid_local (const char *name, char *uuid) { if (!uuid || !*uuid) return -1; name = strtail ((char *)name, MARKER_XATTR_PREFIX); if (!name || name++[0] != '.') return -1; name = strtail ((char *)name, uuid); if (!name || strcmp (name, ".xtime") != 0) return -1; return 0; } static void marker_local_incr_errcount (xl_marker_local_t *local, int op_errno) { marker_result_idx_t i = -1; if (!local) return; switch (op_errno) { case ENODATA: i = MCNT_ENODATA; break; case ENOENT: i = MCNT_ENOENT; break; case ENOTCONN: i = MCNT_ENOTCONN; break; default: i = MCNT_EOTHER; break; } local->count[i]++; } static int evaluate_marker_results (int *gauge, int *count) { int i = 0; int op_errno = 0; gf_boolean_t sane = _gf_true; /* check if the policy of the gauge is violated; * if yes, try to get the best errno, ie. look * for the first position where there is a more * specific kind of vioilation than the generic EINVAL */ for (i = 0; i < MCNT_MAX; i++) { if (sane) { if ((gauge[i] > 0 && count[i] < gauge[i]) || (gauge[i] < 0 && count[i] >= -gauge[i])) { sane = _gf_false; /* generic action: adopt corresponding errno */ op_errno = marker_idx_errno_map[i]; } } else { /* already insane; trying to get a more informative * errno by checking subsequent counters */ if (count[i] > 0) op_errno = marker_idx_errno_map[i]; } if (op_errno && op_errno != EINVAL) break; } return op_errno; } static void cluster_marker_unwind (call_frame_t *frame, char *key, void *value, size_t size, dict_t *dict) { xl_marker_local_t *local = frame->local; int ret = 0; int32_t op_ret = 0; int32_t op_errno = 0; gf_boolean_t unref = _gf_false; frame->local = local->xl_local; if (local->count[MCNT_FOUND]) { if (!dict) { dict = dict_new(); if (dict) { unref = _gf_true; } else { op_ret = -1; op_errno = ENOMEM; goto out; } } ret = dict_set_static_bin (dict, key, value, size); if (ret) { op_ret = -1; op_errno = ENOMEM; goto out; } } op_errno = evaluate_marker_results (local->gauge, local->count); if (op_errno) op_ret = -1; out: if (local->xl_specf_unwind) { local->xl_specf_unwind (frame, op_ret, op_errno, dict, NULL); } else { STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); } GF_FREE (local); if (unref) dict_unref (dict); } /* Aggregate all the .xtime attrs of the cluster and send the max*/ int32_t cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *dict, dict_t *xdata) { int32_t callcnt = 0; uint32_t *net_timebuf = NULL; uint32_t host_timebuf[2] = {0,}; char marker_xattr[128] = {0}; xl_marker_local_t *local = NULL; local = frame->local; snprintf (marker_xattr, sizeof (marker_xattr), "%s.%s.%s", MARKER_XATTR_PREFIX, local->vol_uuid, XTIME); LOCK (&frame->lock); { callcnt = --local->call_count; if (op_ret) { marker_local_incr_errcount (local, op_errno); goto unlock; } if (dict_get_ptr (dict, marker_xattr, (void **)&net_timebuf)) { gf_log (this->name, GF_LOG_WARNING, "Unable to get .xtime attr"); local->count[MCNT_NOTFOUND]++; goto unlock; } if (local->count[MCNT_FOUND]) { get_hosttime (net_timebuf, host_timebuf); if ( (host_timebuf[0]>local->host_timebuf[0]) || (host_timebuf[0] == local->host_timebuf[0] && host_timebuf[1] >= local->host_timebuf[1])) { update_timebuf (net_timebuf, local->net_timebuf); update_timebuf (host_timebuf, local->host_timebuf); } } else { get_hosttime (net_timebuf, local->host_timebuf); update_timebuf (net_timebuf, local->net_timebuf); local->count[MCNT_FOUND]++; } } unlock: UNLOCK (&frame->lock); if (callcnt == 0) cluster_marker_unwind (frame, marker_xattr, local->net_timebuf, 8, dict); return 0; } int32_t cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *dict, dict_t *xdata) { int32_t callcnt = 0; struct volume_mark *volmark = NULL; xl_marker_local_t *local = NULL; int32_t ret = -1; char *vol_uuid = NULL; local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; vol_uuid = local->vol_uuid; if (op_ret) { marker_local_incr_errcount (local, op_errno); goto unlock; } ret = dict_get_bin (dict, GF_XATTR_MARKER_KEY, (void *)&volmark); if (ret) goto unlock; if (local->count[MCNT_FOUND]) { if ((local->volmark->major != volmark->major) || (local->volmark->minor != volmark->minor)) { op_ret = -1; op_errno = EINVAL; goto unlock; } if (local->retval) { goto unlock; } else if (volmark->retval) { GF_FREE (local->volmark); local->volmark = memdup (volmark, sizeof (*volmark)); local->retval = volmark->retval; } else if ((volmark->sec > local->volmark->sec) || ((volmark->sec == local->volmark->sec) && (volmark->usec >= local->volmark->usec))) { GF_FREE (local->volmark); local->volmark = memdup (volmark, sizeof (*volmark)); } } else { local->volmark = memdup (volmark, sizeof (*volmark)); VALIDATE_OR_GOTO (local->volmark, unlock); uuid_unparse (volmark->uuid, vol_uuid); if (volmark->retval) local->retval = volmark->retval; local->count[MCNT_FOUND]++; } } unlock: UNLOCK (&frame->lock); if (callcnt == 0) cluster_marker_unwind (frame, GF_XATTR_MARKER_KEY, local->volmark, sizeof (*local->volmark), dict); return 0; } int gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value) { int ret = -1; uint32_t *net_timebuf = NULL; uint32_t *value_timebuf = NULL; uint32_t host_timebuf[2] = {0,}; uint32_t host_value_timebuf[2] = {0,}; /* stime should be minimum of all the other nodes */ ret = dict_get_bin (dst, key, (void **)&net_timebuf); if (ret < 0) { net_timebuf = GF_CALLOC (1, sizeof (int64_t), gf_common_mt_char); if (!net_timebuf) goto out; ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t)); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "key=%s: dict set failed", key); goto error; } } value_timebuf = data_to_bin (value); if (!value_timebuf) { gf_log (this->name, GF_LOG_WARNING, "key=%s: getting value of stime failed", key); ret = -1; goto out; } get_hosttime (value_timebuf, host_value_timebuf); get_hosttime (net_timebuf, host_timebuf); /* can't use 'min()' macro here as we need to compare two fields in the array, selectively */ if ((host_value_timebuf[0] < host_timebuf[0]) || ((host_value_timebuf[0] == host_timebuf[0]) && (host_value_timebuf[1] < host_timebuf[1]))) { update_timebuf (value_timebuf, net_timebuf); } ret = 0; out: return ret; error: /* To be used only when net_timebuf is not set in the dict */ if (net_timebuf) GF_FREE (net_timebuf); return ret; } int gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value) { int ret = -1; uint32_t *net_timebuf = NULL; uint32_t *value_timebuf = NULL; uint32_t host_timebuf[2] = {0,}; uint32_t host_value_timebuf[2] = {0,}; /* stime should be maximum of all the other nodes */ ret = dict_get_bin (dst, key, (void **)&net_timebuf); if (ret < 0) { net_timebuf = GF_CALLOC (1, sizeof (int64_t), gf_common_mt_char); if (!net_timebuf) goto out; ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t)); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "key=%s: dict set failed", key); goto error; } } value_timebuf = data_to_bin (value); if (!value_timebuf) { gf_log (this->name, GF_LOG_WARNING, "key=%s: getting value of stime failed", key); ret = -1; goto out; } get_hosttime (value_timebuf, host_value_timebuf); get_hosttime (net_timebuf, host_timebuf); /* can't use 'max()' macro here as we need to compare two fields in the array, selectively */ if ((host_value_timebuf[0] > host_timebuf[0]) || ((host_value_timebuf[0] == host_timebuf[0]) && (host_value_timebuf[1] > host_timebuf[1]))) { update_timebuf (value_timebuf, net_timebuf); } ret = 0; out: return ret; error: /* To be used only when net_timebuf is not set in the dict */ if (net_timebuf) GF_FREE (net_timebuf); return ret; } static int _get_children_count (xlator_t *xl) { int i = 0; xlator_list_t *trav = NULL; for (i = 0, trav = xl->children; trav ; trav = trav->next, i++) { /*'i' will have the value */ } return i; } int cluster_handle_marker_getxattr (call_frame_t *frame, loc_t *loc, const char *name, char *vol_uuid, xlator_specf_unwind_t unwind, int (*populate_args) (call_frame_t *frame, int type, int *gauge, xlator_t **subvols)) { xlator_t *this = frame->this; xlator_t **subvols = NULL; int num_subvols = 0; int type = 0; int i = 0; int gauge[MCNT_MAX] = {0}; xl_marker_local_t *local = NULL; if (GF_CLIENT_PID_GSYNCD != frame->root->pid) return -EINVAL; if (name == NULL) return -EINVAL; if (strcmp (GF_XATTR_MARKER_KEY, name) == 0) { type = MARKER_UUID_TYPE; memcpy (gauge, marker_uuid_default_gauge, sizeof (gauge)); } else if (match_uuid_local (name, vol_uuid) == 0) { type = MARKER_XTIME_TYPE; memcpy (gauge, marker_xtime_default_gauge, sizeof (gauge)); } else { return -EINVAL; } num_subvols = _get_children_count (this); subvols = alloca (num_subvols * sizeof (*subvols)); num_subvols = populate_args (frame, type, gauge, subvols); local = GF_CALLOC (sizeof (struct marker_str), 1, gf_common_mt_libxl_marker_local); if (!local) goto fail; local->xl_local = frame->local; local->call_count = num_subvols; local->xl_specf_unwind = unwind; local->vol_uuid = vol_uuid; memcpy (local->gauge, gauge, sizeof (local->gauge)); frame->local = local; for (i = 0; i < num_subvols; i++) { if (MARKER_UUID_TYPE == type) STACK_WIND (frame, cluster_markeruuid_cbk, subvols[i], subvols[i]->fops->getxattr, loc, name, NULL); else if (MARKER_XTIME_TYPE == type) STACK_WIND (frame, cluster_markerxtime_cbk, subvols[i], subvols[i]->fops->getxattr, loc, name, NULL); } return 0; fail: if (unwind) unwind (frame, -1, ENOMEM, NULL, NULL); else default_getxattr_failure_cbk (frame, ENOMEM); return 0; }