diff options
Diffstat (limited to 'libglusterfs')
38 files changed, 1165 insertions, 598 deletions
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index cd9d28e3cdc..385e8ef4600 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -12,7 +12,8 @@ libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \ -DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \ -I$(CONTRIBDIR)/xxhash -libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) +libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL) \ + $(URCU_LIBS) $(URCU_CDS_LIBS) libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \ -export-symbols $(top_srcdir)/libglusterfs/src/libglusterfs.sym diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c index 4cd22f4920c..9d377c3c2e1 100644 --- a/libglusterfs/src/client_t.c +++ b/libglusterfs/src/client_t.c @@ -314,8 +314,6 @@ client_destroy(client_t *client) clienttable = client->this->ctx->clienttable; - LOCK_DESTROY(&client->scratch_ctx.lock); - LOCK(&clienttable->lock); { clienttable->cliententries[client->tbl_index].client = NULL; @@ -333,6 +331,8 @@ client_destroy(client_t *client) if (client->subdir_inode) inode_unref(client->subdir_inode); + LOCK_DESTROY(&client->scratch_ctx.lock); + GF_FREE(client->auth.data); GF_FREE(client->auth.username); GF_FREE(client->auth.passwd); diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index 585952d7641..682cbf28055 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -37,6 +37,9 @@ #ifndef GF_LINUX_HOST_OS #include <sys/resource.h> #endif +#ifdef HAVE_SYNCFS_SYS +#include <sys/syscall.h> +#endif #include "glusterfs/compat-errno.h" #include "glusterfs/common-utils.h" @@ -50,6 +53,7 @@ #include "xxhash.h" #include <ifaddrs.h> #include "glusterfs/libglusterfs-messages.h" +#include "glusterfs/glusterfs-acl.h" #ifdef __FreeBSD__ #include <pthread_np.h> #undef BIT_SET @@ -75,6 +79,15 @@ char *vol_type_str[] = { typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size); typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size); +char *xattrs_to_heal[] = {"user.", + POSIX_ACL_ACCESS_XATTR, + POSIX_ACL_DEFAULT_XATTR, + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + GF_SELINUX_XATTR_KEY, + GF_XATTR_MDATA_KEY, + NULL}; + void gf_xxh64_wrapper(const unsigned char *data, size_t const len, unsigned long long const seed, char *xxh64) @@ -425,7 +438,7 @@ gf_resolve_path_parent(const char *path) GF_VALIDATE_OR_GOTO(THIS->name, path, out); - if (strlen(path) <= 0) { + if (0 == strlen(path)) { gf_msg_callingfn(THIS->name, GF_LOG_DEBUG, 0, LG_MSG_INVALID_STRING, "invalid string for 'path'"); goto out; @@ -564,8 +577,14 @@ struct dnscache * gf_dnscache_init(time_t ttl) { struct dnscache *cache = GF_MALLOC(sizeof(*cache), gf_common_mt_dnscache); - if (cache) { - cache->cache_dict = NULL; + if (!cache) + return NULL; + + cache->cache_dict = dict_new(); + if (!cache->cache_dict) { + GF_FREE(cache); + cache = NULL; + } else { cache->ttl = ttl; } @@ -573,6 +592,20 @@ gf_dnscache_init(time_t ttl) } /** + * gf_dnscache_deinit -- cleanup resources used by struct dnscache + */ +void +gf_dnscache_deinit(struct dnscache *cache) +{ + if (!cache) { + gf_msg_plain(GF_LOG_WARNING, "dnscache is NULL"); + return; + } + dict_unref(cache->cache_dict); + GF_FREE(cache); +} + +/** * gf_dnscache_entry_init -- Initialize a dnscache entry * * @return: SUCCESS: Pointer to an allocated dnscache entry struct @@ -620,12 +653,6 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache) if (!dnscache) goto out; - if (!dnscache->cache_dict) { - dnscache->cache_dict = dict_new(); - if (!dnscache->cache_dict) { - goto out; - } - } cache = dnscache->cache_dict; /* Quick cache lookup to see if we already hold it */ @@ -633,7 +660,7 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache) if (entrydata) { dnsentry = (struct dnscache_entry *)entrydata->data; /* First check the TTL & timestamp */ - if (time(NULL) - dnsentry->timestamp > dnscache->ttl) { + if (gf_time() - dnsentry->timestamp > dnscache->ttl) { gf_dnscache_entry_deinit(dnsentry); entrydata->data = NULL; /* Mark this as 'null' so * dict_del () doesn't try free @@ -664,23 +691,16 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache) from_cache = _gf_false; out: /* Insert into the cache */ - if (fqdn && !from_cache) { + if (fqdn && !from_cache && ip) { struct dnscache_entry *entry = gf_dnscache_entry_init(); - if (!entry) { - goto out; - } - entry->fqdn = fqdn; - if (!ip) { - gf_dnscache_entry_deinit(entry); - goto out; + if (entry) { + entry->fqdn = fqdn; + entry->ip = gf_strdup(ip); + entry->timestamp = gf_time(); + entrydata = bin_to_data(entry, sizeof(*entry)); + dict_set(cache, (char *)ip, entrydata); } - - entry->ip = gf_strdup(ip); - entry->timestamp = time(NULL); - - entrydata = bin_to_data(entry, sizeof(*entry)); - dict_set(cache, (char *)ip, entrydata); } return fqdn; } @@ -889,7 +909,7 @@ gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx) char msg[1024] = { 0, }; - char timestr[64] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; call_stack_t *stack = NULL; @@ -929,7 +949,7 @@ gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx) { /* Dump the timestamp of the crash too, so the previous logs can be related */ - gf_time_fmt(timestr, sizeof timestr, time(NULL), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT); gf_msg_plain_nomem(GF_LOG_ALERT, "time of crash: "); gf_msg_plain_nomem(GF_LOG_ALERT, timestr); } @@ -3113,7 +3133,7 @@ get_mem_size() memsize = page_size * num_pages; #endif -#if defined GF_DARWIN_HOST_OS +#if defined GF_DARWIN_HOST_OS || defined __FreeBSD__ size_t len = sizeof(memsize); int name[] = {CTL_HW, HW_PHYSMEM}; @@ -4127,6 +4147,14 @@ gf_skip_header_section(int fd, int header_len) gf_boolean_t gf_is_pid_running(int pid) { +#ifdef __FreeBSD__ + int ret = -1; + + ret = sys_kill(pid, 0); + if (ret < 0) { + return _gf_false; + } +#else char fname[32] = { 0, }; @@ -4140,6 +4168,7 @@ gf_is_pid_running(int pid) } sys_close(fd); +#endif return _gf_true; } @@ -4381,7 +4410,7 @@ gf_backtrace_end(char *buf, size_t frames) frames = min(frames, GF_BACKTRACE_LEN - pos - 1); - if (frames <= 0) + if (0 == frames) return; memset(buf + pos, ')', frames); @@ -4704,8 +4733,9 @@ recursive_rmdir(const char *delete_path) goto out; } - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); - while (entry) { + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name); ret = sys_lstat(path, &st); if (ret == -1) { @@ -4731,8 +4761,6 @@ recursive_rmdir(const char *delete_path) gf_msg_debug(this->name, 0, "%s %s", ret ? "Failed to remove" : "Removed", entry->d_name); - - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); } ret = sys_closedir(dir); @@ -5412,3 +5440,26 @@ gf_nanosleep(uint64_t nsec) return ret; } + +int +gf_syncfs(int fd) +{ + int ret = 0; +#if defined(HAVE_SYNCFS) + /* Linux with glibc recent enough. */ + ret = syncfs(fd); +#elif defined(HAVE_SYNCFS_SYS) + /* Linux with no library function. */ + ret = syscall(SYS_syncfs, fd); +#else + /* Fallback to generic UNIX stuff. */ + sync(); +#endif + return ret; +} + +char ** +get_xattrs_to_heal() +{ + return xattrs_to_heal; +} diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c index 4a001c29209..3d890b04ec9 100644 --- a/libglusterfs/src/ctx.c +++ b/libglusterfs/src/ctx.c @@ -37,8 +37,12 @@ glusterfs_ctx_new() ctx->log.loglevel = DEFAULT_LOG_LEVEL; -#ifdef RUN_WITH_VALGRIND - ctx->cmd_args.valgrind = _gf_true; +#if defined(RUN_WITH_MEMCHECK) + ctx->cmd_args.vgtool = _gf_memcheck; +#elif defined(RUN_WITH_DRD) + ctx->cmd_args.vgtool = _gf_drd; +#else + ctx->cmd_args.vgtool = _gf_none; #endif /* lock is never destroyed! */ diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c index a1a6ad12527..1d9be9217a6 100644 --- a/libglusterfs/src/dict.c +++ b/libglusterfs/src/dict.c @@ -98,6 +98,7 @@ get_new_dict_full(int size_hint) } dict->free_pair.key = NULL; + dict->totkvlen = 0; LOCK_INIT(&dict->lock); return dict; @@ -410,6 +411,7 @@ dict_set_lk(dict_t *this, char *key, const int key_len, data_t *value, if (pair) { data_t *unref_data = pair->value; pair->value = data_ref(value); + this->totkvlen += (value->len - unref_data->len); data_unref(unref_data); if (key_free) GF_FREE(key); @@ -445,6 +447,7 @@ dict_set_lk(dict_t *this, char *key, const int key_len, data_t *value, } pair->key_hash = key_hash; pair->value = data_ref(value); + this->totkvlen += (keylen + 1 + value->len); /* If the divisor is 1, the modulo is always 0, * in such case avoid hash calculation. @@ -642,6 +645,7 @@ dict_deln(dict_t *this, char *key, const int keylen) else this->members[hashval] = pair->hash_next; + this->totkvlen -= pair->value->len; data_unref(pair->value); if (pair->prev) @@ -652,6 +656,7 @@ dict_deln(dict_t *this, char *key, const int keylen) if (pair->next) pair->next->prev = pair->prev; + this->totkvlen -= (strlen(pair->key) + 1); GF_FREE(pair->key); if (pair == &this->free_pair) { this->free_pair.key = NULL; @@ -701,6 +706,7 @@ dict_destroy(dict_t *this) prev = pair; } + this->totkvlen = 0; if (this->members != &this->members_internal) { mem_put(this->members); } @@ -1105,117 +1111,146 @@ data_to_int64(data_t *data) { VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); - return (int64_t)strtoull(data->data, NULL, 0); + char *endptr = NULL; + int64_t value = 0; + + errno = 0; + value = strtoll(data->data, &endptr, 0); + + if (endptr && *endptr != '\0') + /* Unrecognized characters at the end of string. */ + errno = EINVAL; + if (errno) { + gf_msg_callingfn("dict", GF_LOG_WARNING, errno, + LG_MSG_DATA_CONVERSION_ERROR, + "Error in data conversion: '%s' can't " + "be represented as int64_t", + data->data); + return -1; + } + return value; } +/* Like above but implies signed range check. */ + +#define DATA_TO_RANGED_SIGNED(endptr, value, data, type, min, max) \ + do { \ + errno = 0; \ + value = strtoll(data->data, &endptr, 0); \ + if (endptr && *endptr != '\0') \ + errno = EINVAL; \ + if (errno || value > max || value < min) { \ + gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \ + LG_MSG_DATA_CONVERSION_ERROR, \ + "Error in data conversion: '%s' can't " \ + "be represented as " #type, \ + data->data); \ + return -1; \ + } \ + return (type)value; \ + } while (0) + int32_t data_to_int32(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + char *endptr = NULL; + int64_t value = 0; - return strtoul(data->data, NULL, 0); + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + DATA_TO_RANGED_SIGNED(endptr, value, data, int32_t, INT_MIN, INT_MAX); } int16_t data_to_int16(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + char *endptr = NULL; + int64_t value = 0; - int16_t value = 0; - - errno = 0; - value = strtol(data->data, NULL, 0); - - if ((value > SHRT_MAX) || (value < SHRT_MIN)) { - errno = ERANGE; - gf_msg_callingfn("dict", GF_LOG_WARNING, errno, - LG_MSG_DATA_CONVERSION_ERROR, - "Error in data" - " conversion: detected overflow"); - return -1; - } - - return (int16_t)value; + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + DATA_TO_RANGED_SIGNED(endptr, value, data, int16_t, SHRT_MIN, SHRT_MAX); } int8_t data_to_int8(data_t *data) { + char *endptr = NULL; + int64_t value = 0; + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + DATA_TO_RANGED_SIGNED(endptr, value, data, int8_t, CHAR_MIN, CHAR_MAX); +} + +uint64_t +data_to_uint64(data_t *data) +{ + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); - int8_t value = 0; + char *endptr = NULL; + uint64_t value = 0; errno = 0; - value = strtol(data->data, NULL, 0); + value = strtoull(data->data, &endptr, 0); - if ((value > SCHAR_MAX) || (value < SCHAR_MIN)) { - errno = ERANGE; + if (endptr && *endptr != '\0') + errno = EINVAL; + if (errno) { gf_msg_callingfn("dict", GF_LOG_WARNING, errno, LG_MSG_DATA_CONVERSION_ERROR, - "Error in data" - " conversion: detected overflow"); + "Error in data conversion: '%s' can't " + "be represented as uint64_t", + data->data); return -1; } - - return (int8_t)value; + return value; } -uint64_t -data_to_uint64(data_t *data) -{ - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); +/* Like above but implies unsigned range check. */ - return strtoll(data->data, NULL, 0); -} +#define DATA_TO_RANGED_UNSIGNED(endptr, value, data, type, max) \ + do { \ + errno = 0; \ + value = strtoull(data->data, &endptr, 0); \ + if (endptr && *endptr != '\0') \ + errno = EINVAL; \ + if (errno || value > max) { \ + gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \ + LG_MSG_DATA_CONVERSION_ERROR, \ + "Error in data conversion: '%s' can't " \ + "be represented as " #type, \ + data->data); \ + return -1; \ + } \ + return (type)value; \ + } while (0) uint32_t data_to_uint32(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + char *endptr = NULL; + uint64_t value = 0; - return strtol(data->data, NULL, 0); + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint32_t, UINT_MAX); } uint16_t data_to_uint16(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); - - uint16_t value = 0; - - errno = 0; - value = strtol(data->data, NULL, 0); + char *endptr = NULL; + uint64_t value = 0; - if ((USHRT_MAX - value) < 0) { - errno = ERANGE; - gf_msg_callingfn("dict", GF_LOG_WARNING, errno, - LG_MSG_DATA_CONVERSION_ERROR, - "Error in data conversion: " - "overflow detected"); - return -1; - } - - return (uint16_t)value; + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint16_t, USHRT_MAX); } uint8_t data_to_uint8(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); - - errno = 0; - uint32_t value = strtol(data->data, NULL, 0); - - if ((UCHAR_MAX - (uint8_t)value) < 0) { - errno = ERANGE; - gf_msg_callingfn("dict", GF_LOG_WARNING, errno, - LG_MSG_DATA_CONVERSION_ERROR, - "data " - "conversion overflow detected"); - return -1; - } + char *endptr = NULL; + uint64_t value = 0; - return (uint8_t)value; + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint8_t, UCHAR_MAX); } char * @@ -1460,32 +1495,13 @@ fail: * -val error, val = errno */ -int -dict_get_with_ref(dict_t *this, char *key, data_t **data) -{ - if (!this || !key || !data) { - gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "dict OR key (%s) is NULL", key); - return -EINVAL; - } - - return dict_get_with_refn(this, key, strlen(key), data); -} - -int +static int dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data) { data_pair_t *pair = NULL; int ret = -ENOENT; uint32_t hash; - if (!this || !key || !data) { - gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "dict OR key (%s) is NULL", key); - ret = -EINVAL; - goto err; - } - hash = (uint32_t)XXH64(key, keylen, 0); LOCK(&this->lock); @@ -1498,10 +1514,22 @@ dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data) } } UNLOCK(&this->lock); -err: + return ret; } +int +dict_get_with_ref(dict_t *this, char *key, data_t **data) +{ + if (!this || !key || !data) { + gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, + "dict OR key (%s) is NULL", key); + return -EINVAL; + } + + return dict_get_with_refn(this, key, strlen(key), data); +} + static int data_to_ptr_common(data_t *data, void **val) { @@ -1675,7 +1703,7 @@ dict_get_int8(dict_t *this, char *key, int8_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1721,7 +1749,7 @@ dict_get_int16(dict_t *this, char *key, int16_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1793,7 +1821,7 @@ dict_get_int32(dict_t *this, char *key, int32_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1858,7 +1886,7 @@ dict_get_int64(dict_t *this, char *key, int64_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1903,7 +1931,7 @@ dict_get_uint16(dict_t *this, char *key, uint16_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1948,7 +1976,7 @@ dict_get_uint32(dict_t *this, char *key, uint32_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1993,7 +2021,7 @@ dict_get_uint64(dict_t *this, char *key, uint64_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -2146,7 +2174,7 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op) strcpy(pair->key, key); pair->key_hash = hash; pair->value = data_ref(data); - + this->totkvlen += (strlen(key) + 1 + data->len); hashval = hash % this->hash_size; pair->hash_next = this->members[hashval]; this->members[hashval] = pair; @@ -2216,7 +2244,7 @@ dict_get_double(dict_t *this, char *key, double *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -2299,7 +2327,7 @@ dict_get_ptr(dict_t *this, char *key, void **ptr) data_t *data = NULL; int ret = 0; - if (!this || !key || !ptr) { + if (!ptr) { ret = -EINVAL; goto err; } @@ -2329,7 +2357,7 @@ dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len) data_t *data = NULL; int ret = 0; - if (!this || !key || !ptr) { + if (!ptr) { ret = -EINVAL; goto err; } @@ -2387,7 +2415,7 @@ dict_get_str(dict_t *this, char *key, char **str) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !str) { + if (!str) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2561,7 +2589,7 @@ dict_get_bin(dict_t *this, char *key, void **bin) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !bin) { + if (!bin) { goto err; } @@ -2664,7 +2692,7 @@ dict_get_gfuuid(dict_t *this, char *key, uuid_t *gfid) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !gfid) { + if (!gfid) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2697,7 +2725,7 @@ dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !mdata) { + if (!mdata) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2735,7 +2763,7 @@ dict_get_iatt(dict_t *this, char *key, struct iatt *iatt) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !iatt) { + if (!iatt) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2872,8 +2900,7 @@ dict_serialized_length_lk(dict_t *this) { int ret = -EINVAL; int count = this->count; - int len = DICT_HDR_LEN; - data_pair_t *pair = this->members_list; + const int keyhdrlen = DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN; if (count < 0) { gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_COUNT_LESS_THAN_ZERO, @@ -2881,41 +2908,7 @@ dict_serialized_length_lk(dict_t *this) goto out; } - while (count) { - if (!pair) { - gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_PAIRS_LESS_THAN_COUNT, - NULL); - goto out; - } - - len += DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN; - - if (!pair->key) { - gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_NULL_PTR, NULL); - goto out; - } - - len += strlen(pair->key) + 1 /* for '\0' */; - - if (!pair->value) { - gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_NULL_PTR, NULL); - goto out; - } - - if (pair->value->len < 0) { - gf_smsg("dict", GF_LOG_ERROR, EINVAL, - LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, "len=%d", - pair->value->len, NULL); - goto out; - } - - len += pair->value->len; - - pair = pair->next; - count--; - } - - ret = len; + ret = DICT_HDR_LEN + this->totkvlen + (count * keyhdrlen); out: return ret; } diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c index 235128b6044..402c253ca25 100644 --- a/libglusterfs/src/event.c +++ b/libglusterfs/src/event.c @@ -17,6 +17,7 @@ #include <string.h> #include "glusterfs/gf-event.h" +#include "glusterfs/timespec.h" #include "glusterfs/common-utils.h" #include "glusterfs/libglusterfs-messages.h" #include "glusterfs/syscall.h" @@ -266,7 +267,7 @@ gf_event_dispatch_destroy(struct event_pool *event_pool) if (sys_write(fd[1], "dummy", 6) == -1) { break; } - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; ret = pthread_cond_timedwait(&event_pool->cond, &event_pool->mutex, &sleep_till); diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c index 6d1e3836477..33157549897 100644 --- a/libglusterfs/src/events.c +++ b/libglusterfs/src/events.c @@ -40,6 +40,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) char *host = NULL; struct addrinfo hints; struct addrinfo *result = NULL; + struct addrinfo *iter_result_ptr = NULL; xlator_t *this = THIS; char *volfile_server_transport = NULL; @@ -51,13 +52,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) goto out; } - /* Initialize UDP socket */ - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - ret = EVENT_ERROR_SOCKET; - goto out; - } - if (ctx) { volfile_server_transport = ctx->cmd_args.volfile_server_transport; } @@ -66,7 +60,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) } /* host = NULL returns localhost */ - host = NULL; if (ctx && ctx->cmd_args.volfile_server && (strcmp(volfile_server_transport, "unix"))) { /* If it is client code then volfile_server is set @@ -84,6 +77,24 @@ _gf_event(eventtypes_t event, const char *fmt, ...) goto out; } + // iterate over the result and break when socket creation is success. + for (iter_result_ptr = result; iter_result_ptr != NULL; + iter_result_ptr = iter_result_ptr->ai_next) { + sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype, + iter_result_ptr->ai_protocol); + if (sock != -1) { + break; + } + } + /* + * If none of the addrinfo structures lead to a successful socket + * creation, socket creation has failed. + */ + if (sock < 0) { + ret = EVENT_ERROR_SOCKET; + goto out; + } + va_start(arguments, fmt); ret = gf_vasprintf(&msg, fmt, arguments); va_end(arguments); @@ -93,7 +104,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) goto out; } - ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)time(NULL), event, msg); + ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)gf_time(), event, msg); GF_FREE(msg); if (ret <= 0) { ret = EVENT_ERROR_MSG_FORMAT; diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c index e0767a7e61f..62606e91164 100644 --- a/libglusterfs/src/fd.c +++ b/libglusterfs/src/fd.c @@ -502,6 +502,32 @@ out: } void +fd_close(fd_t *fd) +{ + xlator_t *xl, *old_THIS; + + old_THIS = THIS; + + for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) { + if (!xl->call_cleanup) { + THIS = xl; + + if (IA_ISDIR(fd->inode->ia_type)) { + if (xl->cbks->fdclosedir != NULL) { + xl->cbks->fdclosedir(xl, fd); + } + } else { + if (xl->cbks->fdclose != NULL) { + xl->cbks->fdclose(xl, fd); + } + } + } + } + + THIS = old_THIS; +} + +void fd_unref(fd_t *fd) { int32_t refcount = 0; diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c index 9c8c74beb54..a809efc97ef 100644 --- a/libglusterfs/src/gf-dirent.c +++ b/libglusterfs/src/gf-dirent.c @@ -276,7 +276,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol) gf_uuid_copy(loc.pargfid, parent->gfid); loc.name = entry->d_name; loc.parent = inode_ref(parent); - ret = inode_path(loc.inode, entry->d_name, &path); + ret = inode_path(loc.parent, entry->d_name, &path); loc.path = path; if (ret < 0) goto out; diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c index 40fcffbb35e..64a93802f76 100644 --- a/libglusterfs/src/gidcache.c +++ b/libglusterfs/src/gidcache.c @@ -10,6 +10,7 @@ #include "glusterfs/gidcache.h" #include "glusterfs/mem-pool.h" +#include "glusterfs/common-utils.h" /* * We treat this as a very simple set-associative LRU cache, with entries aged @@ -64,7 +65,7 @@ gid_cache_lookup(gid_cache_t *cache, uint64_t id, uint64_t uid, uint64_t gid) time_t now; const gid_list_t *agl; - now = time(NULL); + now = gf_time(); LOCK(&cache->gc_lock); bucket = id % cache->gc_nbuckets; agl = BUCKET_START(cache->gc_cache, bucket); @@ -132,7 +133,7 @@ gid_cache_add(gid_cache_t *cache, gid_list_t *gl) if (!cache->gc_max_age) return 0; - now = time(NULL); + now = gf_time(); LOCK(&cache->gc_lock); /* diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h index 13e413c3632..f297fdab5c9 100644 --- a/libglusterfs/src/glusterfs/common-utils.h +++ b/libglusterfs/src/glusterfs/common-utils.h @@ -18,6 +18,7 @@ #include <string.h> #include <assert.h> #include <pthread.h> +#include <unistd.h> #include <openssl/md5.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -26,6 +27,11 @@ #include <fnmatch.h> #include <uuid/uuid.h> +/* FreeBSD, etc. */ +#ifndef __BITS_PER_LONG +#define __BITS_PER_LONG (CHAR_BIT * (sizeof(long))) +#endif + #ifndef ffsll #define ffsll(x) __builtin_ffsll(x) #endif @@ -148,6 +154,9 @@ trap(void); #define GF_THREAD_NAME_LIMIT 16 #define GF_THREAD_NAME_PREFIX "glfs_" +/* Advisory buffer size for formatted timestamps (see gf_time_fmt) */ +#define GF_TIMESTR_SIZE 256 + /* * we could have initialized these as +ve values and treated * them as negative while comparing etc.. (which would have @@ -180,6 +189,12 @@ enum _gf_xlator_ipc_targets { typedef enum _gf_special_pid gf_special_pid_t; typedef enum _gf_xlator_ipc_targets _gf_xlator_ipc_targets_t; +/* Array to hold custom xattr keys */ +extern char *xattrs_to_heal[]; + +char ** +get_xattrs_to_heal(); + /* The DHT file rename operation is not a straightforward rename. * It involves creating linkto and linkfiles, and can unlink or rename the * source file depending on the hashed and cached subvols for the source @@ -240,6 +255,8 @@ list_node_del(struct list_node *node); struct dnscache * gf_dnscache_init(time_t ttl); +void +gf_dnscache_deinit(struct dnscache *cache); struct dnscache_entry * gf_dnscache_entry_init(void); void @@ -438,6 +455,15 @@ BIT_VALUE(unsigned char *array, unsigned int index) } while (0) #endif +/* Compile-time assert, borrowed from Linux kernel. */ +#ifdef HAVE_STATIC_ASSERT +#define GF_STATIC_ASSERT(expr, ...) \ + __gf_static_assert(expr, ##__VA_ARGS__, #expr) +#define __gf_static_assert(expr, msg, ...) _Static_assert(expr, msg) +#else +#define GF_STATIC_ASSERT(expr, ...) +#endif + #define GF_ABORT(msg...) \ do { \ gf_msg_callingfn("", GF_LOG_CRITICAL, 0, LG_MSG_ASSERTION_FAILED, \ @@ -468,18 +494,15 @@ union gf_sock_union { #define IOV_MIN(n) min(IOV_MAX, n) -#define GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scr) \ - do { \ - entry = NULL; \ - if (dir) { \ - entry = sys_readdir(dir, scr); \ - while (entry && (!strcmp(entry->d_name, ".") || \ - !fnmatch("*.tmp", entry->d_name, 0) || \ - !strcmp(entry->d_name, ".."))) { \ - entry = sys_readdir(dir, scr); \ - } \ - } \ - } while (0) +static inline gf_boolean_t +gf_irrelevant_entry(struct dirent *entry) +{ + GF_ASSERT(entry); + + return (!strcmp(entry->d_name, ".") || + !fnmatch("*.tmp", entry->d_name, 0) || + !strcmp(entry->d_name, "..")); +} static inline void iov_free(struct iovec *vector, int count) @@ -790,7 +813,7 @@ typedef enum { } gf_timefmts; static inline char * -gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt) +gf_time_fmt_tv(char *dst, size_t sz_dst, struct timeval *tv, unsigned int fmt) { extern void _gf_timestuff(const char ***, const char ***); static gf_timefmts timefmt_last = (gf_timefmts)-1; @@ -798,6 +821,8 @@ gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt) static const char **zeros; struct tm tm, *res; int localtime = 0; + int len = 0; + int pos = 0; if (timefmt_last == ((gf_timefmts)-1)) { _gf_timestuff(&fmts, &zeros); @@ -807,15 +832,35 @@ gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt) fmt = gf_timefmt_default; } localtime = gf_log_get_localtime(); - res = localtime ? localtime_r(&utime, &tm) : gmtime_r(&utime, &tm); - if (utime && (res != NULL)) { - strftime(dst, sz_dst, fmts[fmt], &tm); + res = localtime ? localtime_r(&tv->tv_sec, &tm) + : gmtime_r(&tv->tv_sec, &tm); + if (tv->tv_sec && (res != NULL)) { + len = strftime(dst, sz_dst, fmts[fmt], &tm); + if (len == 0) + return dst; + pos += len; + if (tv->tv_usec >= 0) { + len = snprintf(dst + pos, sz_dst - pos, ".%" GF_PRI_SUSECONDS, + tv->tv_usec); + if (len >= sz_dst - pos) + return dst; + pos += len; + } + strftime(dst + pos, sz_dst - pos, " %z", &tm); } else { strncpy(dst, "N/A", sz_dst); } return dst; } +static inline char * +gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt) +{ + struct timeval tv = {utime, -1}; + + return gf_time_fmt_tv(dst, sz_dst, &tv, fmt); +} + /* This function helps us use gfid (unique identity) to generate inode's unique * number in glusterfs. */ @@ -1165,6 +1210,47 @@ int gf_d_type_from_ia_type(ia_type_t type); int +gf_syncfs(int fd); + +int gf_nanosleep(uint64_t nsec); +static inline time_t +gf_time(void) +{ + return time(NULL); +} + +/* Return delta value in microseconds. */ + +static inline double +gf_tvdiff(struct timeval *start, struct timeval *end) +{ + struct timeval t; + + if (start->tv_usec > end->tv_usec) + t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000; + else + t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec; + + return (double)(t.tv_sec - start->tv_sec) * 1e6 + + (double)(t.tv_usec - start->tv_usec); +} + +/* Return delta value in nanoseconds. */ + +static inline double +gf_tsdiff(struct timespec *start, struct timespec *end) +{ + struct timespec t; + + if (start->tv_nsec > end->tv_nsec) + t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000; + else + t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec; + + return (double)(t.tv_sec - start->tv_sec) * 1e9 + + (double)(t.tv_nsec - start->tv_nsec); +} + #endif /* _COMMON_UTILS_H */ diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h index ce1c2100276..d0467c6dfb6 100644 --- a/libglusterfs/src/glusterfs/dict.h +++ b/libglusterfs/src/glusterfs/dict.h @@ -25,9 +25,6 @@ typedef struct _data_pair data_pair_t; #define dict_add_sizen(this, key, value) dict_addn(this, key, SLEN(key), value) -#define dict_get_with_ref_sizen(this, key, value) \ - dict_get_with_refn(this, key, SLEN(key), value) - #define dict_get_sizen(this, key) dict_getn(this, key, SLEN(key)) #define dict_del_sizen(this, key) dict_deln(this, key, SLEN(key)) @@ -98,7 +95,7 @@ struct _data { char *data; gf_atomic_t refcount; gf_dict_data_type_t data_type; - int32_t len; + uint32_t len; gf_boolean_t is_static; }; @@ -122,6 +119,8 @@ struct _dict { gf_lock_t lock; data_pair_t *members_internal; data_pair_t free_pair; + /* Variable to store total keylen + value->len */ + uint32_t totkvlen; }; typedef gf_boolean_t (*dict_match_t)(dict_t *d, char *k, data_t *v, void *data); @@ -136,6 +135,7 @@ int32_t dict_set(dict_t *this, char *key, data_t *value); int32_t dict_setn(dict_t *this, char *key, const int keylen, data_t *value); + /* function to set a new key/value pair (without checking for duplicate) */ int32_t dict_add(dict_t *this, char *key, data_t *value); @@ -143,8 +143,6 @@ int32_t dict_addn(dict_t *this, char *key, const int keylen, data_t *value); int dict_get_with_ref(dict_t *this, char *key, data_t **data); -int -dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data); data_t * dict_get(dict_t *this, char *key); data_t * diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h index 28906d34e4d..3ffaaa60504 100644 --- a/libglusterfs/src/glusterfs/fd.h +++ b/libglusterfs/src/glusterfs/fd.h @@ -106,6 +106,9 @@ fd_ref(fd_t *fd); void fd_unref(fd_t *fd); +void +fd_close(fd_t *fd); + fd_t * fd_create(struct _inode *inode, pid_t pid); diff --git a/libglusterfs/src/glusterfs/gf-event.h b/libglusterfs/src/glusterfs/gf-event.h index c0f05e7c83b..40f8fbdf10a 100644 --- a/libglusterfs/src/glusterfs/gf-event.h +++ b/libglusterfs/src/glusterfs/gf-event.h @@ -12,6 +12,7 @@ #define _GF_EVENT_H_ #include <pthread.h> +#include "common-utils.h" #include "list.h" struct event_pool; @@ -31,6 +32,9 @@ typedef void (*event_handler_t)(int fd, int idx, int gen, void *data, #define EVENT_EPOLL_SLOTS 1024 #define EVENT_MAX_THREADS 1024 +/* See rpcsvc.h to check why. */ +GF_STATIC_ASSERT(EVENT_MAX_THREADS % __BITS_PER_LONG == 0); + struct event_pool { struct event_ops *ops; diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h index e50ee3046af..b22eaae6c2f 100644 --- a/libglusterfs/src/glusterfs/globals.h +++ b/libglusterfs/src/glusterfs/globals.h @@ -45,7 +45,7 @@ 1 /* MIN is the fresh start op-version, mostly \ should not change */ #define GD_OP_VERSION_MAX \ - GD_OP_VERSION_8_0 /* MAX VERSION is the maximum \ + GD_OP_VERSION_9_0 /* MAX VERSION is the maximum \ count in VME table, should \ keep changing with \ introduction of newer \ @@ -122,6 +122,8 @@ #define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */ +#define GD_OP_VERSION_9_0 90000 /* Op-version for GlusterFS 9.0 */ + #define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0 #include "glusterfs/xlator.h" diff --git a/libglusterfs/src/glusterfs/glusterfs-acl.h b/libglusterfs/src/glusterfs/glusterfs-acl.h index cae55e8062f..987bf5fab0b 100644 --- a/libglusterfs/src/glusterfs/glusterfs-acl.h +++ b/libglusterfs/src/glusterfs/glusterfs-acl.h @@ -143,7 +143,7 @@ gf_posix_acl_get_key(const acl_type_t type) return acl_key; } -static inline const acl_type_t +static inline acl_type_t gf_posix_acl_get_type(const char *key) { acl_type_t type = 0; diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index 6d6ac36cfd5..e6425618b7f 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -43,6 +43,9 @@ #define GF_YES 1 #define GF_NO 0 +#define IS_ERROR(ret) ((ret) < 0) +#define IS_SUCCESS(ret) ((ret) >= 0) + #ifndef O_LARGEFILE /* savannah bug #20053, patch for compiling on darwin */ #define O_LARGEFILE 0100000 /* from bits/fcntl.h */ @@ -422,7 +425,7 @@ static const char *const FOP_PRI_STRINGS[] = {"HIGH", "NORMAL", "LOW", "LEAST"}; static inline const char * fop_pri_to_string(gf_fop_pri_t pri) { - if (pri < 0) + if (IS_ERROR(pri)) return "UNSPEC"; if (pri >= GF_FOP_PRI_MAX) @@ -463,6 +466,8 @@ typedef struct _server_cmdline server_cmdline_t; #define GF_OPTION_DISABLE _gf_false #define GF_OPTION_DEFERRED 2 +typedef enum { _gf_none, _gf_memcheck, _gf_drd } gf_valgrind_tool; + struct _cmd_args { /* basic options */ char *volfile_server; @@ -555,7 +560,8 @@ struct _cmd_args { /* Run this process with valgrind? Might want to prevent calling * functions that prevent valgrind from working correctly, like * dlclose(). */ - int valgrind; + gf_valgrind_tool vgtool; + int localtime_logging; /* For the subdir mount */ @@ -727,6 +733,13 @@ struct _glusterfs_ctx { } stats; struct list_head volfile_list; + /* Add members to manage janitor threads for cleanup fd */ + struct list_head janitor_fds; + pthread_cond_t fd_cond; + pthread_mutex_t fd_lock; + pthread_t janitor; + /* The variable is use to save total posix xlator count */ + uint32_t pxl_count; char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ }; diff --git a/libglusterfs/src/glusterfs/latency.h b/libglusterfs/src/glusterfs/latency.h index ed47b1f0cbc..4d601bbcbd6 100644 --- a/libglusterfs/src/glusterfs/latency.h +++ b/libglusterfs/src/glusterfs/latency.h @@ -11,13 +11,23 @@ #ifndef __LATENCY_H__ #define __LATENCY_H__ -#include "glusterfs/glusterfs.h" +#include <inttypes.h> +#include <time.h> -typedef struct fop_latency { - double min; /* min time for the call (microseconds) */ - double max; /* max time for the call (microseconds) */ - double total; /* total time (microseconds) */ +typedef struct _gf_latency { + uint64_t min; /* min time for the call (nanoseconds) */ + uint64_t max; /* max time for the call (nanoseconds) */ + uint64_t total; /* total time (nanoseconds) */ uint64_t count; -} fop_latency_t; +} gf_latency_t; +gf_latency_t * +gf_latency_new(size_t n); + +void +gf_latency_reset(gf_latency_t *lat); + +void +gf_latency_update(gf_latency_t *lat, struct timespec *begin, + struct timespec *end); #endif /* __LATENCY_H__ */ diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h index 90fb8820c74..e5b3276d047 100644 --- a/libglusterfs/src/glusterfs/mem-pool.h +++ b/libglusterfs/src/glusterfs/mem-pool.h @@ -202,6 +202,24 @@ out: return dup_mem; } +#ifdef GF_DISABLE_MEMPOOL + +/* No-op memory pool enough to fit current API without massive redesign. */ + +struct mem_pool { + unsigned long sizeof_type; +}; + +#define mem_pools_init() \ + do { \ + } while (0) +#define mem_pools_fini() \ + do { \ + } while (0) +#define mem_pool_thread_destructor(pool_list) (void)pool_list + +#else /* !GF_DISABLE_MEMPOOL */ + /* kind of 'header' for the actual mem_pool_shared structure, this might make * it possible to dump some more details in a statedump */ struct mem_pool { @@ -209,10 +227,11 @@ struct mem_pool { unsigned long sizeof_type; unsigned long count; /* requested pool size (unused) */ char *name; - gf_atomic_t active; /* current allocations */ + char *xl_name; + gf_atomic_t active; /* current allocations */ #ifdef DEBUG - gf_atomic_t hit; /* number of allocations served from pt_pool */ - gf_atomic_t miss; /* number of std allocs due to miss */ + gf_atomic_t hit; /* number of allocations served from pt_pool */ + gf_atomic_t miss; /* number of std allocs due to miss */ #endif struct list_head owner; /* glusterfs_ctx_t->mempool_list */ glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */ @@ -286,6 +305,10 @@ void mem_pools_init(void); /* start the pool_sweeper thread */ void mem_pools_fini(void); /* cleanup memory pools */ +void +mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); + +#endif /* GF_DISABLE_MEMPOOL */ struct mem_pool * mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, @@ -308,9 +331,6 @@ void mem_pool_destroy(struct mem_pool *pool); void -mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); - -void gf_mem_acct_enable_set(void *ctx); #endif /* _MEM_POOL_H */ diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h index 36cf7820ad5..d45d5b68c91 100644 --- a/libglusterfs/src/glusterfs/mem-types.h +++ b/libglusterfs/src/glusterfs/mem-types.h @@ -133,6 +133,7 @@ enum gf_common_mem_types_ { gf_common_volfile_t, gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */ gf_common_mt_server_cmdline_t, /* used only in one location */ + gf_common_mt_latency_t, gf_common_mt_end }; #endif diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h index 17585508a22..536a330d38b 100644 --- a/libglusterfs/src/glusterfs/stack.h +++ b/libglusterfs/src/glusterfs/stack.h @@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame, xlator_t *this, int32_t op_ret, int32_t op_errno, ...); +void +gf_frame_latency_update(call_frame_t *frame); + struct call_pool { union { struct list_head all_frames; @@ -149,8 +152,6 @@ struct _call_stack { } while (0); struct xlator_fops; -void -gf_update_latency(call_frame_t *frame); static inline void FRAME_DESTROY(call_frame_t *frame) @@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame) void *local = NULL; if (frame->root->ctx->measure_latency) - gf_update_latency(frame); + gf_frame_latency_update(frame); list_del_init(&frame->frames); if (frame->local) { @@ -429,6 +430,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) if (ngrps <= SMALL_GROUP_COUNT) { stack->groups = stack->groups_small; } else { + GF_FREE(stack->groups_large); stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t), gf_common_mt_groups_t); if (!stack->groups_large) @@ -442,6 +444,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) } static inline int +call_stack_groups_capacity(call_stack_t *stack) +{ + return max(stack->ngrps, SMALL_GROUP_COUNT); +} + +static inline int call_frames_count(call_stack_t *call_stack) { call_frame_t *pos; diff --git a/libglusterfs/src/glusterfs/statedump.h b/libglusterfs/src/glusterfs/statedump.h index 89d04f94587..ce082706bdf 100644 --- a/libglusterfs/src/glusterfs/statedump.h +++ b/libglusterfs/src/glusterfs/statedump.h @@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd); void gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd); +void +gf_latency_statedump_and_reset(char *key, gf_latency_t *lat); #endif /* STATEDUMP_H */ diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h index c8be544e164..a1f70c7b840 100644 --- a/libglusterfs/src/glusterfs/store.h +++ b/libglusterfs/src/glusterfs/store.h @@ -95,7 +95,7 @@ int32_t gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value); int32_t -gf_store_iter_destroy(gf_store_iter_t *iter); +gf_store_iter_destroy(gf_store_iter_t **iter); char * gf_store_strerror(gf_store_op_errno_t op_errno); diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h index bfdec491ba8..4e9241a32fc 100644 --- a/libglusterfs/src/glusterfs/syncop.h +++ b/libglusterfs/src/glusterfs/syncop.h @@ -16,6 +16,8 @@ #include <ucontext.h> #include "glusterfs/dict.h" // for dict_t #include "glusterfs/stack.h" // for call_frame_t, STACK_DESTROY, STACK_... +#include "glusterfs/timer.h" + #define SYNCENV_PROC_MAX 16 #define SYNCENV_PROC_MIN 2 #define SYNCPROC_IDLE_TIME 600 @@ -29,9 +31,15 @@ #define SYNCOPCTX_PID 0x00000008 #define SYNCOPCTX_LKOWNER 0x00000010 +#ifdef HAVE_TSAN_API +/* Currently hardcoded within thread context maintained by the sanitizer. */ +#define TSAN_THREAD_NAMELEN 64 +#endif + struct synctask; struct syncproc; struct syncenv; +struct synccond; typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque); @@ -55,9 +63,12 @@ struct synctask { call_frame_t *opframe; synctask_cbk_t synccbk; synctask_fn_t syncfn; - synctask_state_t state; + struct timespec *delta; + gf_timer_t *timer; + struct synccond *synccond; void *opaque; void *stack; + synctask_state_t state; int woken; int slept; int ret; @@ -65,6 +76,13 @@ struct synctask { uid_t uid; gid_t gid; +#ifdef HAVE_TSAN_API + struct { + void *fiber; + char name[TSAN_THREAD_NAMELEN]; + } tsan; +#endif + ucontext_t ctx; struct syncproc *proc; @@ -77,6 +95,14 @@ struct synctask { struct syncproc { pthread_t processor; + +#ifdef HAVE_TSAN_API + struct { + void *fiber; + char name[TSAN_THREAD_NAMELEN]; + } tsan; +#endif + ucontext_t sched; struct syncenv *env; struct synctask *current; @@ -85,19 +111,21 @@ struct syncproc { /* hosts the scheduler thread and framework for executing synctasks */ struct syncenv { struct syncproc proc[SYNCENV_PROC_MAX]; - int procs; + + pthread_mutex_t mutex; + pthread_cond_t cond; struct list_head runq; - int runcount; struct list_head waitq; - int waitcount; + + int procs; + int procs_idle; + + int runcount; int procmin; int procmax; - pthread_mutex_t mutex; - pthread_cond_t cond; - size_t stacksize; int destroy; /* FLAG to mark syncenv is in destroy mode @@ -123,6 +151,13 @@ struct synclock { }; typedef struct synclock synclock_t; +struct synccond { + pthread_mutex_t pmutex; + pthread_cond_t pcond; + struct list_head waitq; +}; +typedef struct synccond synccond_t; + struct syncbarrier { gf_boolean_t initialized; /*Set on successful initialization*/ pthread_mutex_t guard; /* guard the remaining members, pair @cond */ @@ -219,7 +254,7 @@ struct syncopctx { #define __yield(args) \ do { \ if (args->task) { \ - synctask_yield(args->task); \ + synctask_yield(args->task, NULL); \ } else { \ pthread_mutex_lock(&args->mutex); \ { \ @@ -310,7 +345,9 @@ synctask_join(struct synctask *task); void synctask_wake(struct synctask *task); void -synctask_yield(struct synctask *task); +synctask_yield(struct synctask *task, struct timespec *delta); +void +synctask_sleep(int32_t secs); void synctask_waitfor(struct synctask *task, int count); @@ -408,6 +445,24 @@ synclock_trylock(synclock_t *lock); int synclock_unlock(synclock_t *lock); +int32_t +synccond_init(synccond_t *cond); + +void +synccond_destroy(synccond_t *cond); + +int +synccond_wait(synccond_t *cond, synclock_t *lock); + +int +synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta); + +void +synccond_signal(synccond_t *cond); + +void +synccond_broadcast(synccond_t *cond); + int syncbarrier_init(syncbarrier_t *barrier); int diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h index 91e921aea50..b6d3ab4f2ad 100644 --- a/libglusterfs/src/glusterfs/syscall.h +++ b/libglusterfs/src/glusterfs/syscall.h @@ -266,4 +266,13 @@ ssize_t sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, size_t len, unsigned int flags); +int +sys_kill(pid_t pid, int sig); + +#ifdef __FreeBSD__ +int +sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp, + const void *newp, size_t newlen); +#endif + #endif /* __SYSCALL_H__ */ diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h index c94bd75cf1c..4fd3abdaeff 100644 --- a/libglusterfs/src/glusterfs/xlator.h +++ b/libglusterfs/src/glusterfs/xlator.h @@ -702,6 +702,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode); typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd); +typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd); + struct xlator_cbks { cbk_forget_t forget; cbk_release_t release; @@ -712,6 +714,8 @@ struct xlator_cbks { cbk_ictxmerge_t ictxmerge; cbk_inodectx_size_t ictxsize; cbk_fdctx_size_t fdctxsize; + cbk_fdclose_t fdclose; + cbk_fdclose_t fdclosedir; }; typedef int32_t (*dumpop_priv_t)(xlator_t *this); @@ -801,7 +805,7 @@ struct _xlator { struct { /* for latency measurement */ - fop_latency_t latencies[GF_FOP_MAXVALUE]; + gf_latency_t latencies[GF_FOP_MAXVALUE]; /* for latency measurement */ fop_metrics_t metrics[GF_FOP_MAXVALUE]; diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c index 64e77e095a2..13f298eb3bd 100644 --- a/libglusterfs/src/graph.c +++ b/libglusterfs/src/graph.c @@ -41,7 +41,7 @@ _gf_dump_details (int argc, char **argv) { extern FILE *gf_log_logfile; int i = 0; - char timestr[64]; + char timestr[GF_TIMESTR_SIZE]; time_t utime = 0; pid_t mypid = 0; struct utsname uname_buf = {{0, }, }; @@ -482,7 +482,7 @@ fill_uuid(char *uuid, int size, struct timeval tv) char hostname[50] = { 0, }; - char now_str[64]; + char now_str[GF_TIMESTR_SIZE]; if (gethostname(hostname, sizeof(hostname) - 1) != 0) { gf_msg("graph", GF_LOG_ERROR, errno, LG_MSG_GETHOSTNAME_FAILED, @@ -490,9 +490,8 @@ fill_uuid(char *uuid, int size, struct timeval tv) hostname[sizeof(hostname) - 1] = '\0'; } - gf_time_fmt(now_str, sizeof now_str, tv.tv_sec, gf_timefmt_dirent); - snprintf(uuid, size, "%s-%d-%s:%" GF_PRI_SUSECONDS, hostname, getpid(), - now_str, tv.tv_usec); + gf_time_fmt_tv(now_str, sizeof now_str, &tv, gf_timefmt_dirent); + snprintf(uuid, size, "%s-%d-%s", hostname, getpid(), now_str); return; } diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c index 15b397c3799..ce4b0e8255d 100644 --- a/libglusterfs/src/latency.c +++ b/libglusterfs/src/latency.c @@ -16,35 +16,32 @@ #include "glusterfs/glusterfs.h" #include "glusterfs/statedump.h" -void -gf_update_latency(call_frame_t *frame) +gf_latency_t * +gf_latency_new(size_t n) { - double elapsed; - struct timespec *begin, *end; - - fop_latency_t *lat; - - begin = &frame->begin; - end = &frame->end; + int i = 0; + gf_latency_t *lat = NULL; - if (!(begin->tv_sec && end->tv_sec)) - goto out; + lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t); + if (!lat) + return NULL; - elapsed = (end->tv_sec - begin->tv_sec) * 1e9 + - (end->tv_nsec - begin->tv_nsec); + for (i = 0; i < n; i++) { + gf_latency_reset(lat + i); + } + return lat; +} - if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) { - gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d", - frame->op); +void +gf_latency_update(gf_latency_t *lat, struct timespec *begin, + struct timespec *end) +{ + if (!(begin->tv_sec && end->tv_sec)) { + /*Measure latency might have been enabled/disabled during the op*/ return; } - /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros - set it right anyways for those frames */ - if (!frame->op) - frame->op = frame->root->op; - - lat = &frame->this->stats.interval.latencies[frame->op]; + double elapsed = gf_tsdiff(begin, end); if (lat->max < elapsed) lat->max = elapsed; @@ -54,42 +51,34 @@ gf_update_latency(call_frame_t *frame) lat->total += elapsed; lat->count++; -out: - return; } void -gf_proc_dump_latency_info(xlator_t *xl) +gf_latency_reset(gf_latency_t *lat) { - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char key[GF_DUMP_MAX_BUF_LEN]; - int i; - - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); - gf_proc_dump_add_section("%s", key_prefix); - - for (i = 0; i < GF_FOP_MAXVALUE; i++) { - gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]); - - fop_latency_t *lat = &xl->stats.interval.latencies[i]; + if (!lat) + return; + memset(lat, 0, sizeof(*lat)); + lat->min = ULLONG_MAX; + /* make sure 'min' is set to high value, so it would be + properly set later */ +} - /* Doesn't make sense to continue if there are no fops - came in the given interval */ - if (!lat->count) - continue; +void +gf_frame_latency_update(call_frame_t *frame) +{ + gf_latency_t *lat; + /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros + set it right anyways for those frames */ + if (!frame->op) + frame->op = frame->root->op; - gf_proc_dump_write( - key, "AVG:%.03f,CNT:%" PRId64 ",TOTAL:%.03f,MIN:%.03f,MAX:%.03f", - (lat->total / lat->count), lat->count, lat->total, lat->min, - lat->max); + if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) { + gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d", + frame->op); + return; } - memset(xl->stats.interval.latencies, 0, - sizeof(xl->stats.interval.latencies)); - - /* make sure 'min' is set to high value, so it would be - properly set later */ - for (i = 0; i < GF_FOP_MAXVALUE; i++) { - xl->stats.interval.latencies[i].min = 0xffffffff; - } + lat = &frame->this->stats.interval.latencies[frame->op]; + gf_latency_update(lat, &frame->begin, &frame->end); } diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index 47e0872ca59..5f18cd56cbe 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -451,6 +451,7 @@ gf_event_unregister_close fd_anonymous fd_anonymous_with_flags fd_bind +fd_close fd_create fd_create_uint64 __fd_ctx_del @@ -584,6 +585,7 @@ gf_dirent_free gf_dirent_orig_offset gf_dm_hashfn gf_dnscache_init +gf_dnscache_deinit gf_errno_to_error gf_error_to_errno _gf_event @@ -937,6 +939,12 @@ syncbarrier_destroy syncbarrier_init syncbarrier_wait syncbarrier_wake +synccond_init +synccond_destroy +synccond_wait +synccond_timedwait +synccond_signal +synccond_broadcast syncenv_destroy syncenv_new synclock_destroy @@ -1014,6 +1022,7 @@ synctask_new synctask_new1 synctask_set synctask_setid +synctask_sleep synctask_wake synctask_yield sys_access @@ -1072,6 +1081,8 @@ sys_write sys_writev sys_socket sys_accept +sys_kill +sys_sysctl tbf_init tbf_throttle timespec_now @@ -1172,4 +1183,11 @@ glusterfs_process_svc_detach mgmt_is_multiplexed_daemon xlator_is_cleanup_starting gf_nanosleep +gf_syncfs graph_total_client_xlator +get_xattrs_to_heal +gf_latency_statedump_and_reset +gf_latency_new +gf_latency_reset +gf_latency_update +gf_frame_latency_update diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c index 3d7d31772df..a930d3e3b63 100644 --- a/libglusterfs/src/logging.c +++ b/libglusterfs/src/logging.c @@ -35,7 +35,6 @@ #define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf" #define GF_LOG_BACKTRACE_DEPTH 5 #define GF_LOG_BACKTRACE_SIZE 4096 -#define GF_LOG_TIMESTR_SIZE 256 #define GF_MAX_SLOG_PAIR_COUNT 100 #include "glusterfs/logging.h" @@ -760,7 +759,7 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function, xlator_t *this = THIS; char *logline = NULL; char *msg = NULL; - char timestr[GF_LOG_TIMESTR_SIZE] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char *callstr = NULL; @@ -820,12 +819,12 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function, if (-1 == ret) goto out; - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); - ret = gf_asprintf( - &logline, "[%s.%" GF_PRI_SUSECONDS "] %c [%s:%d:%s] %s %d-%s: %s\n", - timestr, tv.tv_usec, gf_level_strings[level], basename, line, function, - callstr, ((this->graph) ? this->graph->id : 0), domain, msg); + ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %s %d-%s: %s\n", timestr, + gf_level_strings[level], basename, line, function, + callstr, ((this->graph) ? this->graph->id : 0), domain, + msg); if (-1 == ret) { goto out; } @@ -1088,7 +1087,7 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, char msg[2048] = { 0, }; - char timestr[GF_LOG_TIMESTR_SIZE] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; glusterfs_ctx_t *ctx = NULL; @@ -1116,20 +1115,20 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, ret = gettimeofday(&tv, NULL); if (-1 == ret) goto out; - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); /* TODO: Currently we print in the enhanced format, with a message ID * of 0. Need to enhance this to support format as configured */ wlen = snprintf( msg, sizeof msg, - "[%s.%" GF_PRI_SUSECONDS "] %c [MSGID: %" PRIu64 + "[%s] %c [MSGID: %" PRIu64 "]" " [%s:%d:%s] %s: no memory " "available for size (%" GF_PRI_SIZET ") current memory usage in kilobytes %ld" " [call stack follows]\n", - timestr, tv.tv_usec, gf_level_strings[level], (uint64_t)0, basename, - line, function, domain, size, + timestr, gf_level_strings[level], (uint64_t)0, basename, line, function, + domain, size, (!getrusage(RUSAGE_SELF, &r_usage) ? r_usage.ru_maxrss : 0)); if (-1 == wlen) { ret = -1; @@ -1275,7 +1274,7 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file, int errnum, uint64_t msgid, char **appmsgstr, char *callstr, struct timeval tv, int graph_id, gf_log_format_t fmt) { - char timestr[GF_LOG_TIMESTR_SIZE] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char *header = NULL; @@ -1286,7 +1285,7 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file, gf_log_rotate(ctx); /* format the time stamp */ - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); /* generate footer */ if (errnum) { @@ -1302,40 +1301,35 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file, if (fmt == gf_logformat_traditional) { if (!callstr) { ret = gf_asprintf(&header, - "[%s.%" GF_PRI_SUSECONDS - "] %c [%s:%d:%s]" + "[%s] %c [%s:%d:%s]" " %d-%s: %s", - timestr, tv.tv_usec, gf_level_strings[level], - file, line, function, graph_id, domain, - *appmsgstr); + timestr, gf_level_strings[level], file, line, + function, graph_id, domain, *appmsgstr); } else { ret = gf_asprintf(&header, - "[%s.%" GF_PRI_SUSECONDS - "] %c [%s:%d:%s] %s" + "[%s] %c [%s:%d:%s] %s" " %d-%s: %s", - timestr, tv.tv_usec, gf_level_strings[level], - file, line, function, callstr, graph_id, domain, - *appmsgstr); + timestr, gf_level_strings[level], file, line, + function, callstr, graph_id, domain, *appmsgstr); } } else { /* gf_logformat_withmsgid */ /* CEE log format unsupported in logger_glusterlog, so just * print enhanced log format */ if (!callstr) { ret = gf_asprintf(&header, - "[%s.%" GF_PRI_SUSECONDS "] %c [MSGID: %" PRIu64 + "[%s] %c [MSGID: %" PRIu64 "]" " [%s:%d:%s] %d-%s: %s", - timestr, tv.tv_usec, gf_level_strings[level], - msgid, file, line, function, graph_id, domain, - *appmsgstr); + timestr, gf_level_strings[level], msgid, file, + line, function, graph_id, domain, *appmsgstr); } else { ret = gf_asprintf(&header, - "[%s.%" GF_PRI_SUSECONDS "] %c [MSGID: %" PRIu64 + "[%s] %c [MSGID: %" PRIu64 "]" " [%s:%d:%s] %s %d-%s: %s", - timestr, tv.tv_usec, gf_level_strings[level], - msgid, file, line, function, callstr, graph_id, - domain, *appmsgstr); + timestr, gf_level_strings[level], msgid, file, + line, function, callstr, graph_id, domain, + *appmsgstr); } } if (-1 == ret) { @@ -1386,39 +1380,36 @@ gf_syslog_log_repetitions(const char *domain, const char *file, int graph_id) { int priority; - char timestr_latest[GF_LOG_TIMESTR_SIZE] = { + char timestr_latest[GF_TIMESTR_SIZE] = { 0, }; - char timestr_oldest[GF_LOG_TIMESTR_SIZE] = { + char timestr_oldest[GF_TIMESTR_SIZE] = { 0, }; SET_LOG_PRIO(level, priority); - gf_time_fmt(timestr_latest, sizeof timestr_latest, latest.tv_sec, - gf_timefmt_FT); - gf_time_fmt(timestr_oldest, sizeof timestr_oldest, oldest.tv_sec, - gf_timefmt_FT); + gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest, + gf_timefmt_FT); + gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest, + gf_timefmt_FT); if (errnum) { - syslog( - priority, - "The message \"[MSGID: %" PRIu64 - "] [%s:%d:%s] " - "%d-%s: %s [%s] \" repeated %d times between %s.%" GF_PRI_SUSECONDS - " and %s.%" GF_PRI_SUSECONDS, - msgid, file, line, function, graph_id, domain, *appmsgstr, - strerror(errnum), refcount, timestr_oldest, oldest.tv_usec, - timestr_latest, latest.tv_usec); + syslog(priority, + "The message \"[MSGID: %" PRIu64 + "] [%s:%d:%s] " + "%d-%s: %s [%s] \" repeated %d times between %s" + " and %s", + msgid, file, line, function, graph_id, domain, *appmsgstr, + strerror(errnum), refcount, timestr_oldest, timestr_latest); } else { syslog(priority, "The message \"[MSGID: %" PRIu64 "] [%s:%d:%s] " - "%d-%s: %s \" repeated %d times between %s.%" GF_PRI_SUSECONDS - " and %s.%" GF_PRI_SUSECONDS, + "%d-%s: %s \" repeated %d times between %s" + " and %s", msgid, file, line, function, graph_id, domain, *appmsgstr, - refcount, timestr_oldest, oldest.tv_usec, timestr_latest, - latest.tv_usec); + refcount, timestr_oldest, timestr_latest); } return 0; } @@ -1432,10 +1423,10 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain, struct timeval latest, int graph_id) { int ret = 0; - char timestr_latest[GF_LOG_TIMESTR_SIZE] = { + char timestr_latest[GF_TIMESTR_SIZE] = { 0, }; - char timestr_oldest[GF_LOG_TIMESTR_SIZE] = { + char timestr_oldest[GF_TIMESTR_SIZE] = { 0, }; char errstr[256] = { @@ -1459,21 +1450,17 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain, goto err; } - gf_time_fmt(timestr_latest, sizeof timestr_latest, latest.tv_sec, - gf_timefmt_FT); + gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest, + gf_timefmt_FT); - gf_time_fmt(timestr_oldest, sizeof timestr_oldest, oldest.tv_sec, - gf_timefmt_FT); + gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest, + gf_timefmt_FT); if (errnum) snprintf(errstr, sizeof(errstr) - 1, " [%s]", strerror(errnum)); - ret = gf_asprintf(&footer, - "%s\" repeated %d times between" - " [%s.%" GF_PRI_SUSECONDS "] and [%s.%" GF_PRI_SUSECONDS - "]", - errstr, refcount, timestr_oldest, oldest.tv_usec, - timestr_latest, latest.tv_usec); + ret = gf_asprintf(&footer, "%s\" repeated %d times between [%s] and [%s]", + errstr, refcount, timestr_oldest, timestr_latest); if (-1 == ret) { ret = -1; goto err; @@ -2013,7 +2000,7 @@ _gf_log(const char *domain, const char *file, const char *function, int line, const char *basename = NULL; FILE *new_logfile = NULL; va_list ap; - char timestr[GF_LOG_TIMESTR_SIZE] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; struct timeval tv = { @@ -2100,12 +2087,11 @@ log: if (-1 == ret) goto out; - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); - ret = gf_asprintf( - &logline, "[%s.%" GF_PRI_SUSECONDS "] %c [%s:%d:%s] %d-%s: %s\n", - timestr, tv.tv_usec, gf_level_strings[level], basename, line, function, - ((this->graph) ? this->graph->id : 0), domain, msg); + ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %d-%s: %s\n", timestr, + gf_level_strings[level], basename, line, function, + ((this->graph) ? this->graph->id : 0), domain, msg); if (-1 == ret) { goto err; } @@ -2225,7 +2211,7 @@ int gf_cmd_log(const char *domain, const char *fmt, ...) { va_list ap; - char timestr[64]; + char timestr[GF_TIMESTR_SIZE]; struct timeval tv = { 0, }; @@ -2258,10 +2244,9 @@ gf_cmd_log(const char *domain, const char *fmt, ...) goto out; } - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); - ret = gf_asprintf(&logline, "[%s.%" GF_PRI_SUSECONDS "] %s : %s\n", timestr, - tv.tv_usec, domain, msg); + ret = gf_asprintf(&logline, "[%s] %s : %s\n", timestr, domain, msg); if (ret == -1) { goto out; } diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c index 1c31c27d95b..2d5a12b0a00 100644 --- a/libglusterfs/src/mem-pool.c +++ b/libglusterfs/src/mem-pool.c @@ -362,6 +362,30 @@ free: FREE(ptr); } +#if defined(GF_DISABLE_MEMPOOL) + +struct mem_pool * +mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, + unsigned long count, char *name) +{ + struct mem_pool *new; + + new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool); + if (!new) + return NULL; + + new->sizeof_type = sizeof_type; + return new; +} + +void +mem_pool_destroy(struct mem_pool *pool) +{ + GF_FREE(pool); +} + +#else /* !GF_DISABLE_MEMPOOL */ + static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER; static struct list_head pool_threads; static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER; @@ -371,7 +395,6 @@ static size_t pool_list_size; static __thread per_thread_pool_list_t *thread_pool_list = NULL; -#if !defined(GF_DISABLE_MEMPOOL) #define N_COLD_LISTS 1024 #define POOL_SWEEP_SECS 30 @@ -617,21 +640,29 @@ mem_pools_fini(void) pthread_mutex_unlock(&init_mutex); } -#else void -mem_pools_init(void) -{ -} -void -mem_pools_fini(void) -{ -} -void -mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) +mem_pool_destroy(struct mem_pool *pool) { -} + if (!pool) + return; -#endif + /* remove this pool from the owner (glusterfs_ctx_t) */ + LOCK(&pool->ctx->lock); + { + list_del(&pool->owner); + } + UNLOCK(&pool->ctx->lock); + + /* free this pool, but keep the mem_pool_shared */ + GF_FREE(pool); + + /* + * Pools are now permanent, so the mem_pool->pool is kept around. All + * of the objects *in* the pool will eventually be freed via the + * pool-sweeper thread, and this way we don't have to add a lot of + * reference-counting complexity. + */ +} struct mem_pool * mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, @@ -682,6 +713,7 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, new->sizeof_type = sizeof_type; new->count = count; new->name = name; + new->xl_name = THIS->name; new->pool = pool; GF_ATOMIC_INIT(new->active, 0); #ifdef DEBUG @@ -699,21 +731,6 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, return new; } -void * -mem_get0(struct mem_pool *mem_pool) -{ - void *ptr = mem_get(mem_pool); - if (ptr) { -#if defined(GF_DISABLE_MEMPOOL) - memset(ptr, 0, mem_pool->sizeof_type); -#else - memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two)); -#endif - } - - return ptr; -} - per_thread_pool_list_t * mem_get_pool_list(void) { @@ -822,6 +839,23 @@ mem_get_from_pool(struct mem_pool *mem_pool) return retval; } +#endif /* GF_DISABLE_MEMPOOL */ + +void * +mem_get0(struct mem_pool *mem_pool) +{ + void *ptr = mem_get(mem_pool); + if (ptr) { +#if defined(GF_DISABLE_MEMPOOL) + memset(ptr, 0, mem_pool->sizeof_type); +#else + memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two)); +#endif + } + + return ptr; +} + void * mem_get(struct mem_pool *mem_pool) { @@ -896,27 +930,3 @@ mem_put(void *ptr) } #endif /* GF_DISABLE_MEMPOOL */ } - -void -mem_pool_destroy(struct mem_pool *pool) -{ - if (!pool) - return; - - /* remove this pool from the owner (glusterfs_ctx_t) */ - LOCK(&pool->ctx->lock); - { - list_del(&pool->owner); - } - UNLOCK(&pool->ctx->lock); - - /* free this pool, but keep the mem_pool_shared */ - GF_FREE(pool); - - /* - * Pools are now permanent, so the mem_pool->pool is kept around. All - * of the objects *in* the pool will eventually be freed via the - * pool-sweeper thread, and this way we don't have to add a lot of - * reference-counting complexity. - */ -} diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c index 45e3d776903..fbb68dc8622 100644 --- a/libglusterfs/src/monitoring.c +++ b/libglusterfs/src/monitoring.c @@ -113,15 +113,15 @@ dump_latency_and_count(xlator_t *xl, int fd) dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name, gf_fop_list[index], cbk); } - if (xl->stats.interval.latencies[index].count != 0.0) { + if (xl->stats.interval.latencies[index].count != 0) { dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name, gf_fop_list[index], - (xl->stats.interval.latencies[index].total / + (((double)xl->stats.interval.latencies[index].total) / xl->stats.interval.latencies[index].count)); - dprintf(fd, "%s.interval.%s.max %lf\n", xl->name, + dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name, gf_fop_list[index], xl->stats.interval.latencies[index].max); - dprintf(fd, "%s.interval.%s.min %lf\n", xl->name, + dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name, gf_fop_list[index], xl->stats.interval.latencies[index].min); } diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c index 371f60c4436..1531f0da43f 100644 --- a/libglusterfs/src/stack.c +++ b/libglusterfs/src/stack.c @@ -92,7 +92,7 @@ gf_proc_dump_call_frame(call_frame_t *call_frame, const char *key_buf, ...) }; int ret = -1; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; int len; @@ -162,7 +162,7 @@ gf_proc_dump_call_stack(call_stack_t *call_stack, const char *key_buf, ...) va_list ap; call_frame_t *trav; int32_t i = 1, cnt = 0; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; int len; diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c index 5e0f04f3217..65f0eb5c7f3 100644 --- a/libglusterfs/src/statedump.c +++ b/libglusterfs/src/statedump.c @@ -199,6 +199,40 @@ gf_proc_dump_write(char *key, char *value, ...) return ret; } +void +gf_latency_statedump_and_reset(char *key, gf_latency_t *lat) +{ + /* Doesn't make sense to continue if there are no fops + came in the given interval */ + if (!lat || !lat->count) + return; + gf_proc_dump_write(key, + "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64 + " MAX:%" PRIu64, + (((double)lat->total) / lat->count), lat->count, + lat->total, lat->min, lat->max); + gf_latency_reset(lat); +} + +void +gf_proc_dump_xl_latency_info(xlator_t *xl) +{ + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char key[GF_DUMP_MAX_BUF_LEN]; + int i; + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); + gf_proc_dump_add_section("%s", key_prefix); + + for (i = 0; i < GF_FOP_MAXVALUE; i++) { + gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]); + + gf_latency_t *lat = &xl->stats.interval.latencies[i]; + + gf_latency_statedump_and_reset(key, lat); + } +} + static void gf_proc_dump_xlator_mem_info(xlator_t *xl) { @@ -270,7 +304,7 @@ gf_proc_dump_xlator_mem_info_only_in_use(xlator_t *xl) void gf_proc_dump_mem_info() { -#ifdef HAVE_MALLOC_STATS +#ifdef HAVE_MALLINFO struct mallinfo info; memset(&info, 0, sizeof(struct mallinfo)); @@ -296,7 +330,7 @@ gf_proc_dump_mem_info_to_dict(dict_t *dict) { if (!dict) return; -#ifdef HAVE_MALLOC_STATS +#ifdef HAVE_MALLINFO struct mallinfo info; int ret = -1; @@ -349,26 +383,13 @@ gf_proc_dump_mem_info_to_dict(dict_t *dict) void gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx) { +#ifdef GF_DISABLE_MEMPOOL + gf_proc_dump_write("built with --disable-mempool", " so no memory pools"); +#else struct mem_pool *pool = NULL; gf_proc_dump_add_section("mempool"); -#if defined(OLD_MEM_POOLS) - list_for_each_entry(pool, &ctx->mempool_list, global_list) - { - gf_proc_dump_write("-----", "-----"); - gf_proc_dump_write("pool-name", "%s", pool->name); - gf_proc_dump_write("hot-count", "%d", pool->hot_count); - gf_proc_dump_write("cold-count", "%d", pool->cold_count); - gf_proc_dump_write("padded_sizeof", "%lu", pool->padded_sizeof_type); - gf_proc_dump_write("alloc-count", "%" PRIu64, pool->alloc_count); - gf_proc_dump_write("max-alloc", "%d", pool->max_alloc); - - gf_proc_dump_write("pool-misses", "%" PRIu64, pool->pool_misses); - gf_proc_dump_write("cur-stdalloc", "%d", pool->curr_stdalloc); - gf_proc_dump_write("max-stdalloc", "%d", pool->max_stdalloc); - } -#else LOCK(&ctx->lock); { list_for_each_entry(pool, &ctx->mempool_list, owner) @@ -377,6 +398,7 @@ gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx) gf_proc_dump_write("-----", "-----"); gf_proc_dump_write("pool-name", "%s", pool->name); + gf_proc_dump_write("xlator-name", "%s", pool->xl_name); gf_proc_dump_write("active-count", "%" GF_PRI_ATOMIC, active); gf_proc_dump_write("sizeof-type", "%lu", pool->sizeof_type); gf_proc_dump_write("padded-sizeof", "%d", @@ -387,15 +409,13 @@ gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx) } } UNLOCK(&ctx->lock); - - /* TODO: details of (struct mem_pool_shared) pool->pool */ -#endif +#endif /* GF_DISABLE_MEMPOOL */ } void gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict) { -#if defined(OLD_MEM_POOLS) +#ifndef GF_DISABLE_MEMPOOL struct mem_pool *pool = NULL; char key[GF_DUMP_MAX_BUF_LEN] = { 0, @@ -406,51 +426,47 @@ gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict) if (!ctx || !dict) return; - list_for_each_entry(pool, &ctx->mempool_list, global_list) + LOCK(&ctx->lock); { - snprintf(key, sizeof(key), "pool%d.name", count); - ret = dict_set_str(dict, key, pool->name); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.hotcount", count); - ret = dict_set_int32(dict, key, pool->hot_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.coldcount", count); - ret = dict_set_int32(dict, key, pool->cold_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.paddedsizeof", count); - ret = dict_set_uint64(dict, key, pool->padded_sizeof_type); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.alloccount", count); - ret = dict_set_uint64(dict, key, pool->alloc_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.max_alloc", count); - ret = dict_set_int32(dict, key, pool->max_alloc); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.max-stdalloc", count); - ret = dict_set_int32(dict, key, pool->max_stdalloc); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.pool-misses", count); - ret = dict_set_uint64(dict, key, pool->pool_misses); - if (ret) - return; - count++; + list_for_each_entry(pool, &ctx->mempool_list, owner) + { + int64_t active = GF_ATOMIC_GET(pool->active); + + snprintf(key, sizeof(key), "pool%d.name", count); + ret = dict_set_str(dict, key, pool->name); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.active-count", count); + ret = dict_set_uint64(dict, key, active); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.sizeof-type", count); + ret = dict_set_uint64(dict, key, pool->sizeof_type); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.padded-sizeof", count); + ret = dict_set_uint64(dict, key, 1 << pool->pool->power_of_two); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.size", count); + ret = dict_set_uint64(dict, key, + (1 << pool->pool->power_of_two) * active); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.shared-pool", count); + ret = dict_set_static_ptr(dict, key, pool->pool); + if (ret) + goto out; + } } - ret = dict_set_int32(dict, "mempool-count", count); -#endif +out: + UNLOCK(&ctx->lock); +#endif /* !GF_DISABLE_MEMPOOL */ } void @@ -485,7 +501,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav) return; if (ctx->measure_latency) - gf_proc_dump_latency_info(trav); + gf_proc_dump_xl_latency_info(trav); gf_proc_dump_xlator_mem_info(trav); @@ -782,7 +798,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) char brick_name[PATH_MAX] = { 0, }; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char sign_string[512] = { @@ -842,7 +858,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) ? dump_options.dump_path : ((ctx->statedump_path != NULL) ? ctx->statedump_path : DEFAULT_VAR_RUN_DIRECTORY)), - brick_name, getpid(), (uint64_t)time(NULL)); + brick_name, getpid(), (uint64_t)gf_time()); if ((ret < 0) || (ret >= sizeof(path))) { goto out; } @@ -861,10 +877,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) // continue even though gettimeofday() has failed ret = gettimeofday(&tv, NULL); if (0 == ret) { - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - len = strlen(timestr); - snprintf(timestr + len, sizeof timestr - len, ".%" GF_PRI_SUSECONDS, - tv.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); } len = snprintf(sign_string, sizeof(sign_string), "DUMP-START-TIME: %s\n", @@ -913,10 +926,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) ret = gettimeofday(&tv, NULL); if (0 == ret) { - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - len = strlen(timestr); - snprintf(timestr + len, sizeof timestr - len, ".%" GF_PRI_SUSECONDS, - tv.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); } len = snprintf(sign_string, sizeof(sign_string), "\nDUMP-END-TIME: %s", @@ -1035,7 +1045,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd) { gf_dump_strfd = strfd; - gf_proc_dump_latency_info(this); + gf_proc_dump_xl_latency_info(this); gf_dump_strfd = NULL; } diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c index f444741ef0c..5c316b9291a 100644 --- a/libglusterfs/src/store.c +++ b/libglusterfs/src/store.c @@ -649,23 +649,25 @@ out: } int32_t -gf_store_iter_destroy(gf_store_iter_t *iter) +gf_store_iter_destroy(gf_store_iter_t **iter) { int32_t ret = -1; - if (!iter) + if (!(*iter)) return 0; /* gf_store_iter_new will not return a valid iter object with iter->file * being NULL*/ - ret = fclose(iter->file); + ret = fclose((*iter)->file); if (ret) gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, "Unable" " to close file: %s, ret: %d", - iter->filepath, ret); + (*iter)->filepath, ret); + + GF_FREE(*iter); + *iter = NULL; - GF_FREE(iter); return ret; } diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 0de53c6f4cf..df20cec559f 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -11,6 +11,10 @@ #include "glusterfs/syncop.h" #include "glusterfs/libglusterfs-messages.h" +#ifdef HAVE_TSAN_API +#include <sanitizer/tsan_interface.h> +#endif + int syncopctx_setfsuid(void *uid) { @@ -154,10 +158,14 @@ out: return ret; } +void * +syncenv_processor(void *thdata); + static void __run(struct synctask *task) { struct syncenv *env = NULL; + int32_t total, ret, i; env = task->env; @@ -173,7 +181,6 @@ __run(struct synctask *task) env->runcount--; break; case SYNCTASK_WAIT: - env->waitcount--; break; case SYNCTASK_DONE: gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, @@ -187,8 +194,27 @@ __run(struct synctask *task) } list_add_tail(&task->all_tasks, &env->runq); - env->runcount++; task->state = SYNCTASK_RUN; + + env->runcount++; + + total = env->procs + env->runcount - env->procs_idle; + if (total > env->procmax) { + total = env->procmax; + } + if (total > env->procs) { + for (i = 0; i < env->procmax; i++) { + if (env->proc[i].env == NULL) { + env->proc[i].env = env; + ret = gf_thread_create(&env->proc[i].processor, NULL, + syncenv_processor, &env->proc[i], + "sproc%d", i); + if ((ret < 0) || (++env->procs >= total)) { + break; + } + } + } + } } static void @@ -210,7 +236,6 @@ __wait(struct synctask *task) gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK, "re-waiting already waiting " "task"); - env->waitcount--; break; case SYNCTASK_DONE: gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, @@ -223,12 +248,11 @@ __wait(struct synctask *task) } list_add_tail(&task->all_tasks, &env->waitq); - env->waitcount++; task->state = SYNCTASK_WAIT; } void -synctask_yield(struct synctask *task) +synctask_yield(struct synctask *task, struct timespec *delta) { xlator_t *oldTHIS = THIS; @@ -237,9 +261,16 @@ synctask_yield(struct synctask *task) task->proc->sched.uc_flags &= ~_UC_TLSBASE; #endif + task->delta = delta; + if (task->state != SYNCTASK_DONE) { task->state = SYNCTASK_SUSPEND; } + +#ifdef HAVE_TSAN_API + __tsan_switch_to_fiber(task->proc->tsan.fiber, 0); +#endif + if (swapcontext(&task->ctx, &task->proc->sched) < 0) { gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED, "swapcontext failed"); @@ -249,6 +280,35 @@ synctask_yield(struct synctask *task) } void +synctask_sleep(int32_t secs) +{ + struct timespec delta; + struct synctask *task; + + task = synctask_get(); + + if (task == NULL) { + sleep(secs); + } else { + delta.tv_sec = secs; + delta.tv_nsec = 0; + + synctask_yield(task, &delta); + } +} + +static void +__synctask_wake(struct synctask *task) +{ + task->woken = 1; + + if (task->slept) + __run(task); + + pthread_cond_broadcast(&task->env->cond); +} + +void synctask_wake(struct synctask *task) { struct syncenv *env = NULL; @@ -257,13 +317,18 @@ synctask_wake(struct synctask *task) pthread_mutex_lock(&env->mutex); { - task->woken = 1; + if (task->timer != NULL) { + if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) { + goto unlock; + } - if (task->slept) - __run(task); + task->timer = NULL; + task->synccond = NULL; + } - pthread_cond_broadcast(&env->cond); + __synctask_wake(task); } +unlock: pthread_mutex_unlock(&env->mutex); } @@ -282,7 +347,7 @@ synctask_wrap(void) task->state = SYNCTASK_DONE; - synctask_yield(task); + synctask_yield(task, NULL); } void @@ -301,6 +366,10 @@ synctask_destroy(struct synctask *task) pthread_cond_destroy(&task->cond); } +#ifdef HAVE_TSAN_API + __tsan_destroy_fiber(task->tsan.fiber); +#endif + GF_FREE(task); } @@ -411,6 +480,13 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, makecontext(&newtask->ctx, (void (*)(void))synctask_wrap, 0); +#ifdef HAVE_TSAN_API + newtask->tsan.fiber = __tsan_create_fiber(0); + snprintf(newtask->tsan.name, TSAN_THREAD_NAMELEN, "<synctask of %s>", + this->name); + __tsan_set_fiber_name(newtask->tsan.fiber, newtask->tsan.name); +#endif + newtask->state = SYNCTASK_INIT; newtask->slept = 1; @@ -422,11 +498,6 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, } synctask_wake(newtask); - /* - * Make sure someone's there to execute anything we just put on the - * run queue. - */ - syncenv_scale(env); return newtask; err: @@ -520,8 +591,12 @@ syncenv_task(struct syncproc *proc) goto unlock; } - sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME; + env->procs_idle++; + + sleep_till.tv_sec = gf_time() + SYNCPROC_IDLE_TIME; ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till); + + env->procs_idle--; } task = list_entry(env->runq.next, struct synctask, all_tasks); @@ -540,6 +615,34 @@ unlock: return task; } +static void +synctask_timer(void *data) +{ + struct synctask *task = data; + struct synccond *cond; + + cond = task->synccond; + if (cond != NULL) { + pthread_mutex_lock(&cond->pmutex); + + list_del_init(&task->waitq); + task->synccond = NULL; + + pthread_mutex_unlock(&cond->pmutex); + + task->ret = -ETIMEDOUT; + } + + pthread_mutex_lock(&task->env->mutex); + + gf_timer_call_cancel(task->xl->ctx, task->timer); + task->timer = NULL; + + __synctask_wake(task); + + pthread_mutex_unlock(&task->env->mutex); +} + void synctask_switchto(struct synctask *task) { @@ -555,6 +658,10 @@ synctask_switchto(struct synctask *task) task->ctx.uc_flags &= ~_UC_TLSBASE; #endif +#ifdef HAVE_TSAN_API + __tsan_switch_to_fiber(task->tsan.fiber, 0); +#endif + if (swapcontext(&task->proc->sched, &task->ctx) < 0) { gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED, "swapcontext failed"); @@ -572,7 +679,14 @@ synctask_switchto(struct synctask *task) } else { task->slept = 1; __wait(task); + + if (task->delta != NULL) { + task->timer = gf_timer_call_after(task->xl->ctx, *task->delta, + synctask_timer, task); + } } + + task->delta = NULL; } pthread_mutex_unlock(&env->mutex); } @@ -580,63 +694,27 @@ synctask_switchto(struct synctask *task) void * syncenv_processor(void *thdata) { - struct syncenv *env = NULL; struct syncproc *proc = NULL; struct synctask *task = NULL; proc = thdata; - env = proc->env; - for (;;) { - task = syncenv_task(proc); - if (!task) - break; +#ifdef HAVE_TSAN_API + proc->tsan.fiber = __tsan_create_fiber(0); + snprintf(proc->tsan.name, TSAN_THREAD_NAMELEN, "<sched of syncenv@%p>", + proc); + __tsan_set_fiber_name(proc->tsan.fiber, proc->tsan.name); +#endif + while ((task = syncenv_task(proc)) != NULL) { synctask_switchto(task); - - syncenv_scale(env); } - return NULL; -} - -void -syncenv_scale(struct syncenv *env) -{ - int diff = 0; - int scale = 0; - int i = 0; - int ret = 0; - - pthread_mutex_lock(&env->mutex); - { - if (env->procs > env->runcount) - goto unlock; - - scale = env->runcount; - if (scale > env->procmax) - scale = env->procmax; - if (scale > env->procs) - diff = scale - env->procs; - while (diff) { - diff--; - for (; (i < env->procmax); i++) { - if (env->proc[i].processor == 0) - break; - } +#ifdef HAVE_TSAN_API + __tsan_destroy_fiber(proc->tsan.fiber); +#endif - env->proc[i].env = env; - ret = gf_thread_create(&env->proc[i].processor, NULL, - syncenv_processor, &env->proc[i], - "sproc%03hx", env->procs & 0x3ff); - if (ret) - break; - env->procs++; - i++; - } - } -unlock: - pthread_mutex_unlock(&env->mutex); + return NULL; } /* The syncenv threads are cleaned up in this routine. @@ -715,12 +793,13 @@ syncenv_new(size_t stacksize, int procmin, int procmax) newenv->stacksize = stacksize; newenv->procmin = procmin; newenv->procmax = procmax; + newenv->procs_idle = 0; for (i = 0; i < newenv->procmin; i++) { newenv->proc[i].env = newenv; ret = gf_thread_create(&newenv->proc[i].processor, NULL, syncenv_processor, &newenv->proc[i], "sproc%d", - newenv->procs); + i); if (ret) break; newenv->procs++; @@ -810,7 +889,7 @@ __synclock_lock(struct synclock *lock) task->woken = 0; list_add_tail(&task->waitq, &lock->waitq); pthread_mutex_unlock(&lock->guard); - synctask_yield(task); + synctask_yield(task, NULL); /* task is removed from waitq in unlock, * under lock->guard.*/ pthread_mutex_lock(&lock->guard); @@ -963,6 +1042,136 @@ synclock_unlock(synclock_t *lock) return ret; } +/* Condition variables */ + +int32_t +synccond_init(synccond_t *cond) +{ + int32_t ret; + + INIT_LIST_HEAD(&cond->waitq); + + ret = pthread_mutex_init(&cond->pmutex, NULL); + if (ret != 0) { + return -ret; + } + + ret = pthread_cond_init(&cond->pcond, NULL); + if (ret != 0) { + pthread_mutex_destroy(&cond->pmutex); + } + + return -ret; +} + +void +synccond_destroy(synccond_t *cond) +{ + pthread_cond_destroy(&cond->pcond); + pthread_mutex_destroy(&cond->pmutex); +} + +int +synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta) +{ + struct timespec now; + struct synctask *task = NULL; + int ret; + + task = synctask_get(); + + if (task == NULL) { + if (delta != NULL) { + timespec_now_realtime(&now); + timespec_adjust_delta(&now, *delta); + } + + pthread_mutex_lock(&cond->pmutex); + + if (delta == NULL) { + ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex); + } else { + ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now); + } + } else { + pthread_mutex_lock(&cond->pmutex); + + list_add_tail(&task->waitq, &cond->waitq); + task->synccond = cond; + + ret = synclock_unlock(lock); + if (ret == 0) { + pthread_mutex_unlock(&cond->pmutex); + + synctask_yield(task, delta); + + ret = synclock_lock(lock); + if (ret == 0) { + ret = task->ret; + } + task->ret = 0; + + return ret; + } + + list_del_init(&task->waitq); + } + + pthread_mutex_unlock(&cond->pmutex); + + return ret; +} + +int +synccond_wait(synccond_t *cond, synclock_t *lock) +{ + return synccond_timedwait(cond, lock, NULL); +} + +void +synccond_signal(synccond_t *cond) +{ + struct synctask *task; + + pthread_mutex_lock(&cond->pmutex); + + if (!list_empty(&cond->waitq)) { + task = list_first_entry(&cond->waitq, struct synctask, waitq); + list_del_init(&task->waitq); + + pthread_mutex_unlock(&cond->pmutex); + + synctask_wake(task); + } else { + pthread_cond_signal(&cond->pcond); + + pthread_mutex_unlock(&cond->pmutex); + } +} + +void +synccond_broadcast(synccond_t *cond) +{ + struct list_head list; + struct synctask *task; + + INIT_LIST_HEAD(&list); + + pthread_mutex_lock(&cond->pmutex); + + list_splice_init(&cond->waitq, &list); + pthread_cond_broadcast(&cond->pcond); + + pthread_mutex_unlock(&cond->pmutex); + + while (!list_empty(&list)) { + task = list_first_entry(&list, struct synctask, waitq); + list_del_init(&task->waitq); + + synctask_wake(task); + } +} + /* Barriers */ int @@ -1032,7 +1241,7 @@ __syncbarrier_wait(struct syncbarrier *barrier, int waitfor) /* called within a synctask */ list_add_tail(&task->waitq, &barrier->waitq); pthread_mutex_unlock(&barrier->guard); - synctask_yield(task); + synctask_yield(task, NULL); pthread_mutex_lock(&barrier->guard); } else { /* called by a non-synctask */ @@ -2881,12 +3090,13 @@ syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what, SYNCOP(subvol, (&args), syncop_seek_cbk, subvol->fops->seek, fd, offset, what, xdata_in); - if (*off) - *off = args.offset; - - if (args.op_ret == -1) + if (args.op_ret < 0) { return -args.op_errno; - return args.op_ret; + } else { + if (off) + *off = args.offset; + return args.op_ret; + } } int diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index 49f782a837a..04400f98b6c 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -13,6 +13,10 @@ #include "glusterfs/mem-pool.h" #include "glusterfs/libglusterfs-messages.h" +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#include <signal.h> +#endif #include <sys/types.h> #include <utime.h> #include <sys/time.h> @@ -506,7 +510,7 @@ sys_lsetxattr(const char *path, const char *name, const void *value, #endif #ifdef GF_BSD_HOST_OS - return FS_RET_CHECK0( + return FS_RET_CHECK( extattr_set_link(path, EXTATTR_NAMESPACE_USER, name, value, size), errno); #endif @@ -624,7 +628,7 @@ sys_fsetxattr(int filedes, const char *name, const void *value, size_t size, #endif #ifdef GF_BSD_HOST_OS - return FS_RET_CHECK0( + return FS_RET_CHECK( extattr_set_fd(filedes, EXTATTR_NAMESPACE_USER, name, value, size), errno); #endif @@ -849,7 +853,24 @@ sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags); #else - return -ENOSYS; + errno = ENOSYS; + return -1; #endif /* HAVE_COPY_FILE_RANGE_SYS */ #endif /* HAVE_COPY_FILE_RANGE */ } + +#ifdef __FreeBSD__ +int +sys_kill(pid_t pid, int sig) +{ + return FS_RET_CHECK0(kill(pid, sig), errno); +} + +int +sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp, + const void *newp, size_t newlen) +{ + return FS_RET_CHECK0(sysctl(name, namelen, oldp, oldlenp, newp, newlen), + errno); +} +#endif diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index e793e1e3d68..9a2582d45d5 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -184,9 +184,11 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle, volume_opt_list_t *opt_list) { int ret = -1; + int flag = 0; char *name = NULL; void *handle = NULL; xlator_api_t *xlapi = NULL; + volume_option_t *opt = NULL; GF_VALIDATE_OR_GOTO("xlator", xlator_type, out); @@ -194,8 +196,10 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle, * need this check */ if (!strstr(xlator_type, "rpc-transport")) ret = gf_asprintf(&name, "%s/%s.so", XLATORDIR, xlator_type); - else + else { + flag = 1; ret = gf_asprintf(&name, "%s/%s.so", XLATORPARENTDIR, xlator_type); + } if (-1 == ret) { goto out; } @@ -211,18 +215,29 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle, goto out; } - /* check new struct first, and then check this */ - xlapi = dlsym(handle, "xlator_api"); - if (!xlapi) { - gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s", - dlerror(), NULL); - goto out; - } + if (flag == 0) { + /* check new struct first, and then check this */ + xlapi = dlsym(handle, "xlator_api"); + if (!xlapi) { + gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s", + dlerror(), NULL); + goto out; + } - opt_list->given_opt = xlapi->options; - if (!opt_list->given_opt) { - gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED, NULL); - goto out; + opt_list->given_opt = xlapi->options; + if (!opt_list->given_opt) { + gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED, NULL); + goto out; + } + } else { + opt = dlsym(handle, "options"); + if (!opt) { + gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s", + dlerror(), NULL); + goto out; + } + + opt_list->given_opt = opt; } *dl_handle = handle; @@ -245,6 +260,7 @@ xlator_dynload_apis(xlator_t *xl) void *handle = NULL; volume_opt_list_t *vol_opt = NULL; xlator_api_t *xlapi = NULL; + int i = 0; handle = xl->dlhandle; @@ -342,6 +358,10 @@ xlator_dynload_apis(xlator_t *xl) memcpy(xl->op_version, xlapi->op_version, sizeof(uint32_t) * GF_MAX_RELEASES); + for (i = 0; i < GF_FOP_MAXVALUE; i++) { + gf_latency_reset(&xl->stats.interval.latencies[i]); + } + ret = 0; out: return ret; @@ -806,7 +826,7 @@ xlator_members_free(xlator_t *xl) GF_FREE(xl->name); GF_FREE(xl->type); - if (!(xl->ctx && xl->ctx->cmd_args.valgrind) && xl->dlhandle) + if (!(xl->ctx && xl->ctx->cmd_args.vgtool != _gf_none) && xl->dlhandle) dlclose(xl->dlhandle); if (xl->options) dict_unref(xl->options); |