diff options
Diffstat (limited to 'xlators/features')
55 files changed, 3112 insertions, 954 deletions
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index 194634b003d..c57897f11ea 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC CLOUDSYNC_DIR = cloudsync endif +if BUILD_METADISP + METADISP_DIR = metadisp +endif + SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ compress changelog gfid-access snapview-client snapview-server trash \ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ - utime + utime $(METADISP_DIR) CLEANFILES = diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h index 3d40089dc95..5bc5103a27c 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h @@ -47,6 +47,55 @@ GLFS_MSGID(BITROT_BITD, BRB_MSG_FD_CREATE_FAILED, BRB_MSG_READV_FAILED, BRB_MSG_SCRUB_THREAD_CLEANUP, BRB_MSG_SCRUBBER_CLEANED, BRB_MSG_GENERIC_SSM_INFO, BRB_MSG_ZERO_TIMEOUT_BUG, BRB_MSG_BAD_OBJ_READDIR_FAIL, BRB_MSG_SSM_FAILED, - BRB_MSG_SCRUB_WAIT_FAILED); + BRB_MSG_SCRUB_WAIT_FAILED, BRB_MSG_TRIGGER_SIGN_FAILED, + BRB_MSG_EVENT_UNHANDLED, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, + BRB_MSG_THREAD_CREATION_FAILED, BRB_MSG_MEM_POOL_ALLOC, + BRB_MSG_SAVING_HASH_FAILED); +#define BRB_MSG_FD_CREATE_FAILED_STR "failed to create fd for the inode" +#define BRB_MSG_READV_FAILED_STR "readv failed" +#define BRB_MSG_BLOCK_READ_FAILED_STR "reading block failed" +#define BRB_MSG_NO_MEMORY_STR "failed to allocate memory" +#define BRB_MSG_CALC_CHECKSUM_FAILED_STR "calculating checksum failed" +#define BRB_MSG_GET_SIGN_FAILED_STR "failed to get the signature" +#define BRB_MSG_SET_SIGN_FAILED_STR "signing failed" +#define BRB_MSG_OP_FAILED_STR "failed on object" +#define BRB_MSG_TRIGGER_SIGN_FAILED_STR "Could not trigger signing" +#define BRB_MSG_READ_AND_SIGN_FAILED_STR "reading and signing of object failed" 
+#define BRB_MSG_SET_TIMER_FAILED_STR "Failed to allocate object expiry timer" +#define BRB_MSG_GET_SUBVOL_FAILED_STR \ + "failed to get the subvolume for the brick" +#define BRB_MSG_PATH_FAILED_STR "path failed" +#define BRB_MSG_SKIP_OBJECT_STR "Entry is marked corrupted. skipping" +#define BRB_MSG_PARTIAL_VERSION_PRESENCE_STR \ + "PArtial version xattr presence detected, ignoring" +#define BRB_MSG_TRIGGER_SIGN_STR "Triggering signing" +#define BRB_MSG_CRAWLING_START_STR \ + "Crawling brick, scanning for unsigned objects" +#define BRB_MSG_CRAWLING_FINISH_STR "Completed crawling brick" +#define BRB_MSG_REGISTER_FAILED_STR "Register to changelog failed" +#define BRB_MSG_SPAWN_FAILED_STR "failed to spawn" +#define BRB_MSG_CONNECTED_TO_BRICK_STR "Connected to brick" +#define BRB_MSG_LOOKUP_FAILED_STR "lookup on root failed" +#define BRB_MSG_GET_INFO_FAILED_STR "failed to get stub info" +#define BRB_MSG_SCRUB_THREAD_CLEANUP_STR "Error cleaning up scanner thread" +#define BRB_MSG_SCRUBBER_CLEANED_STR "clened up scrubber for brick" +#define BRB_MSG_SUBVOL_CONNECT_FAILED_STR \ + "callback handler for subvolume failed" +#define BRB_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" +#define BRB_MSG_EVENT_UNHANDLED_STR "Event unhandled for child" +#define BRB_MSG_INVALID_SUBVOL_STR "Got event from invalid subvolume" +#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED_STR \ + "on demand scrub schedule failed. Scrubber is not in pending state." +#define BRB_MSG_COULD_NOT_SCHEDULE_SCRUB_STR \ + "Could not schedule ondemand scrubbing. Scrubbing will continue " \ + "according to old frequency." 
+#define BRB_MSG_THREAD_CREATION_FAILED_STR "thread creation failed" +#define BRB_MSG_RATE_LIMIT_INFO_STR "Rate Limit Info" +#define BRB_MSG_MEM_POOL_ALLOC_STR "failed to allocate mem-pool for timer" +#define BRB_MSG_NO_CHILD_STR "FATAL: no children" +#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE_STR "global timer wheel unavailable" +#define BRB_MSG_BITROT_LOADED_STR "bit-rot xlator loaded" +#define BRB_MSG_SAVING_HASH_FAILED_STR \ + "failed to allocate memory for saving hash of the object" #endif /* !_BITROT_BITD_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c index 34e20f9df11..5cef2ffa5e5 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c @@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat) } void -br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv) +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time) { if (!scrub_stat) return; pthread_mutex_lock(&scrub_stat->lock); { - scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec; + scrub_stat->scrub_start_time = time; } pthread_mutex_unlock(&scrub_stat->lock); } void br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, - struct timeval *tv) + time_t time) { int lst_size = 0; @@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, pthread_mutex_lock(&scrub_stat->lock); { - scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec; + scrub_stat->scrub_end_time = time; - scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec - - scrub_stat->scrub_start_tv.tv_sec; + scrub_stat->scrub_duration = scrub_stat->scrub_end_time - + scrub_stat->scrub_start_time; snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr); } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h index 
24128b90a66..f022aa831eb 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h @@ -15,20 +15,22 @@ #include <sys/time.h> #include <pthread.h> +#include <glusterfs/common-utils.h> + struct br_scrub_stats { - uint64_t scrubbed_files; /* Total number of scrubbed file */ + uint64_t scrubbed_files; /* Total number of scrubbed files. */ - uint64_t unsigned_files; /* Total number of unsigned file */ + uint64_t unsigned_files; /* Total number of unsigned files. */ - uint64_t scrub_duration; /* Duration of last scrub */ + uint64_t scrub_duration; /* Duration of last scrub. */ - char last_scrub_time[1024]; /*last scrub completion time */ + char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */ - struct timeval scrub_start_tv; /* Scrubbing starting time*/ + time_t scrub_start_time; /* Scrubbing starting time. */ - struct timeval scrub_end_tv; /* Scrubbing finishing time */ + time_t scrub_end_time; /* Scrubbing finishing time. */ - int8_t scrub_running; /* Scrub running or not */ + int8_t scrub_running; /* Whether scrub running or not. 
*/ pthread_mutex_t lock; }; @@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat); void br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat); void -br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv); +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time); void br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, - struct timeval *tv); + time_t time); #endif /* __BIT_ROT_SCRUB_STATUS_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index d20ecc7cdbe..289dd53f610 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -601,25 +601,23 @@ br_fsscan_deactivate(xlator_t *this) static void br_scrubber_log_time(xlator_t *this, const char *sfx) { - char timestr[1024] = { - 0, - }; - struct timeval tv = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; br_private_t *priv = NULL; + time_t now = 0; + now = gf_time(); priv = this->private; - gettimeofday(&tv, NULL); - gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); if (strcasecmp(sfx, "started") == 0) { - br_update_scrub_start_time(&priv->scrub_stat, &tv); + br_update_scrub_start_time(&priv->scrub_stat, now); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, "Scrubbing %s at %s", sfx, timestr); } else { - br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv); + br_update_scrub_finish_time(&priv->scrub_stat, timestr, now); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, "Scrubbing %s at %s", sfx, timestr); } @@ -628,15 +626,13 @@ br_scrubber_log_time(xlator_t *this, const char *sfx) static void br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx) { - char timestr[1024] = { - 0, - }; - struct timeval tv = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; - gettimeofday(&tv, NULL); - 
gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT); + now = gf_time(); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); if (strcasecmp(sfx, "started") == 0) { gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s", @@ -919,10 +915,7 @@ br_fsscan_schedule(xlator_t *this) { uint32_t timo = 0; br_private_t *priv = NULL; - struct timeval tv = { - 0, - }; - char timestr[1024] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; struct br_scrubber *fsscrub = NULL; @@ -933,8 +926,7 @@ br_fsscan_schedule(xlator_t *this) fsscrub = &priv->fsscrub; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&tv, NULL); - scrub_monitor->boot = tv.tv_sec; + scrub_monitor->boot = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { @@ -975,12 +967,10 @@ int32_t br_fsscan_activate(xlator_t *this) { uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; br_private_t *priv = NULL; struct br_scrubber *fsscrub = NULL; struct br_monitor *scrub_monitor = NULL; @@ -989,7 +979,7 @@ br_fsscan_activate(xlator_t *this) fsscrub = &priv->fsscrub; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&now, NULL); + now = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, @@ -1003,7 +993,7 @@ br_fsscan_activate(xlator_t *this) } pthread_mutex_unlock(&scrub_monitor->donelock); - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); (void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo); _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING); @@ -1020,12 +1010,10 @@ br_fsscan_reschedule(xlator_t *this) { int32_t ret = 0; uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 
0; br_private_t *priv = NULL; struct br_scrubber *fsscrub = NULL; struct br_monitor *scrub_monitor = NULL; @@ -1037,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this) if (!fsscrub->frequency_reconf) return 0; - (void)gettimeofday(&now, NULL); + now = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, @@ -1045,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this) return -1; } - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); pthread_mutex_lock(&scrub_monitor->donelock); { @@ -1073,23 +1061,19 @@ br_fsscan_ondemand(xlator_t *this) { int32_t ret = 0; uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; br_private_t *priv = NULL; struct br_monitor *scrub_monitor = NULL; priv = this->private; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&now, NULL); - + now = gf_time(); timo = BR_SCRUB_ONDEMAND; - - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); pthread_mutex_lock(&scrub_monitor->donelock); { @@ -1799,7 +1783,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict, tmp_count = total_count; for (j = 0; j < count; j++) { - len = snprintf(key, PATH_MAX, "quarantine-%d", j); + len = snprintf(key, sizeof(key), "quarantine-%d", j); ret = dict_get_strn(child_dict, key, len, &entry); if (ret) continue; @@ -1810,7 +1794,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict, if ((len < 0) || (len >= PATH_MAX)) { continue; } - snprintf(main_key, PATH_MAX, "quarantine-%d", tmp_count); + snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count); ret = dict_set_dynstr_with_alloc(dict, main_key, tmp); if (!ret) diff --git 
a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 6d0c0b5cead..a2f1c343a1d 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -241,8 +241,8 @@ br_object_open(xlator_t *this, br_object_t *object, inode_t *inode, ret = -EINVAL; fd = fd_create(inode, 0); if (!fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, - "failed to create fd for the inode %s", uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); goto out; } @@ -296,8 +296,8 @@ br_object_read_block_and_sign(xlator_t *this, fd_t *fd, br_child_t *child, NULL, NULL, NULL); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED, - "readv on %s failed", uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); ret = -1; goto out; } @@ -347,9 +347,9 @@ br_calculate_obj_checksum(unsigned char *md, br_child_t *child, fd_t *fd, ret = br_object_read_block_and_sign(this, fd, child, offset, block, &sha256); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED, - "reading block with offset %" PRIu64 " of object %s failed", - offset, uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED, + "offset=%" PRIu64, offset, "object-gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); break; } @@ -391,28 +391,23 @@ br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object, md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char); if (!md) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate memory for saving hash of the " - "object %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_SAVING_HASH_FAILED, + "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } ret = 
br_object_checksum(md, object, fd, iatt); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED, - "calculating checksum " - "for the object %s failed", - uuid_utoa(linked_inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED, + "object-gfid=%s", uuid_utoa(linked_inode->gfid), NULL); goto free_signature; } sign = br_prepare_signature(md, SHA256_DIGEST_LENGTH, BR_SIGNATURE_TYPE_SHA256, object); if (!sign) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, - "failed to get the signature for the object %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, + "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto free_signature; } @@ -420,17 +415,16 @@ br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object, signature_size(SHA256_DIGEST_LENGTH), _gf_true); if (!xattr) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, - "dict allocation for signing failed for the object %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "dict-allocation object-gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); goto free_isign; } ret = syncop_fsetxattr(object->child->xl, fd, xattr, 0, NULL, NULL); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, - "fsetxattr of signature to the object %s failed", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "fsetxattr object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto unref_dict; } @@ -463,8 +457,8 @@ br_log_object(xlator_t *this, char *op, uuid_t gfid, int32_t op_errno) "[reason: %s]", op, uuid_utoa(gfid), strerror(op_errno)); } else { - gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, - "%s() failed on object %s", op, uuid_utoa(gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s", + op, "gfid=%s", uuid_utoa(gfid), NULL); } } @@ -478,8 
+472,8 @@ br_log_object_path(xlator_t *this, char *op, const char *path, int32_t op_errno) "[reason: %s]", op, path, strerror(op_errno)); } else { - gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, - "%s() failed on object %s", op, path); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s", + op, "path=%s", path, NULL); } } @@ -508,8 +502,8 @@ br_trigger_sign(xlator_t *this, br_child_t *child, inode_t *linked_inode, ret = -1; fd = fd_create(linked_inode, 0); if (!fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, - "Failed to create fd [GFID %s]", uuid_utoa(linked_inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), NULL); goto cleanup_dict; } @@ -533,9 +527,9 @@ cleanup_dict: dict_unref(dict); out: if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN, - "Could not trigger signingd for %s (reopen hint: %d)", - uuid_utoa(linked_inode->gfid), val); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), "reopen-hint-val=%d", + val, NULL); } } @@ -615,10 +609,8 @@ br_sign_object(br_object_t *object) ret = br_object_read_sign(linked_inode, fd, object, &iatt); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED, - "reading and signing of " - "the object %s failed", - uuid_utoa(linked_inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), NULL); goto unref_fd; } @@ -672,8 +664,8 @@ br_process_object(void *arg) ret = br_sign_object(object); if (ret && !br_object_sign_softerror(-ret)) - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SIGN_FAILED, - "SIGNING FAILURE [%s]", uuid_utoa(object->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "gfid=%s", uuid_utoa(object->gfid), NULL); GF_FREE(object); } @@ -775,9 +767,8 @@ br_schedule_object_reopen(xlator_t 
*this, br_object_t *object, timer = br_initialize_timer(this, object, child, ev); if (!timer) - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED, - "Failed to allocate object expiry timer [GFID: %s]", - uuid_utoa(object->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED, + "gfid=%s", uuid_utoa(object->gfid), NULL); return timer ? 0 : -1; } @@ -824,15 +815,15 @@ br_brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev) child = br_get_child_from_brick_path(this, brick); if (!child) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED, - "failed to get the subvolume for the brick %s", brick); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED, + "brick=%s", brick, NULL); goto out; } object = br_initialize_object(this, child, ev); if (!object) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate object memory [GFID: %s]", uuid_utoa(gfid)); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, + "object-gfid=%s", uuid_utoa(gfid), NULL); goto out; } @@ -884,8 +875,8 @@ br_check_object_need_sign(xlator_t *this, dict_t *xattr, br_child_t *child) ret = dict_get_ptr(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)&sign); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, - "failed to get object signature info"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, + "object-info", NULL); goto out; } @@ -924,9 +915,9 @@ br_prepare_loc(xlator_t *this, br_child_t *child, loc_t *parent, ret = inode_path(parent->inode, entry->d_name, (char **)&loc->path); if (ret < 0 || !loc->path) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED, - "inode_path on %s (parent: %s) failed", entry->d_name, - uuid_utoa(parent->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED, + "inode_path=%s", entry->d_name, "parent-gfid=%s", + uuid_utoa(parent->inode->gfid), NULL); goto out; } @@ -1018,8 +1009,8 @@ 
bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, */ if (bitd_is_bad_file(this, child, &loc, NULL)) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, - "Entry [%s] is marked corrupted.. skipping.", loc.path); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, "path=%s", + loc.path, NULL); goto unref_inode; } @@ -1036,12 +1027,9 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (op_errno == ENODATA && (iatt.ia_size != 0)) need_signing = _gf_true; if (op_errno == EINVAL) - gf_msg(this->name, GF_LOG_WARNING, 0, - BRB_MSG_PARTIAL_VERSION_PRESENCE, - "Partial " - "version xattr presence detected, ignoring " - "[GFID: %s]", - uuid_utoa(linked_inode->gfid)); + gf_smsg(this->name, GF_LOG_WARNING, 0, + BRB_MSG_PARTIAL_VERSION_PRESENCE, "gfid=%s", + uuid_utoa(linked_inode->gfid), NULL); } else { need_signing = br_check_object_need_sign(this, xattr, child); @@ -1061,9 +1049,9 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (!need_signing) goto unref_dict; - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, - "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path, - uuid_utoa(linked_inode->gfid), child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, "path=%s", + loc.path, "gfid=%s", uuid_utoa(linked_inode->gfid), "Brick-path=%s", + child->brick_path, NULL); br_trigger_sign(this, child, linked_inode, &loc, need_reopen); ret = 0; @@ -1096,17 +1084,16 @@ br_oneshot_signer(void *arg) THIS = this; - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, - "Crawling brick [%s], scanning for unsigned objects", - child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, "brick-path=%s", + child->brick_path, NULL); loc.inode = child->table->root; (void)syncop_ftw_throttle(child->xl, &loc, GF_CLIENT_PID_BITD, child, bitd_oneshot_crawl, BR_CRAWL_THROTTLE_COUNT, BR_CRAWL_THROTTLE_ZZZ); - gf_msg(this->name, GF_LOG_INFO, 
0, BRB_MSG_CRAWLING_FINISH, - "Completed crawling brick [%s]", child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH, + "brick-path=%s", child->brick_path, NULL); return NULL; } @@ -1150,9 +1137,7 @@ br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub) ret = gf_changelog_register_generic(brick, 1, 1, this->ctx->cmd_args.log_file, -1, this); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, - "Register to changelog " - "failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, NULL); goto dealloc; } @@ -1160,8 +1145,8 @@ br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub) ret = gf_thread_create(&child->thread, NULL, br_oneshot_signer, child, "brosign"); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED, - "failed to spawn FS crawler thread"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED, + "FS-crawler-thread", NULL); else child->threadrunning = 1; @@ -1189,9 +1174,9 @@ br_launch_scrubber(xlator_t *this, br_child_t *child, struct br_scanfs *fsscan, ret = gf_thread_create(&child->thread, NULL, br_fsscanner, child, "brfsscan"); if (ret != 0) { - gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, - "failed to spawn bitrot scrubber daemon [Brick: %s]", - child->brick_path); + gf_smsg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, + "bitrot-scrubber-daemon Brick-path=%s", child->brick_path, + NULL); goto error_return; } @@ -1279,8 +1264,8 @@ br_child_enaction(xlator_t *this, br_child_t *child, br_stub_init_t *stub) if (!ret) { child->witnessed = 1; _br_set_child_state(child, BR_CHILD_STATE_CONNECTED); - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK, - "Connected to brick %s..", child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK, + "brick-path=%s", child->brick_path, NULL); } } pthread_mutex_unlock(&child->lock); @@ -1327,8 +1312,8 @@ br_brick_connect(xlator_t 
*this, br_child_t *child) if (ret) { op_errno = -ret; ret = -1; - gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED, - "lookup on root failed"); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED, + NULL); goto wipeloc; } @@ -1337,15 +1322,14 @@ br_brick_connect(xlator_t *this, br_child_t *child) if (ret) { op_errno = -ret; ret = -1; - gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED, - "failed to get stub info"); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED, + NULL); goto wipeloc; } ret = dict_get_ptr(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, (void **)&stub); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, - "failed to extract stub information"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, NULL); goto free_dict; } @@ -1415,11 +1399,10 @@ br_cleanup_scrubber(xlator_t *this, br_child_t *child) */ ret = gf_thread_cleanup_xint(child->thread); if (ret) - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, - "Error cleaning up scanner thread"); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, NULL); - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED, - "Cleaned up scrubber for brick [%s]", child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED, + "brick-path=%s", child->brick_path, NULL); return 0; } @@ -1504,9 +1487,8 @@ br_handle_events(void *arg) child = childev->child; ret = childev->call(this, child); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED, - "callback handler for subvolume [%s] failed", - child->xl->name); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED, + "name=%s", child->xl->name, NULL); GF_FREE(childev); } @@ -1524,8 +1506,7 @@ mem_acct_init(xlator_t *this) ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1); if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, - "Memory accounting 
init failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, NULL); return ret; } @@ -1542,8 +1523,8 @@ _br_qchild_event(xlator_t *this, br_child_t *child, br_child_handler *call) childev = GF_CALLOC(1, sizeof(*childev), gf_br_mt_br_child_event_t); if (!childev) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "Event unhandled for child.. [Brick: %s]", child->xl->name); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_EVENT_UNHANDLED, + "Brick-name=%s", child->xl->name, NULL); return; } @@ -1638,10 +1619,8 @@ notify(xlator_t *this, int32_t event, void *data, ...) switch (event) { case GF_EVENT_CHILD_UP: if (idx < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL, - "Got event %d from " - "invalid subvolume", - event); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL, + "event=%d", event, NULL); goto out; } @@ -1669,9 +1648,8 @@ notify(xlator_t *this, int32_t event, void *data, ...) case GF_EVENT_CHILD_DOWN: if (idx < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, - BRB_MSG_INVALID_SUBVOL_CHILD, - "Got event %d from invalid subvolume", event); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL, + "event=%d", event, NULL); goto out; } @@ -1712,11 +1690,9 @@ notify(xlator_t *this, int32_t event, void *data, ...) "called"); if (scrub_monitor->state != BR_SCRUB_STATE_PENDING) { - gf_msg(this->name, GF_LOG_ERROR, 0, - BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, - "on demand scrub schedule failed. Scrubber is " - "not in pending state. Current state is %d", - scrub_monitor->state); + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, "Current-state=%d", + scrub_monitor->state, NULL); return -2; } @@ -1728,11 +1704,8 @@ notify(xlator_t *this, int32_t event, void *data, ...) pthread_mutex_unlock(&priv->lock); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, - "Could not schedule ondemand scrubbing. 
" - "Scrubbing will continue according to " - "old frequency."); + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, NULL); } gf_msg_debug(this->name, 0, "returning %d", ret); break; @@ -1744,22 +1717,26 @@ out: return 0; } -/** - * Initialize signer specific structures, spawn worker threads. - */ - static void br_fini_signer(xlator_t *this, br_private_t *priv) { int i = 0; - for (; i < BR_WORKERS; i++) { + if (priv == NULL) + return; + + for (; i < priv->signer_th_count; i++) { (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); } + GF_FREE(priv->obj_queue->workers); pthread_cond_destroy(&priv->object_cond); } +/** + * Initialize signer specific structures, spawn worker threads. + */ + static int32_t br_init_signer(xlator_t *this, br_private_t *priv) { @@ -1779,13 +1756,17 @@ br_init_signer(xlator_t *this, br_private_t *priv) goto cleanup_cond; INIT_LIST_HEAD(&priv->obj_queue->objects); - for (i = 0; i < BR_WORKERS; i++) { + priv->obj_queue->workers = GF_CALLOC( + priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t); + if (!priv->obj_queue->workers) + goto cleanup_obj_queue; + + for (i = 0; i < priv->signer_th_count; i++) { ret = gf_thread_create(&priv->obj_queue->workers[i], NULL, br_process_object, this, "brpobj"); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED, - "thread creation" - " failed"); + gf_smsg(this->name, GF_LOG_ERROR, -ret, + BRB_MSG_THREAD_CREATION_FAILED, NULL); ret = -1; goto cleanup_threads; } @@ -1797,7 +1778,9 @@ cleanup_threads: for (i--; i >= 0; i--) { (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); } + GF_FREE(priv->obj_queue->workers); +cleanup_obj_queue: GF_FREE(priv->obj_queue); cleanup_cond: @@ -1850,18 +1833,17 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) if (contribution == 0) contribution = 1; spec.rate = BR_HASH_CALC_READ_SIZE * contribution; - spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE; + spec.maxlimit = 
priv->signer_th_count * BR_HASH_CALC_READ_SIZE; #endif if (!spec.rate) - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, - "[Rate Limit Info] \"FULL THROTTLE\""); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, + "FULL THROTTLE", NULL); else - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, - "[Rate Limit Info] \"tokens/sec (rate): %lu, " - "maxlimit: %lu\"", - spec.rate, spec.maxlimit); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, + "tokens/sec-rate=%lu", spec.rate, "maxlimit=%lu", spec.maxlimit, + NULL); priv->tbf = tbf_init(&spec, 1); return priv->tbf ? 0 : -1; @@ -1870,11 +1852,16 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) static int32_t br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options) { - if (options) + if (options) { GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32, error_return); - else + GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options, + uint32, error_return); + } else { GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); + GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, + error_return); + } return 0; @@ -1890,6 +1877,8 @@ br_signer_init(xlator_t *this, br_private_t *priv) GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); GF_OPTION_INIT("brick-count", numbricks, int32, error_return); + GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, + error_return); ret = br_rate_limit_signer(this, priv->child_count, numbricks); if (ret) @@ -1976,8 +1965,8 @@ br_init_children(xlator_t *this, br_private_t *priv) child->timer_pool = mem_pool_new(struct gf_tw_timer_list, 4096); if (!child->timer_pool) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate mem-pool for timer"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_MEM_POOL_ALLOC, + NULL); errno = ENOMEM; goto freechild; } @@ -2003,15 +1992,13 @@ init(xlator_t 
*this) br_private_t *priv = NULL; if (!this->children) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, - "FATAL: no children"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, NULL); goto out; } priv = GF_CALLOC(1, sizeof(*priv), gf_br_mt_br_private_t); if (!priv) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate memory (->priv)"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, NULL); goto out; } @@ -2029,8 +2016,8 @@ init(xlator_t *this) priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx); if (!priv->timer_wheel) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE, - "global timer wheel unavailable"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE, + NULL); goto cleanup; } @@ -2052,15 +2039,14 @@ init(xlator_t *this) ret = gf_thread_create(&priv->thread, NULL, br_handle_events, this, "brhevent"); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED, - "thread creation failed"); + gf_smsg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_THREAD_CREATION_FAILED, + NULL); ret = -1; } if (!ret) { - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, - "bit-rot xlator loaded in \"%s\" mode", - (priv->iamscrubber) ? "SCRUBBER" : "SIGNER"); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, "mode=%s", + (priv->iamscrubber) ? "SCRUBBER" : "SIGNER", NULL); return 0; } @@ -2107,9 +2093,8 @@ br_reconfigure_monitor(xlator_t *this) ret = br_scrub_state_machine(this, _gf_false); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, - "Could not reschedule scrubber for the volume. Scrubbing " - "will continue according to old frequency."); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, + NULL); } } @@ -2220,6 +2205,15 @@ struct volume_options options[] = { .description = "Pause/Resume scrub. 
Upon resume, scrubber " "continues from where it left off.", }, + { + .key = {"signer-threads"}, + .type = GF_OPTION_TYPE_INT, + .default_value = BR_WORKERS, + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Number of signing process threads. As a best " + "practice, set this to the number of processor cores", + }, {.key = {NULL}}, }; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index a4d4fd74198..8ac7dcdac3d 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -30,12 +30,6 @@ #include <openssl/sha.h> -/** - * TODO: make this configurable. As a best practice, set this to the - * number of processor cores. - */ -#define BR_WORKERS 4 - typedef enum scrub_throttle { BR_SCRUB_THROTTLE_VOID = -1, BR_SCRUB_THROTTLE_LAZY = 0, @@ -108,12 +102,12 @@ struct br_child { typedef struct br_child br_child_t; struct br_obj_n_workers { - struct list_head objects; /* queue of objects expired from the - timer wheel and ready to be picked - up for signing */ - pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects - from the above queue and start - signing each object */ + struct list_head objects; /* queue of objects expired from the + timer wheel and ready to be picked + up for signing */ + pthread_t *workers; /* Threads which pick up the objects + from the above queue and start + signing each object */ }; struct br_scrubber { @@ -209,6 +203,8 @@ struct br_private { uint32_t expiry_time; /* objects "wait" time */ + uint32_t signer_th_count; /* Number of signing process threads */ + tbf_t *tbf; /* token bucket filter */ gf_boolean_t iamscrubber; /* function as a fs scrubber */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h index 40bcda110e6..9d93caf069f 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +++ 
b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h @@ -29,6 +29,7 @@ enum br_mem_types { gf_br_stub_mt_sigstub_t, gf_br_mt_br_child_event_t, gf_br_stub_mt_misc, + gf_br_mt_br_worker_t, gf_br_stub_mt_end, }; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 8d2b7f051da..6c15a166f18 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -44,7 +44,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL, BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED, BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED, - BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED); + BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED, + BRS_MSG_VERSION_PREPARE_FAIL); #define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" #define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed" @@ -68,6 +69,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, "daemon. Unwinding the fop" #define BRS_MSG_SIGN_PREPARE_FAIL_STR \ "failed to prepare the signature. Unwinding the fop" +#define BRS_MSG_VERSION_PREPARE_FAIL_STR \ + "failed to prepare the version. 
Unwinding the fop" #define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding" #define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad" #define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 605a5e4c3e4..447dd47ff41 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -424,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict, priv = this->private; br_set_ongoingversion(obuf, oversion, priv->boot); - return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, - sizeof(br_version_t)); + return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, + sizeof(br_version_t)); } static int @@ -436,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf, br_set_signature(sbuf, sign, signaturelen, &size); - return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, - size); + return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size); } /** @@ -854,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame, op_errno = ENOMEM; dict = dict_new(); if (!dict) - goto done; + goto out; ret = br_stub_alloc_versions(&obuf, NULL, 0); - if (ret) - goto dealloc_dict; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + br_stub_dealloc_versions(obuf); + goto out; + } ret = br_stub_fd_versioning( this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk, writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE); - 
-dealloc_versions: - br_stub_dealloc_versions(obuf); -dealloc_dict: - dict_unref(dict); -done: +out: + if (dict) + dict_unref(dict); if (ret) { if (local) frame->local = NULL; @@ -1025,31 +1028,36 @@ static int br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode, br_isignature_t *sign, int *fakesuccess) { - int32_t ret = 0; + int32_t ret = -1; size_t signaturelen = 0; br_signature_t *sbuf = NULL; if (!br_is_signature_type_valid(sign->signaturetype)) - goto error_return; + goto out; signaturelen = sign->signaturelen; ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen); - if (ret) - goto error_return; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + goto out; + } ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + br_stub_dealloc_versions(sbuf); + goto out; + } + /* At this point sbuf has been added to dict, so the memory will be freed + * when the data from the dict is destroyed + */ ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess); - if (ret) - goto dealloc_versions; - - return 0; - -dealloc_versions: - br_stub_dealloc_versions(sbuf); -error_return: - return -1; +out: + return ret; } static void diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index 71fe1f032a0..e561997d858 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -242,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len) } int -htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, - char *buffer) +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer) { char changelog_path[PATH_MAX + 1] = { 0, @@ 
-273,7 +272,7 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, goto out; } - len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts, + len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts, priv->rollover_count); if (len >= sizeof(x_value)) { ret = -1; @@ -382,8 +381,7 @@ out: } static int -changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, - unsigned long ts) +changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ret = -1; int notify = 0; @@ -421,16 +419,14 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, priv->changelog_fd = -1; } - time_t time = (time_t)ts; - - /* Get GMT time */ - gmt = gmtime(&time); + /* Get GMT time. */ + gmt = gmtime(&ts); strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt); (void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME, priv->changelog_dir); - (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%lu", + (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld", priv->changelog_dir, yyyymmdd, ts); (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd); @@ -593,7 +589,7 @@ out: * returns -1 on failure or error */ int -htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ht_file_fd = -1; int ht_dir_fd = -1; @@ -723,7 +719,7 @@ out: * returns -1 on failure or error */ int -htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ht_file_fd = -1; int ht_dir_fd = -1; @@ -741,12 +737,12 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) int32_t len = 0; gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE, - "name=%lu", ts, NULL); + "name=%ld", ts, NULL); CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); /* get the htime file name in ht_file_path */ - len = snprintf(ht_file_path, 
PATH_MAX, "%s/%s.%lu", ht_dir_path, + len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path, HTIME_FILE_NAME, ts); if ((len < 0) || (len >= PATH_MAX)) { ret = -1; @@ -792,7 +788,7 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) goto out; } - (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu", + (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld", HTIME_FILE_NAME, ts); if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, strlen(ht_file_bname), 0)) { @@ -963,8 +959,8 @@ out: } int -changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale) +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale) { int ret = -1; @@ -985,21 +981,12 @@ changelog_entry_length() return sizeof(changelog_log_data_t); } -int +void changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last) { - struct timeval tv = { - 0, - }; - cld->cld_type = CHANGELOG_TYPE_ROLLOVER; - - if (gettimeofday(&tv, NULL)) - return -1; - - cld->cld_roll_time = (unsigned long)tv.tv_sec; + cld->cld_roll_time = gf_time(); cld->cld_finale = is_last; - return 0; } int @@ -1274,7 +1261,7 @@ changelog_rollover(void *data) while (1) { (void)pthread_testcancel(); - tv.tv_sec = time(NULL) + priv->rollover_time; + tv.tv_sec = gf_time() + priv->rollover_time; tv.tv_nsec = 0; ret = 0; /* Reset ret to zero */ @@ -1355,12 +1342,7 @@ changelog_rollover(void *data) if (priv->explicit_rollover == _gf_true) sleep(1); - ret = changelog_fill_rollover_data(&cld, _gf_false); - if (ret) { - gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, NULL); - continue; - } + changelog_fill_rollover_data(&cld, _gf_false); _mask_cancellation(); diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index 0906b164a78..38fa7590c32 100644 --- 
a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -31,7 +31,7 @@ */ typedef struct changelog_log_data { /* rollover related */ - unsigned long cld_roll_time; + time_t cld_roll_time; /* reopen changelog? */ gf_boolean_t cld_finale; @@ -97,12 +97,6 @@ struct changelog_encoder { typedef struct changelog_time_slice { /** - * just in case we need nanosecond granularity some day. - * field is unused as of now (maybe we'd need it later). - */ - struct timeval tv_start; - - /** * version of changelog file, incremented each time changes * rollover. */ @@ -190,8 +184,12 @@ typedef struct changelog_ev_selector { /* changelog's private structure */ struct changelog_priv { + /* changelog journalling */ gf_boolean_t active; + /* changelog live notifications */ + gf_boolean_t rpc_active; + /* to generate unique socket file per brick */ char *changelog_brick; @@ -419,11 +417,11 @@ changelog_local_t * changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, int xtra_records, gf_boolean_t update_flag); int -changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale); +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale); int changelog_open_journal(xlator_t *this, changelog_priv_t *priv); -int +void changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last); int changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv, @@ -447,12 +445,11 @@ changelog_fsync_thread(void *data); int changelog_forget(xlator_t *this, inode_t *inode); int -htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, - char *buffer); +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer); int -htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts); int -htime_create(xlator_t *this, changelog_priv_t 
*priv, unsigned long ts); +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts); /* Geo-Rep snapshot dependency changes */ void diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h index 4dd56b8ee97..cb0e16c85d8 100644 --- a/xlators/features/changelog/src/changelog-messages.h +++ b/xlators/features/changelog/src/changelog-messages.h @@ -59,12 +59,12 @@ GLFS_MSGID( CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT, CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR, CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED, - CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, + CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0, CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY, CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE, CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET, - CHANGELOG_MSG_GET_TIME_FAILURE, CHANGELOG_MSG_HTIME_FETCH_FAILED, + CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED, @@ -123,8 +123,6 @@ GLFS_MSGID( #define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)" #define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify" #define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed" -#define CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED_STR \ - "failed to fill rollover data" #define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event" #define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \ "changelog local initialization failed" @@ -144,9 +142,7 @@ GLFS_MSGID( #define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found" #define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \ "Something went wrong in 
dict_get_str_boolean" -#define CHANGELOG_MSG_GET_TIME_FAILURE_STR "gettimeofday() failure" #define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set" -#define CHANGELOG_MSG_HTIME_FETCH_FAILED_STR "unable to fetch htime" #define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper" #define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \ "translator needs a single subvolume" diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c index afcc3a873c8..125246a17e1 100644 --- a/xlators/features/changelog/src/changelog-rpc-common.c +++ b/xlators/features/changelog/src/changelog-rpc-common.c @@ -262,6 +262,9 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile, struct rpcsvc_program *prog = NULL; rpc_transport_t *trans = NULL; + if (!rpc) + return; + while (*progs) { prog = *progs; (void)rpcsvc_program_unregister(rpc, prog); diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index 0a03cfa3673..6a6e5af859e 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -34,6 +34,12 @@ static struct changelog_bootstrap cb_bootstrap[] = { }, }; +static int +changelog_init_rpc(xlator_t *this, changelog_priv_t *priv); + +static int +changelog_init(xlator_t *this, changelog_priv_t *priv); + /* Entry operations - TYPE III */ /** @@ -1997,6 +2003,11 @@ notify(xlator_t *this, int event, void *data, ...) uint64_t clntcnt = 0; changelog_clnt_t *conn = NULL; gf_boolean_t cleanup_notify = _gf_false; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; INIT_LIST_HEAD(&queue); @@ -2010,23 +2021,40 @@ notify(xlator_t *this, int event, void *data, ...) 
"cleanup changelog rpc connection of brick %s", priv->victim->name); - this->cleanup_starting = 1; - changelog_destroy_rpc_listner(this, priv); - conn = &priv->connections; - if (conn) - changelog_ev_cleanup_connections(this, conn); - xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); - clntcnt = GF_ATOMIC_GET(priv->clntcnt); - - if (!xprtcnt && !clntcnt) { - LOCK(&priv->lock); - { - cleanup_notify = priv->notify_down; - priv->notify_down = _gf_true; + if (priv->rpc_active) { + this->cleanup_starting = 1; + changelog_destroy_rpc_listner(this, priv); + conn = &priv->connections; + if (conn) + changelog_ev_cleanup_connections(this, conn); + xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); + clntcnt = GF_ATOMIC_GET(priv->clntcnt); + if (!xprtcnt && !clntcnt) { + LOCK(&priv->lock); + { + cleanup_notify = priv->notify_down; + priv->notify_down = _gf_true; + } + UNLOCK(&priv->lock); + if (priv->rpc) { + list_for_each_entry_safe(listener, next, + &priv->rpc->listeners, list) + { + if (listener->trans) { + rpc_transport_unref(listener->trans); + } + } + rpcsvc_destroy(priv->rpc); + priv->rpc = NULL; + } + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, + UNIX_PATH_MAX); + sys_unlink(sockfile); + if (!cleanup_notify) + default_notify(this, GF_EVENT_PARENT_DOWN, data); } - UNLOCK(&priv->lock); - if (!cleanup_notify) - default_notify(this, GF_EVENT_PARENT_DOWN, data); + } else { + default_notify(this, GF_EVENT_PARENT_DOWN, data); } goto out; } @@ -2224,23 +2252,11 @@ static int changelog_init(xlator_t *this, changelog_priv_t *priv) { int i = 0; - int ret = -1; - struct timeval tv = { - 0, - }; + int ret = 0; changelog_log_data_t cld = { 0, }; - ret = gettimeofday(&tv, NULL); - if (ret) { - gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_GET_TIME_FAILURE, - NULL); - goto out; - } - - priv->slice.tv_start = tv; - priv->maps[CHANGELOG_TYPE_DATA] = "D "; priv->maps[CHANGELOG_TYPE_METADATA] = "M "; priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M "; @@ -2259,9 +2275,7 @@ 
changelog_init(xlator_t *this, changelog_priv_t *priv) * in case there was an encoding change. so... things are kept * simple here. */ - ret = changelog_fill_rollover_data(&cld, _gf_false); - if (ret) - goto out; + changelog_fill_rollover_data(&cld, _gf_false); ret = htime_open(this, priv, cld.cld_roll_time); /* call htime open with cld's rollover_time */ @@ -2405,6 +2419,22 @@ changelog_barrier_pthread_destroy(changelog_priv_t *priv) LOCK_DESTROY(&priv->bflags.lock); } +static void +changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) +{ + /* terminate rpc server */ + if (!this->cleanup_starting) + changelog_destroy_rpc_listner(this, priv); + + (void)changelog_cleanup_rpc_threads(this, priv); + /* cleanup rot buffs */ + rbuf_dtor(priv->rbuf); + + /* cleanup poller thread */ + if (priv->poller) + (void)changelog_thread_cleanup(this, priv->poller); +} + int reconfigure(xlator_t *this, dict_t *options) { @@ -2413,6 +2443,9 @@ reconfigure(xlator_t *this, dict_t *options) changelog_priv_t *priv = NULL; gf_boolean_t active_earlier = _gf_true; gf_boolean_t active_now = _gf_true; + gf_boolean_t rpc_active_earlier = _gf_true; + gf_boolean_t rpc_active_now = _gf_true; + gf_boolean_t iniate_rpc = _gf_false; changelog_time_slice_t *slice = NULL; changelog_log_data_t cld = { 0, @@ -2423,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options) char csnap_dir[PATH_MAX] = { 0, }; - struct timeval tv = { - 0, - }; uint32_t timeout = 0; priv = this->private; @@ -2434,6 +2464,7 @@ reconfigure(xlator_t *this, dict_t *options) ret = -1; active_earlier = priv->active; + rpc_active_earlier = priv->rpc_active; /* first stop the rollover and the fsync thread */ changelog_cleanup_helper_threads(this, priv); @@ -2467,6 +2498,29 @@ reconfigure(xlator_t *this, dict_t *options) goto out; GF_OPTION_RECONF("changelog", active_now, options, bool, out); + GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool, + out); + + /* If journalling is enabled, enable rpc 
notifications */ + if (active_now && !active_earlier) { + if (!rpc_active_earlier) + iniate_rpc = _gf_true; + } + + if (rpc_active_now && !rpc_active_earlier) { + iniate_rpc = _gf_true; + } + + /* TODO: Disable of changelog-notifications is not supported for now + * as there is no clean way of cleaning up of rpc resources + */ + + if (iniate_rpc) { + ret = changelog_init_rpc(this, priv); + if (ret) + goto out; + priv->rpc_active = _gf_true; + } /** * changelog_handle_change() handles changes that could possibly @@ -2493,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options) out); if (active_now || active_earlier) { - ret = changelog_fill_rollover_data(&cld, !active_now); - if (ret) - goto out; + changelog_fill_rollover_data(&cld, !active_now); slice = &priv->slice; @@ -2514,13 +2566,7 @@ reconfigure(xlator_t *this, dict_t *options) if (!active_earlier) { gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE, NULL); - if (gettimeofday(&tv, NULL)) { - gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_HTIME_FETCH_FAILED, NULL); - ret = -1; - goto out; - } - htime_create(this, priv, tv.tv_sec); + htime_create(this, priv, gf_time()); } ret = changelog_spawn_helper_threads(this, priv); } @@ -2597,6 +2643,7 @@ changelog_init_options(xlator_t *this, changelog_priv_t *priv) goto dealloc_2; GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2); + GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2); GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2); GF_OPTION_INIT("op-mode", tmp, str, dealloc_2); @@ -2635,22 +2682,6 @@ error_return: return -1; } -static void -changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) -{ - /* terminate rpc server */ - if (!this->cleanup_starting) - changelog_destroy_rpc_listner(this, priv); - - (void)changelog_cleanup_rpc_threads(this, priv); - /* cleanup rot buffs */ - rbuf_dtor(priv->rbuf); - - /* cleanup poller thread */ - if (priv->poller) - 
(void)changelog_thread_cleanup(this, priv->poller); -} - static int changelog_init_rpc(xlator_t *this, changelog_priv_t *priv) { @@ -2747,10 +2778,13 @@ init(xlator_t *this) INIT_LIST_HEAD(&priv->queue); priv->barrier_enabled = _gf_false; - /* RPC ball rolling.. */ - ret = changelog_init_rpc(this, priv); - if (ret) - goto cleanup_barrier; + if (priv->rpc_active || priv->active) { + /* RPC ball rolling.. */ + ret = changelog_init_rpc(this, priv); + if (ret) + goto cleanup_barrier; + priv->rpc_active = _gf_true; + } ret = changelog_init(this, priv); if (ret) @@ -2762,7 +2796,9 @@ init(xlator_t *this) return 0; cleanup_rpc: - changelog_cleanup_rpc(this, priv); + if (priv->rpc_active) { + changelog_cleanup_rpc(this, priv); + } cleanup_barrier: changelog_barrier_pthread_destroy(priv); cleanup_options: @@ -2788,9 +2824,11 @@ fini(xlator_t *this) priv = this->private; if (priv) { - /* terminate RPC server/threads */ - changelog_cleanup_rpc(this, priv); - + if (priv->active || priv->rpc_active) { + /* terminate RPC server/threads */ + changelog_cleanup_rpc(this, priv); + GF_FREE(priv->ev_dispatcher); + } /* call barrier_disable to cancel timer */ if (priv->barrier_enabled) __chlog_barrier_disable(this, &queue); @@ -2859,6 +2897,13 @@ struct volume_options options[] = { .flags = OPT_FLAG_SETTABLE, .level = OPT_STATUS_BASIC, .tags = {"journal", "georep", "glusterfind"}}, + {.key = {"changelog-notification"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable changelog live notification", + .op_version = {3}, + .level = OPT_STATUS_BASIC, + .tags = {"bitrot", "georep"}}, {.key = {"changelog-brick"}, .type = GF_OPTION_TYPE_PATH, .description = "brick path to generate unique socket file name." 
diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am index 0c3966c968b..e2a277e372b 100644 --- a/xlators/features/cloudsync/src/Makefile.am +++ b/xlators/features/cloudsync/src/Makefile.am @@ -21,9 +21,9 @@ cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources) nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h BUILT_SOURCES = cloudsync-autogen-fops.h -cloudsync_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS) +cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) -cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL) AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ -DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\" diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c index 7680260988b..23c3599825a 100644 --- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c @@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid, int date_len = -1; int res_len = -1; - ctime = time(NULL); + ctime = gf_time(); gtime = gmtime(&ctime); date_len = strftime(httpdate, sizeof(httpdate), diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 4ece7ff6fc8..4abb2c73ce5 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -2104,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) worker_enqueue(this, stub); return 0; normal: - ret = dict_get_str(xattr_req, "link-count", &flag); + 
ret = dict_get_str_sizen(xattr_req, "link-count", &flag); if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) { STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, xattr_req); @@ -2592,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...) if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) { stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; /* Wait for draining stub from queue before notify PARENT_DOWN */ diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c index 67fdd53cee2..56dee244281 100644 --- a/xlators/features/leases/src/leases-internal.c +++ b/xlators/features/leases/src/leases-internal.c @@ -897,7 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx) } priv = this->private; - recall_time = time(NULL); + recall_time = gf_time(); list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, lease_id_list) { @@ -1367,7 +1367,7 @@ expired_recall_cleanup(void *data) gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread"); while (1) { - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&priv->mutex); { if (priv->fini) { diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c index 116aed68690..ab1eac68a53 100644 --- a/xlators/features/locks/src/clear.c +++ b/xlators/features/locks/src/clear.c @@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, if (plock->blocked) { bcount++; pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW, - &plock->user_flock, -1, EAGAIN, NULL); + &plock->user_flock, -1, EINTR, NULL); - STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN, + STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR, &plock->user_flock, NULL); } else { diff --git a/xlators/features/locks/src/common.c 
b/xlators/features/locks/src/common.c index c78d5372032..a2c6be93e03 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -460,11 +460,16 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) INIT_LIST_HEAD(&pl_inode->blocked_calls); INIT_LIST_HEAD(&pl_inode->metalk_list); INIT_LIST_HEAD(&pl_inode->queued_locks); + INIT_LIST_HEAD(&pl_inode->waiting); gf_uuid_copy(pl_inode->gfid, inode->gfid); pl_inode->check_mlock_info = _gf_true; pl_inode->mlock_enforced = _gf_false; + /* -2 means never looked up. -1 means something went wrong and link + * tracking is disabled. */ + pl_inode->links = -2; + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); if (ret) { pthread_mutex_destroy(&pl_inode->mutex); @@ -600,13 +605,11 @@ static void __insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock) { if (lock->blocked) - gettimeofday(&lock->blkd_time, NULL); + lock->blkd_time = gf_time(); else - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add_tail(&lock->list, &pl_inode->ext_list); - - return; } /* Return true if the locks overlap, false otherwise */ @@ -962,7 +965,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) struct list_head granted_list; posix_lock_t *tmp = NULL; posix_lock_t *lock = NULL; - + pl_local_t *local = NULL; INIT_LIST_HEAD(&granted_list); pthread_mutex_lock(&pl_inode->mutex); @@ -977,9 +980,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, 0, 0, NULL); - - STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); - + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); __destroy_lock(lock); } @@ -999,6 +1002,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, struct list_head granted_list; posix_lock_t *tmp = NULL; posix_lock_t *lock = NULL; + pl_local_t *local = NULL; int ret = -1; 
@@ -1026,9 +1030,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, 0, 0, NULL); - - STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); - + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); __destroy_lock(lock); } @@ -1289,3 +1293,299 @@ pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client) } return _gf_true; } + +static int32_t +pl_inode_from_loc(loc_t *loc, inode_t **pinode) +{ + inode_t *inode = NULL; + int32_t error = 0; + + if (loc->inode != NULL) { + inode = inode_ref(loc->inode); + goto done; + } + + if (loc->parent == NULL) { + error = EINVAL; + goto done; + } + + if (!gf_uuid_is_null(loc->gfid)) { + inode = inode_find(loc->parent->table, loc->gfid); + if (inode != NULL) { + goto done; + } + } + + if (loc->name == NULL) { + error = EINVAL; + goto done; + } + + inode = inode_grep(loc->parent->table, loc->parent, loc->name); + if (inode == NULL) { + /* We haven't found any inode. This means that the file doesn't exist + * or that even if it exists, we don't have any knowledge about it, so + * we don't have locks on it either, which is fine for our purposes. */ + goto done; + } + +done: + *pinode = inode; + + return error; +} + +static gf_boolean_t +pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode, + struct timespec *now, struct list_head *contend) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *lock; + gf_boolean_t has_owners = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(lock, &dom->inodelk_list, list) + { + /* If the lock belongs to the same client, we assume it's related + * to the same operation, so we allow the removal to continue. */ + if (lock->client == client) { + continue; + } + /* If the lock belongs to an internal process, we don't block the + * removal. 
*/ + if (lock->client_pid < 0) { + continue; + } + if (contend == NULL) { + return _gf_true; + } + has_owners = _gf_true; + inodelk_contention_notify_check(xl, lock, now, contend); + } + } + + return has_owners; +} + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend) +{ + struct timespec now; + inode_t *inode; + pl_inode_t *pl_inode; + int32_t error; + + pl_inode = NULL; + + error = pl_inode_from_loc(loc, &inode); + if ((error != 0) || (inode == NULL)) { + goto done; + } + + pl_inode = pl_inode_get(xl, inode, NULL); + if (pl_inode == NULL) { + inode_unref(inode); + error = ENOMEM; + goto done; + } + + /* pl_inode_from_loc() already increments ref count for inode, so + * we only assign here our reference. */ + pl_inode->inode = inode; + + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (pl_inode->removed) { + error = ESTALE; + goto unlock; + } + + if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { + error = -1; + /* We skip the unlock here because the caller must create a stub when + * we return -1 and do a call to pl_inode_remove_complete(), which + * assumes the lock is still acquired and will release it once + * everything else is prepared. 
*/ + goto done; + } + + pl_inode->is_locked = _gf_true; + pl_inode->remove_running++; + +unlock: + pthread_mutex_unlock(&pl_inode->mutex); + +done: + *ppl_inode = pl_inode; + + return error; +} + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend) +{ + pl_inode_lock_t *lock; + int32_t error = -1; + + if (stub != NULL) { + list_add_tail(&stub->list, &pl_inode->waiting); + pl_inode->is_locked = _gf_true; + } else { + error = ENOMEM; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_inode_lock_t, list); + list_del_init(&lock->list); + __pl_inodelk_unref(lock); + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + if (error < 0) { + inodelk_contention_notify(xl, contend); + } + + inode_unref(pl_inode->inode); + + return error; +} + +void +pl_inode_remove_wake(struct list_head *list) +{ + call_stub_t *stub; + + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); + + call_resume(stub); + } +} + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) +{ + struct list_head contend, granted; + struct timespec now; + pl_dom_list_t *dom; + + if (pl_inode == NULL) { + return; + } + + INIT_LIST_HEAD(&contend); + INIT_LIST_HEAD(&granted); + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (error == 0) { + if (pl_inode->links >= 0) { + pl_inode->links--; + } + if (pl_inode->links == 0) { + pl_inode->removed = _gf_true; + } + } + + pl_inode->remove_running--; + + if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { + pl_inode->is_locked = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, + &contend); + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(xl, pl_inode, &granted); + + inodelk_contention_notify(xl, &contend); + + inode_unref(pl_inode->inode); +} 
+ +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list) +{ + call_stub_t *stub, *tmp; + + if (!pl_inode->is_locked) { + return; + } + + list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list) + { + if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL, + NULL)) { + list_move_tail(&stub->list, list); + } + } +} + +/* This function determines if an inodelk attempt can be done now or it needs + * to wait. + * + * Possible return values: + * < 0: An error occurred. Currently only -ESTALE can be returned if the + * inode has been deleted previously by unlink/rmdir/rename + * = 0: The lock can be attempted. + * > 0: The lock needs to wait because a conflicting remove operation is + * ongoing. + */ +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *ilock; + + /* If the inode has been deleted, we won't allow any lock. */ + if (pl_inode->removed) { + return -ESTALE; + } + + /* We only synchronize with locks made for regular operations coming from + * the user. Locks done for internal purposes are hard to control and could + * lead to long delays or deadlocks quite easily. */ + if (lock->client_pid < 0) { + return 0; + } + if (!pl_inode->is_locked) { + return 0; + } + if (pl_inode->remove_running > 0) { + return 1; + } + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(ilock, &dom->inodelk_list, list) + { + /* If a lock from the same client is already granted, we allow this + * one to continue. This is necessary to prevent deadlocks when + * multiple locks are taken for the same operation. + * + * On the other side it's unlikely that the same client sends + * completely unrelated locks for the same inode. 
+ */ + if (ilock->client == lock->client) { + return 0; + } + } + } + + return 1; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 0916c299e84..281223bf3b8 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -105,6 +105,15 @@ void __pl_inodelk_unref(pl_inode_lock_t *lock); void +__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend); + +void +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted); + +void grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, struct timespec *now, struct list_head *contend); @@ -204,6 +213,16 @@ pl_metalock_is_active(pl_inode_t *pl_inode); void __pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock); +void +inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock, + struct timespec *now, + struct list_head *contend); + +void +entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock, + struct timespec *now, + struct list_head *contend); + gf_boolean_t pl_does_monkey_want_stuck_lock(); @@ -218,4 +237,26 @@ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd); gf_boolean_t pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client); + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend); + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend); + +void +pl_inode_remove_wake(struct list_head *list); + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error); + +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list); + +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock); + #endif /* __COMMON_H__ */ diff 
--git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index 0911659b437..fd772c850dd 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -121,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, pl_entry_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; @@ -129,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, * chance? Or just the locks we are attempting to acquire? */ if (names_conflict(candidate_lock->basename, requested_lock->basename)) { - gettimeofday(&curr, NULL); - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -204,9 +202,9 @@ out: return revoke_lock; } -static gf_boolean_t -__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, - struct timespec *now) +void +entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock, + struct timespec *now, struct list_head *contend) { posix_locks_private_t *priv; int64_t elapsed; @@ -216,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, /* If this lock is in a list, it means that we are about to send a * notification for it, so no need to do anything else. 
*/ if (!list_empty(&lock->contend)) { - return _gf_false; + return; } elapsed = now->tv_sec; @@ -225,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, elapsed--; } if (elapsed < priv->notify_contention_delay) { - return _gf_false; + return; } /* All contention notifications will be sent outside of the locked @@ -238,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, lock->contention_time = *now; - return _gf_true; + list_add_tail(&lock->contend, contend); } void @@ -332,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock, break; } } - if (__entrylk_needs_contention_notify(this, tmp, now)) { - list_add_tail(&tmp->contend, contend); - } + entrylk_contention_notify_check(this, tmp, now, contend); } } @@ -546,14 +542,10 @@ static int __lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, pl_entry_lock_t *lock, int nonblock) { - struct timeval now; - if (nonblock) goto out; - gettimeofday(&now, NULL); - - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks); gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", @@ -614,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, } __pl_entrylk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->domain_list, &dom->entrylk_list); ret = 0; @@ -697,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend); if (bl_ret == 0) { - list_add(&bl->blocked_locks, granted); + list_add_tail(&bl->blocked_locks, granted); } } - return; } /* Grants locks if possible which are blocked on a lock */ diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index 6022e5a6d94..d4e51d6e0a1 100644 --- a/xlators/features/locks/src/inodelk.c +++ 
b/xlators/features/locks/src/inodelk.c @@ -140,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock, pl_inode_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; /* Question: Should we just prune them all given the * chance? Or just the locks we are attempting to acquire? */ if (inodelk_conflict(candidate_lock, requested_lock)) { - gettimeofday(&curr, NULL); - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -229,9 +227,9 @@ out: return revoke_lock; } -static gf_boolean_t -__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, - struct timespec *now) +void +inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock, + struct timespec *now, struct list_head *contend) { posix_locks_private_t *priv; int64_t elapsed; @@ -241,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, /* If this lock is in a list, it means that we are about to send a * notification for it, so no need to do anything else. 
*/ if (!list_empty(&lock->contend)) { - return _gf_false; + return; } elapsed = now->tv_sec; @@ -250,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, elapsed--; } if (elapsed < priv->notify_contention_delay) { - return _gf_false; + return; } /* All contention notifications will be sent outside of the locked @@ -263,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, lock->contention_time = *now; - return _gf_true; + list_add_tail(&lock->contend, contend); } void @@ -351,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, break; } } - if (__inodelk_needs_contention_notify(this, l, now)) { - list_add_tail(&l->contend, contend); - } + inodelk_contention_notify_check(this, l, now, contend); } } @@ -399,15 +395,11 @@ static int __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, int can_block) { - struct timeval now; - if (can_block == 0) { goto out; } - gettimeofday(&now, NULL); - - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks); gf_msg_trace(this->name, 0, @@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, struct list_head *contend) { pl_inode_lock_t *conf = NULL; - int ret = -EINVAL; + int ret; - conf = __inodelk_grantable(this, dom, lock, now, contend); - if (conf) { - ret = __lock_blocked_add(this, dom, lock, can_block); - goto out; + ret = pl_inode_remove_inodelk(pl_inode, lock); + if (ret < 0) { + return ret; + } + if (ret == 0) { + conf = __inodelk_grantable(this, dom, lock, now, contend); + } + if ((ret > 0) || (conf != NULL)) { + return __lock_blocked_add(this, dom, lock, can_block); } /* To prevent blocked locks starvation, check if there are any blocked @@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, "starvation"); } - ret = __lock_blocked_add(this, dom, lock, 
can_block); - goto out; + return __lock_blocked_add(this, dom, lock, can_block); } __pl_inodelk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->list, &dom->inodelk_list); - ret = 0; - -out: - return ret; + return 0; } /* Return true if the two inodelks have exactly same lock boundaries */ @@ -502,33 +495,36 @@ static pl_inode_lock_t * __inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) { pl_inode_lock_t *conf = NULL; + inode_t *inode = NULL; + + inode = lock->pl_inode->inode; conf = find_matching_inodelk(lock, dom); if (!conf) { gf_log(this->name, GF_LOG_ERROR, " Matching lock not found for unlock %llu-%llu, by %s " - "on %p", + "on %p for gfid:%s", (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner), - lock->client); + lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN"); goto out; } __delete_inode_lock(conf); gf_log(this->name, GF_LOG_DEBUG, - " Matching lock found for unlock %llu-%llu, by %s on %p", + " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s", (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, - lkowner_utoa(&lock->owner), lock->client); + lkowner_utoa(&lock->owner), lock->client, + inode ? 
uuid_utoa(inode->gfid) : "UNKNOWN"); out: return conf; } -static void +void __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted, pl_dom_list_t *dom, struct timespec *now, struct list_head *contend) { - int bl_ret = 0; pl_inode_lock_t *bl = NULL; pl_inode_lock_t *tmp = NULL; @@ -541,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, { list_del_init(&bl->blocked_locks); - bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); + bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); - if (bl_ret == 0) { - list_add(&bl->blocked_locks, granted); + if (bl->status != -EAGAIN) { + list_add_tail(&bl->blocked_locks, granted); } } - return; } -/* Grant all inodelks blocked on a lock */ void -grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom, struct timespec *now, - struct list_head *contend) +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) { - struct list_head granted; pl_inode_lock_t *lock; pl_inode_lock_t *tmp; + int32_t op_ret; + int32_t op_errno; - INIT_LIST_HEAD(&granted); - - pthread_mutex_lock(&pl_inode->mutex); - { - __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, - contend); - } - pthread_mutex_unlock(&pl_inode->mutex); - - list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { - gf_log(this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, - lkowner_utoa(&lock->owner), lock->user_flock.l_start, - lock->user_flock.l_len); - + if (lock->status == 0) { + op_ret = 0; + op_errno = 0; + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => Granted", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + } else { + op_ret = -1; + op_errno = -lock->status; + } pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, - 0, 0, lock->volume); + op_ret, op_errno, lock->volume); - STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL); + STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL); lock->frame = NULL; } pthread_mutex_lock(&pl_inode->mutex); { - list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { list_del_init(&lock->blocked_locks); __pl_inodelk_unref(lock); @@ -595,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, pthread_mutex_unlock(&pl_inode->mutex); } +/* Grant all inodelks blocked on a lock */ +void +grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) +{ + struct list_head granted; + + INIT_LIST_HEAD(&granted); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, + contend); + } + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(this, pl_inode, &granted); +} + static void pl_inodelk_log_cleanup(pl_inode_lock_t *lock) { @@ -656,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) * and blocked lists, then this means that a parallel * unlock on another inodelk (L2 say) may have 'granted' * L1 and added it to 'granted' list in - * __grant_blocked_node_locks() (although using the + * __grant_blocked_inode_locks() (although using the * 'blocked_locks' member). 
In that case, the cleanup * codepath must try and grant other overlapping * blocked inodelks from other clients, now that L1 is @@ -741,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, gf_boolean_t need_inode_unref = _gf_false; struct list_head *pcontend = NULL; struct list_head contend; + struct list_head wake; struct timespec now = {}; short fl_type; @@ -792,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, timespec_now(&now); } + INIT_LIST_HEAD(&wake); + if (ctx) pthread_mutex_lock(&ctx->lock); pthread_mutex_lock(&pl_inode->mutex); @@ -814,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, lkowner_utoa(&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); - if (can_block) + if (can_block) { unref = _gf_false; - /* For all but the case where a non-blocking - * lock attempt fails, the extra ref taken at - * the start of this function must be negated. - */ - else - need_inode_unref = _gf_true; + } } - - if (ctx && (!ret || can_block)) + /* For all but the case where a non-blocking lock attempt fails + * with -EAGAIN, the extra ref taken at the start of this function + * must be negated. 
*/ + need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block); + if (ctx && !need_inode_unref) { list_add_tail(&lock->client_list, &ctx->inodelk_lockers); + } } else { /* Irrespective of whether unlock succeeds or not, * the extra inode ref that was done at the start of @@ -843,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, list_del_init(&retlock->client_list); __pl_inodelk_unref(retlock); + pl_inode_remove_unlocked(this, pl_inode, &wake); + ret = 0; } out: @@ -853,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, if (ctx) pthread_mutex_unlock(&ctx->lock); + pl_inode_remove_wake(&wake); + /* The following (extra) unref corresponds to the ref that * was done at the time the lock was granted. */ @@ -1033,10 +1051,14 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, inode); if (ret < 0) { - if ((can_block) && (F_UNLCK != lock_type)) { - goto out; + if (ret == -EAGAIN) { + if (can_block && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); + } else { + gf_log(this->name, GF_LOG_TRACE, "returning %d", ret); } - gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); op_errno = -ret; goto unwind; } diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 3305350afb1..c868eb494a2 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -43,9 +43,8 @@ struct __posix_lock { fd_t *fd; call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ /* These two together serve to uniquely identify each process across nodes */ @@ -85,9 +84,9 @@ struct __pl_inode_lock { call_frame_t *frame; - struct timeval 
blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; @@ -102,6 +101,9 @@ struct __pl_inode_lock { struct list_head client_list; /* list of all locks from a client */ short fl_type; + + int32_t status; /* Error code when we try to grant a lock in blocked + state */ }; typedef struct __pl_inode_lock pl_inode_lock_t; @@ -136,9 +138,9 @@ struct __entry_lock { const char *basename; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; @@ -164,13 +166,14 @@ struct __pl_inode { struct list_head rw_list; /* list of waiting r/w requests */ struct list_head reservelk_list; /* list of reservelks */ struct list_head blocked_reservelks; /* list of blocked reservelks */ - struct list_head - blocked_calls; /* List of blocked lock calls while a reserve is held*/ - struct list_head metalk_list; /* Meta lock list */ - /* This is to store the incoming lock - requests while meta lock is enabled */ - struct list_head queued_locks; - int mandatory; /* if mandatory locking is enabled */ + struct list_head blocked_calls; /* List of blocked lock calls while a + reserve is held*/ + struct list_head metalk_list; /* Meta lock list */ + struct list_head queued_locks; /* This is to store the incoming lock + requests while meta lock is enabled */ + struct list_head waiting; /* List of pending fops waiting to 
unlink/rmdir + the inode. */ + int mandatory; /* if mandatory locking is enabled */ inode_t *refkeeper; /* hold refs on an inode while locks are held to prevent pruning */ @@ -197,7 +200,13 @@ struct __pl_inode { */ int fop_wind_count; pthread_cond_t check_fop_wind_count; + gf_boolean_t track_fop_wind_count; + + int32_t links; /* Number of hard links the inode has. */ + uint32_t remove_running; /* Number of remove operations running. */ + gf_boolean_t is_locked; /* Regular locks will be blocked. */ + gf_boolean_t removed; /* The inode has been deleted. */ }; typedef struct __pl_inode pl_inode_t; diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index af530aafd84..cf0ae4c57dd 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -148,6 +148,29 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); } \ } while (0) +#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \ + _args...) \ + ({ \ + struct list_head contend; \ + pl_inode_t *__pl_inode; \ + call_stub_t *__stub; \ + int32_t __error; \ + INIT_LIST_HEAD(&contend); \ + __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? 
_loc2 : _loc1, \ + &__pl_inode, &contend); \ + if (__error < 0) { \ + __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \ + __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \ + &contend); \ + } else if (__error == 0) { \ + PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \ + _loc2); \ + STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \ + FIRST_CHILD(_xl)->fops->_fop, ##_args); \ + } \ + __error; \ + }) + gf_boolean_t pl_has_xdata_requests(dict_t *xdata) { @@ -471,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, char *save_ptr = NULL; tmp_key = gf_strdup(key); + if (!tmp_key) + return -1; + strtok_r(tmp_key, ":", &save_ptr); if (!*save_ptr) { if (tmp_key) @@ -2962,11 +2988,85 @@ out: return ret; } +static int32_t +pl_request_link_count(dict_t **pxdata) +{ + dict_t *xdata; + + xdata = *pxdata; + if (xdata == NULL) { + xdata = dict_new(); + if (xdata == NULL) { + return ENOMEM; + } + } else { + dict_ref(xdata); + } + + if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) { + dict_unref(xdata); + return ENOMEM; + } + + *pxdata = xdata; + + return 0; +} + +static int32_t +pl_check_link_count(dict_t *xdata) +{ + int32_t count; + + /* In case we are unable to read the link count from xdata, we take a + * conservative approach and return -2, which will prevent the inode from + * being considered deleted. In fact it will cause link tracking for this + * inode to be disabled completely to avoid races. 
*/ + + if (xdata == NULL) { + return -2; + } + + if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) { + return -2; + } + + return count; +} + int32_t pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { + pl_inode_t *pl_inode; + + if (op_ret >= 0) { + pl_inode = pl_inode_get(this, inode, NULL); + if (pl_inode == NULL) { + PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL, + NULL); + return 0; + } + + pthread_mutex_lock(&pl_inode->mutex); + + /* We only update the link count if we previously didn't know it. + * Doing it always can lead to races since lookup is not executed + * atomically most of the times. */ + if (pl_inode->links == -2) { + pl_inode->links = pl_check_link_count(xdata); + if (buf->ia_type == IA_IFDIR) { + /* Directories have at least 2 links. To avoid special handling + * for directories, we simply decrement the value here to make + * them equivalent to regular files. 
*/ + pl_inode->links--; + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata, postparent); return 0; @@ -2975,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); + int32_t error; + + error = pl_request_link_count(&xdata); + if (error == 0) { + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref(xdata); + } else { + STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL); + } return 0; } @@ -3502,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner, time_t *blkd_time, gf_boolean_t active) { char *type_str = NULL; - char granted[256] = { + char granted[GF_TIMESTR_SIZE] = { 0, }; - char blocked[256] = { + char blocked[GF_TIMESTR_SIZE] = { 0, }; @@ -3556,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode) { pl_dom_list_t *dom = NULL; pl_entry_lock_t *lock = NULL; - char blocked[256] = { + char blocked[GF_TIMESTR_SIZE] = { 0, }; - char granted[256] = { + char granted[GF_TIMESTR_SIZE] = { 0, }; int count = 0; @@ -3579,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->entrylk_list, domain_list) { - gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec, + gf_time_fmt(granted, sizeof(granted), lock->granted_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count); - if (lock->blkd_time.tv_sec == 0) { + if (lock->blkd_time == 0) { snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? 
"ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", @@ -3590,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode) lkowner_utoa(&lock->owner), lock->client, lock->connection_id, granted); } else { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" @@ -3607,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count); @@ -3659,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode) SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, - &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, - _gf_true); + lock->client, lock->connection_id, &lock->granted_time, + &lock->blkd_time, _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3673,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode) count); SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, 0, - &lock->blkd_time.tv_sec, _gf_false); + lock->client, lock->connection_id, 0, &lock->blkd_time, + _gf_false); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3707,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode) gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count, lock->blocked ? "BLOCKED" : "ACTIVE"); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->client_uid, &lock->granted_time.tv_sec, - &lock->blkd_time.tv_sec, - (lock->blocked) ? _gf_false : _gf_true); + lock->client, lock->client_uid, &lock->granted_time, + &lock->blkd_time, (lock->blocked) ? 
_gf_false : _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3793,6 +3899,10 @@ unlock: gf_proc_dump_write("posixlk-count", "%d", count); __dump_posixlks(pl_inode); } + + gf_proc_dump_write("links", "%d", pl_inode->links); + gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running); + gf_proc_dump_write("removed", "%u", pl_inode->removed); } pthread_mutex_unlock(&pl_inode->mutex); @@ -4104,6 +4214,10 @@ fini(xlator_t *this) if (!priv) return; this->private = NULL; + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } GF_FREE(priv->brickname); GF_FREE(priv); @@ -4134,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, struct iatt *postoldparent, struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); + PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent, postoldparent, prenewparent, postnewparent, xdata); + return 0; } @@ -4143,10 +4260,15 @@ int32_t pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); + int32_t error; + + error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename, + pl_rename_cbk, oldloc, newloc, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL, + NULL, NULL); + } - STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); return 0; } @@ -4270,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); + PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent, postparent, xdata); + return 0; } @@ -4279,9 +4404,14 @@ int32_t pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + int32_t error; + + error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink, + pl_unlink_cbk, loc, xflag, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL); + } + return 0; } @@ -4348,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); + PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent, postparent, xdata); + return 0; } @@ -4357,9 +4490,14 @@ int pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); + int32_t error; + + error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir, + pl_rmdir_cbk, loc, xflags, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL); + } + return 0; } @@ -4389,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_t *pl_inode = (pl_inode_t *)cookie; + + if (op_ret >= 0) { + pthread_mutex_lock(&pl_inode->mutex); + + /* TODO: can happen pl_inode->links == 0 ? 
*/ + if (pl_inode->links >= 0) { + pl_inode->links++; + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf, preparent, postparent, xdata); return 0; @@ -4398,9 +4549,18 @@ int pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { + pl_inode_t *pl_inode; + + pl_inode = pl_inode_get(this, oldloc->inode, NULL); + if (pl_inode == NULL) { + STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); + return 0; + } + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); - STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); return 0; } diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c index 4176ab6a8f1..3de2ea1c92c 100644 --- a/xlators/features/marker/src/marker-quota.c +++ b/xlators/features/marker/src/marker-quota.c @@ -1713,21 +1713,17 @@ mq_initiate_quota_task(void *opaque) } out: - if (dirty) { - if (ret < 0) { - /* On failure clear dirty status flag. - * In the next lookup inspect_directory_xattr - * can set the status flag and fix the - * dirty directory. - * Do the same if the dir was dirty before - * txn - */ - ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); - if (ret == 0) - mq_set_ctx_dirty_status(parent_ctx, _gf_false); - } else { - ret = mq_mark_dirty(this, &parent_loc, 0); - } + if ((dirty) && (ret < 0)) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory. 
+ * Do the same if the dir was dirty before + * txn + */ + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); } if (locked) diff --git a/xlators/features/metadisp/Makefile.am b/xlators/features/metadisp/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/metadisp/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am new file mode 100644 index 00000000000..1520ad8c424 --- /dev/null +++ b/xlators/features/metadisp/src/Makefile.am @@ -0,0 +1,38 @@ +noinst_PYTHON = gen-fops.py + +EXTRA_DIST = fops-tmpl.c + +xlator_LTLIBRARIES = metadisp.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +nodist_metadisp_la_SOURCES = fops.c + +BUILT_SOURCES = fops.c + +metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +metadisp_la_SOURCES = metadisp.c \ + metadisp-unlink.c \ + metadisp-stat.c \ + metadisp-lookup.c \ + metadisp-readdir.c \ + metadisp-create.c \ + metadisp-open.c \ + metadisp-fsync.c \ + metadisp-setattr.c \ + backend.c + +metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = metadisp.h metadisp-fops.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py + PYTHONPATH=$(top_srcdir)/libglusterfs/src \ + $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@ + +CLEANFILES = $(nodist_metadisp_la_SOURCES) diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c new file mode 100644 index 00000000000..ee2c25bfaa7 --- /dev/null +++ b/xlators/features/metadisp/src/backend.c @@ -0,0 +1,45 @@ +#define GFID_STR_LEN 37 + +#include "metadisp.h" + +/* + * backend.c + * 
+ * functions responsible for converting user-facing paths to backend-style + * "/$GFID" paths. + */ + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc) +{ + static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + char gfid_buf[GFID_STR_LEN + 1] = { + 0, + }; + char *path = NULL; + + GF_VALIDATE_OR_GOTO("metadisp", src_loc, out); + GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out); + + loc_copy(dst_loc, src_loc); + memcpy(dst_loc->pargfid, root, sizeof(root)); + GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke + // whatever loc_copy gave us + + uuid_utoa_r(gfid, gfid_buf); + + path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char), + gf_common_mt_char); // freed via loc_wipe + + path[0] = '/'; + strncpy(path + 1, gfid_buf, GFID_STR_LEN); + path[GFID_STR_LEN] = 0; + dst_loc->path = path; + if (src_loc->name) + dst_loc->name = strrchr(dst_loc->path, '/'); + if (dst_loc->name) + dst_loc->name++; + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c new file mode 100644 index 00000000000..4385b7dd5b7 --- /dev/null +++ b/xlators/features/metadisp/src/fops-tmpl.c @@ -0,0 +1,10 @@ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <glusterfs/xlator.h> +#include "metadisp.h" +#include "metadisp-fops.h" + +#pragma generate diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py new file mode 100644 index 00000000000..8b5e120fdec --- /dev/null +++ b/xlators/features/metadisp/src/gen-fops.py @@ -0,0 +1,160 @@ +#!/usr/bin/python + +import sys +from generator import fop_subs, generate + +FN_METADATA_CHILD_GENERIC = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ metadata"); + STACK_WIND (frame, default_@NAME@_cbk, + METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + 
+FN_GENERIC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ generic"); + STACK_WIND (frame, default_@NAME@_cbk, + DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_DATAFD_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ datafd"); + xlator_t *child = NULL; + child = DATA_CHILD(this); + STACK_WIND (frame, default_@NAME@_cbk, + child, child->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_DATALOC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ dataloc"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + xlator_t *child = NULL; + child = DATA_CHILD(this); + STACK_WIND (frame, default_@NAME@_cbk, + child, child->fops->@NAME@, + @SHORT_ARGS@); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} +""" + +FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@," + +skipped = [ + "readdir", + "readdirp", + "lookup", + "fsync", + "stat", + "open", + "create", + "unlink", + "setattr", + # TODO: implement "inodelk", +] + + +def gen_fops(): + done = skipped + + # + # these are fops that wind to the DATA_CHILD + # + # NOTE: re-written in order from google doc: + # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q + for name in [ + "writev", + "readv", + "ftruncate", + "zerofill", + "discard", + "seek", + "fstat", + ]: + done = done + [name] + print(generate(FN_DATAFD_TEMPLATE, name, fop_subs)) + + for name in ["truncate"]: + done = done + [name] + print(generate(FN_DATALOC_TEMPLATE, name, fop_subs)) + + # these are fops that operate solely on dentries, folders, + # or extended attributes. 
Therefore, they must always + # wind to METADATA_CHILD and should never perform + # any path rewriting + # + # NOTE: re-written in order from google doc: + # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q + for name in [ + "mkdir", + "symlink", + "link", + "rename", + "mknod", + "opendir", + # "readdir, # special-cased + # "readdirp, # special-cased + "fsyncdir", + # "setattr", # special-cased + "readlink", + "fentrylk", + "access", + # TODO: these wind to both, + # data for backend-attributes and metadata for the rest + "xattrop", + "setxattr", + "getxattr", + "removexattr", + "fgetxattr", + "fsetxattr", + "fremovexattr", + ]: + + done = done + [name] + print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs)) + + print("struct xlator_fops fops = {") + for name in done: + print(generate(FOPS_LINE_TEMPLATE, name, fop_subs)) + + print("};") + + +for l in open(sys.argv[1], "r").readlines(): + if l.find("#pragma generate") != -1: + print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") + gen_fops() + print("/* END GENERATED CODE */") + else: + print(l[:-1]) diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c new file mode 100644 index 00000000000..f8c9798dd59 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-create.c @@ -0,0 +1,101 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * Create, like stat, is a two-step process. We send a create + * to the METADATA_CHILD, then send another create to the DATA_CHILD. + * + * We do the metadata child first to ensure that the ACLs are enforced. 
+ */ + +int32_t +metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + // create the backend data inode + STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +} + +int32_t +metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = cookie; + if (op_ret != 0) { + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; + } + + if (stub == NULL) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + return 0; + } + + call_resume(stub); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} + +int32_t +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("."); + + loc_t backend_loc = { + 0, + }; + call_stub_t *stub = NULL; + uuid_t *gfid_req = NULL; + + RESOLVE_GFID_REQ(xdata, gfid_req, out); + + if (build_backend_loc(*gfid_req, loc, &backend_loc)) { + goto unwind; + } + + frame->local = loc; + + stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags, + mode, umask, fd, xdata); + + STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, 
METADATA_CHILD(this), + METADATA_CHILD(this)->fops->create, loc, flags, mode, + umask, fd, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h new file mode 100644 index 00000000000..56dd427cf34 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fops.h @@ -0,0 +1,51 @@ +#ifndef GF_METADISP_FOPS_H_ +#define GF_METADISP_FOPS_H_ + +#include <glusterfs/xlator.h> +#include <glusterfs/dict.h> +#include <glusterfs/glusterfs.h> + +#include <sys/types.h> + +/* fops in here are defined in their own file. Every other fop is just defined + * inline of fops.c */ + +int +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata); + +int +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict); + +int +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + +int +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata); + +int +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata); + +int +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata); + +int +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + +#endif diff --git a/xlators/features/metadisp/src/metadisp-fsync.c 
b/xlators/features/metadisp/src/metadisp-fsync.c new file mode 100644 index 00000000000..2e46fa84eac --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fsync.c @@ -0,0 +1,54 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t flags, dict_t *xdata) +{ + STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} + +int32_t +metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata); + STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c new file mode 100644 index 00000000000..27d90c9f746 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-lookup.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * Lookup, like stat, is a two-step process for grabbing the metadata details + * as well as the data details. 
+ */ + +int32_t +metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + METADISP_TRACE("backend_lookup_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; +} + +int32_t +metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("backend_lookup_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->lookup, &backend_loc, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + + // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + call_resume(stub); + return 0; +unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + METADISP_TRACE("lookup"); + call_stub_t *stub = NULL; + stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata); + STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, 
stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c new file mode 100644 index 00000000000..64814afe636 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-open.c @@ -0,0 +1,70 @@ +#include <glusterfs/call-stub.h> +#include "metadisp.h" + +int32_t +metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("got open results %d %d", op_ret, op_errno); + + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (!stub) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, fd_t *fd, dict_t *xdata) +{ + STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this), + DATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int32_t +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd, + xdata); + STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c new file mode 100644 index 
00000000000..5f840b1e88f --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-readdir.c @@ -0,0 +1,65 @@ +#include "metadisp.h" + +/** + * With a change to the posix xlator, readdir and readdirp are shockingly + * simple. + * + * The issue with separating the backend data of the files + * with the metadata is that readdirs must now read from multiple sources + * to coalesce the directory entries. + * + * The way we do this is to tell the METADATA_CHILD that when it's + * running readdirp, each file entry should have a stat wound to + * 'stat-source-of-truth'. + * + * see metadisp_stat for how it handles winds _from_posix. + */ + +int32_t +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + /* + * Always use readdirp, even if the original was readdir. Why? Because NFS. + * There are multiple translations between Gluster, UNIX, and NFS stat + * structures in that path. One of them uses the type etc. from the stat + * structure, which is only filled in by readdirp. If we use readdir, the + * entries do actually go all the way back to the client and are visible in + * getdents, but then the readdir throws them away because of the + * uninitialized type. + */ + GF_UNUSED int32_t ret; + if (!xdata) { + xdata = dict_new(); + } + + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! + ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + + return 0; +} + +int32_t +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + if (!xdata) { + xdata = dict_new(); + } + GF_UNUSED int32_t ret; + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! 
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c new file mode 100644 index 00000000000..6991cf644f3 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-setattr.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + return 0; +} + +int32_t +metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid, + xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(statpost->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + call_resume(stub); + return 0; 
+unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + METADISP_TRACE("setattr"); + call_stub_t *stub = NULL; + stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf, + valid, xdata); + STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid, + xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c new file mode 100644 index 00000000000..b06d0dbcddd --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-stat.c @@ -0,0 +1,124 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The stat flow in METADISP is complicated because we must + * do ensure a few things: + * 1. stat, on the path within the metadata layer, + * MUST get the backend FD of the data layer. + * --- we wind to the metadata layer, then the data layer. + * + * 2. the metadata layer MUST be able to ask the data + * layer for stat information. + * --- this is 'syncop-internal-from-posix' + * + * 3. when the metadata exists BUT the data is missing, + * we MUST mark the backend file as bad and heal it. 
+ */ + +int32_t +metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno); + if (op_errno == ENOENT) { + STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL); + return 0; + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("winding stat to path %s", loc->path); + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("bad object, sending EUCLEAN"); + STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL); + return 0; + } + + STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this), + SECOND_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + +int32_t +metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + METADISP_TRACE("got stat results %d %d", op_ret, op_errno); + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // only use the stub for the files + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int32_t ret = 0; + loc_t backend_loc = { + 0, + }; + METADISP_FILTER_ROOT(stat, loc, xdata); + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) { + // if we've just been sent a stat from posix, then we know + // that we must send 
down a stat for a file to the second child. + // + // that means we can skip the stat for the first child and just + // send to the data disk. + METADISP_TRACE("got syncop-internal-from-posix"); + STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->stat, &backend_loc, xdata); + return 0; + } + + // we do not know if the request is for a file, folder, etc. wind + // to first child to find out. + stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata); + METADISP_TRACE("winding stat to first child %s", loc->path); + STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->stat, loc, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c new file mode 100644 index 00000000000..1f6a8eb35ce --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-unlink.c @@ -0,0 +1,160 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The unlink flow in metadisp is complicated because we must + * do ensure that UNLINK causes both the metadata objects + * to get removed and the data objects to get removed. + */ + +int32_t +metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflag, dict_t *xdata) +{ + METADISP_TRACE("winding backend unlink to path %s", loc->path); + STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +} + +int32_t +metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE(". 
%d %d", op_ret, op_errno); + + int ret = 0; + call_stub_t *stub = NULL; + int nlink = 0; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink); + if (ret != 0) { + op_errno = EINVAL; + op_ret = -1; + goto unwind; + } + METADISP_TRACE("frontend hardlink count %d %d", ret, nlink); + if (nlink > 1) { + goto unwind; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; +} + +int32_t +metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + call_stub_t *stub = NULL; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // fail fast on empty gfid so we don't loop forever + if (gf_uuid_is_null(buf->ia_gfid)) { + op_ret = -1; + op_errno = ENODATA; + goto unwind; + } + + // fill gfid since the stub is incomplete + memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t)); + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("winding lookup for unlink to path %s", loc->path); + + // loop back to ourselves after a lookup + stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata); + 
STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub, + METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; + } + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + // + // ensure we get the link count on the unlink response, so we can + // account for hardlinks before winding to the backend. + // NOTE: + // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation + // is needed to ensure that multiple requests will work in the same + // xlator stack. + // + if (!xdata) { + xdata = dict_new(); + } + dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + + METADISP_TRACE("winding frontend unlink to path %s", loc->path); + stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag, + xdata); + + STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c new file mode 100644 index 00000000000..3c8f150cebc --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.c @@ -0,0 +1,46 @@ +#include <glusterfs/call-stub.h> + +#include "metadisp.h" +#include "metadisp-fops.h" + +int32_t +init(xlator_t *this) +{ + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "not configured with children. exiting"); + return -1; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. 
check volfile "); + } + + return 0; +} + +void +fini(xlator_t *this) +{ + return; +} + +/* defined in fops.c */ +struct xlator_fops fops; + +struct xlator_cbks cbks = {}; + +struct volume_options options[] = { + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .fops = &fops, + .cbks = &cbks, + .options = options, + .op_version = {1}, + .identifier = "metadisp", + .category = GF_EXPERIMENTAL, +}; diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h new file mode 100644 index 00000000000..c8fd7a13c04 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef GF_METADISP_H_ +#define GF_METADISP_H_ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> + +#define METADATA_CHILD(_this) FIRST_CHILD(_this) +#define DATA_CHILD(_this) SECOND_CHILD(_this) + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc); + +#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args) + +#define METADISP_FILTER_ROOT(_op, _args...) \ + if (strcmp(loc->path, "/") == 0) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) 
\ + if (__is_root_gfid(_gfid)) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \ + VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \ + _lbl) + +#endif /* GF_METADISP_H_ */ diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c index 1a4c2e30dd6..480d64ade27 100644 --- a/xlators/features/quota/src/quota-enforcer-client.c +++ b/xlators/features/quota/src/quota-enforcer-client.c @@ -32,12 +32,6 @@ #include <malloc.h> #endif -#ifdef HAVE_MALLOC_STATS -#ifdef DEBUG -#include <mcheck.h> -#endif -#endif - #include "quota.h" #include "quota-messages.h" @@ -362,16 +356,28 @@ quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata, { xlator_t *this = NULL; int ret = 0; + quota_priv_t *priv = NULL; this = mydata; - + priv = this->private; switch (event) { case RPC_CLNT_CONNECT: { + pthread_mutex_lock(&priv->conn_mutex); + { + priv->conn_status = _gf_true; + } + pthread_mutex_unlock(&priv->conn_mutex); gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT"); break; } case RPC_CLNT_DISCONNECT: { + pthread_mutex_lock(&priv->conn_mutex); + { + priv->conn_status = _gf_false; + pthread_cond_signal(&priv->conn_cond); + } + pthread_mutex_unlock(&priv->conn_mutex); gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT"); break; } diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 01b01b5f01b..18df9ae6d19 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -586,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, quota_meta_t size = { 0, }; - struct timeval tv = { - 0, - }; local = frame->local; @@ -626,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * loop of validation and checking * limit when timeout is zero. 
*/ - gettimeofday(&tv, NULL); LOCK(&ctx->lock); { ctx->size = size.size; + ctx->validate_time = gf_time(); ctx->file_count = size.file_count; ctx->dir_count = size.dir_count; - memcpy(&ctx->tv, &tv, sizeof(struct timeval)); } UNLOCK(&ctx->lock); @@ -644,27 +640,10 @@ unwind: return 0; } -static uint64_t -quota_time_elapsed(struct timeval *now, struct timeval *then) +static inline gf_boolean_t +quota_timeout(time_t t, uint32_t timeout) { - return (now->tv_sec - then->tv_sec); -} - -int32_t -quota_timeout(struct timeval *tv, int32_t timeout) -{ - struct timeval now = { - 0, - }; - int32_t timed_out = 0; - - gettimeofday(&now, NULL); - - if (quota_time_elapsed(&now, tv) >= timeout) { - timed_out = 1; - } - - return timed_out; + return (gf_time() - t) >= timeout; } /* Return: 1 if new entry added @@ -1128,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, timeout = priv->hard_timeout; } - if (!just_validated && quota_timeout(&ctx->tv, timeout)) { + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { need_validate = 1; } else if ((object_aggr_count) > ctx->object_hard_lim) { hard_limit_exceeded = 1; @@ -1195,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, timeout = priv->hard_timeout; } - if (!just_validated && quota_timeout(&ctx->tv, timeout)) { + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { need_validate = 1; } else if (wouldbe_size >= ctx->hard_lim) { hard_limit_exceeded = 1; @@ -4314,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, quota_meta_t size = { 0, }; - struct timeval tv = { - 0, - }; local = frame->local; @@ -4348,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, op_errno = EINVAL; } - gettimeofday(&tv, NULL); LOCK(&ctx->lock); { ctx->size = size.size; + ctx->validate_time = gf_time(); ctx->file_count = size.file_count; ctx->dir_count = size.dir_count; - memcpy(&ctx->tv, 
&tv, sizeof(struct timeval)); } UNLOCK(&ctx->lock); @@ -4873,7 +4848,7 @@ off: void quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, - char **path, struct timeval *cur_time) + char **path, time_t *cur_time) { xlator_t *this = THIS; @@ -4892,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, if (!(*path)) *path = uuid_utoa(inode->gfid); - gettimeofday(cur_time, NULL); + *cur_time = gf_time(); } /* Logs if @@ -4903,9 +4878,7 @@ void quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, int64_t delta) { - struct timeval cur_time = { - 0, - }; + time_t cur_time = 0; char *usage_str = NULL; char *path = NULL; int64_t cur_size = 0; @@ -4931,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, "path=%s", usage_str, priv->volume_uuid, path); - ctx->prev_log = cur_time; + ctx->prev_log_time = cur_time; } /* Usage is above soft limit */ else if (cur_size > ctx->soft_lim && - quota_timeout(&ctx->prev_log, priv->log_timeout)) { + quota_timeout(ctx->prev_log_time, priv->log_timeout)) { quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time); gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT, @@ -4947,7 +4920,7 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, "path=%s", usage_str, priv->volume_uuid, path); - ctx->prev_log = cur_time; + ctx->prev_log_time = cur_time; } if (path) @@ -5008,6 +4981,43 @@ quota_forget(xlator_t *this, inode_t *inode) return 0; } +int +notify(xlator_t *this, int event, void *data, ...) 
+{ + quota_priv_t *priv = NULL; + int ret = 0; + rpc_clnt_t *rpc = NULL; + gf_boolean_t conn_status = _gf_true; + xlator_t *victim = data; + + priv = this->private; + if (!priv || !priv->is_quota_on) + goto out; + + if (event == GF_EVENT_PARENT_DOWN) { + rpc = priv->rpc_clnt; + if (rpc) { + rpc_clnt_disable(rpc); + pthread_mutex_lock(&priv->conn_mutex); + { + conn_status = priv->conn_status; + while (conn_status) { + (void)pthread_cond_wait(&priv->conn_cond, + &priv->conn_mutex); + conn_status = priv->conn_status; + } + } + pthread_mutex_unlock(&priv->conn_mutex); + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); + } + } + +out: + ret = default_notify(this, event, data); + return ret; +} + int32_t init(xlator_t *this) { @@ -5050,6 +5060,10 @@ init(xlator_t *this) goto err; } + pthread_mutex_init(&priv->conn_mutex, NULL); + pthread_cond_init(&priv->conn_cond, NULL); + priv->conn_status = _gf_false; + if (priv->is_quota_on) { rpc = quota_enforcer_init(this, this->options); if (rpc == NULL) { @@ -5143,9 +5157,9 @@ quota_priv_dump(xlator_t *this) if (ret) goto out; else { - gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout); - gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout); - gf_proc_dump_write("alert-time", "%d", priv->log_timeout); + gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout); + gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout); + gf_proc_dump_write("alert-time", "%u", priv->log_timeout); gf_proc_dump_write("quota-on", "%d", priv->is_quota_on); gf_proc_dump_write("statfs", "%d", priv->consider_statfs); gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid); @@ -5163,20 +5177,22 @@ fini(xlator_t *this) { quota_priv_t *priv = NULL; rpc_clnt_t *rpc = NULL; - int i = 0, cnt = 0; priv = this->private; if (!priv) return; rpc = priv->rpc_clnt; priv->rpc_clnt = NULL; - this->private = NULL; if (rpc) { - cnt = GF_ATOMIC_GET(rpc->refcount); - for (i = 0; i < cnt; i++) - 
rpc_clnt_unref(rpc); + rpc_clnt_connection_cleanup(&rpc->conn); + rpc_clnt_unref(rpc); } + + this->private = NULL; LOCK_DESTROY(&priv->lock); + pthread_mutex_destroy(&priv->conn_mutex); + pthread_cond_destroy(&priv->conn_cond); + GF_FREE(priv); if (this->local_pool) { mem_pool_destroy(this->local_pool); @@ -5308,6 +5324,7 @@ struct volume_options options[] = { xlator_api_t xlator_api = { .init = init, .fini = fini, + .notify = notify, .reconfigure = reconfigure, .mem_acct_init = mem_acct_init, .op_version = {1}, /* Present from the initial version */ diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h index 00012c22f46..0395d78c9ef 100644 --- a/xlators/features/quota/src/quota.h +++ b/xlators/features/quota/src/quota.h @@ -153,8 +153,8 @@ struct quota_inode_ctx { int64_t object_soft_lim; struct iatt buf; struct list_head parents; - struct timeval tv; - struct timeval prev_log; + time_t validate_time; + time_t prev_log_time; gf_boolean_t ancestry_built; gf_lock_t lock; }; @@ -199,6 +199,7 @@ struct quota_local { typedef struct quota_local quota_local_t; struct quota_priv { + /* FIXME: consider time_t for timeouts. 
*/ uint32_t soft_timeout; uint32_t hard_timeout; uint32_t log_timeout; @@ -214,6 +215,9 @@ struct quota_priv { char *volume_uuid; uint64_t validation_count; int32_t quotad_conn_status; + pthread_mutex_t conn_mutex; + pthread_cond_t conn_cond; + gf_boolean_t conn_status; }; typedef struct quota_priv quota_priv_t; diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c index 25fbd4aa748..df45f2a940b 100644 --- a/xlators/features/read-only/src/worm-helper.c +++ b/xlators/features/read-only/src/worm-helper.c @@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr) GF_VALIDATE_OR_GOTO("worm", this, out); GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); - start_time = time(NULL); + start_time = gf_time(); dict = dict_new(); if (!dict) { gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); @@ -94,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, if (ret) goto out; stbuf->ia_mtime = stpre.ia_mtime; - stbuf->ia_atime = time(NULL) + retention_state->ret_period; + stbuf->ia_atime = gf_time() + retention_state->ret_period; if (fop_with_fd) ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME, @@ -286,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, { int op_errno = EROFS; int ret = -1; + time_t now = 0; uint64_t com_period = 0; uint64_t start_time = 0; dict_t *dict = NULL; @@ -337,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, goto out; } - if (ret == -1 && (time(NULL) - start_time) >= com_period) { - if ((time(NULL) - stbuf.ia_mtime) >= com_period) { + now = gf_time(); + + if (ret == -1 && (now - start_time) >= com_period) { + if ((now - stbuf.ia_mtime) >= com_period) { ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state, &stbuf); if (ret) { @@ -352,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, op_errno = 0; goto out; } - } else 
if (ret == -1 && (time(NULL) - start_time) < com_period) { + } else if (ret == -1 && (now - start_time) < com_period) { op_errno = 0; goto out; - } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) { + } else if (reten_state.retain && ((now >= stbuf.ia_atime))) { gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf); } if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable && diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c index 46078c1a96e..1cc5526d5cd 100644 --- a/xlators/features/read-only/src/worm.c +++ b/xlators/features/read-only/src/worm.c @@ -440,8 +440,6 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, { int ret = 0; read_only_priv_t *priv = NULL; - dict_t *dict = NULL; - // In case of an error exit because fd can be NULL and this would // cause an segfault when performing fsetxattr . We explicitly // unwind to avoid future problems @@ -452,24 +450,12 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, priv = this->private; GF_ASSERT(priv); if (priv->worm_file) { - dict = dict_new(); - if (!dict) { - gf_log(this->name, GF_LOG_ERROR, - "Error creating the " - "dict"); - goto out; - } - ret = dict_set_int8(dict, "trusted.worm_file", 1); + ret = fd_ctx_set(fd, this, 1); if (ret) { gf_log(this->name, GF_LOG_ERROR, - "Error in setting " - "the dict"); - goto out; - } - ret = syncop_fsetxattr(this, fd, dict, 0, NULL, NULL); - if (ret) { - gf_log(this->name, GF_LOG_ERROR, "Error setting xattr"); - goto out; + "Failed to set the fd ctx " + "for gfid:%s . 
Worm feature may not work for the gfid", + uuid_utoa(inode->gfid)); } ret = worm_init_state(this, _gf_true, fd); if (ret) { @@ -480,8 +466,6 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, out: STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, preparent, postparent, xdata); - if (dict) - dict_unref(dict); return ret; } @@ -617,7 +601,62 @@ struct xlator_fops fops = { .lk = ro_lk, }; -struct xlator_cbks cbks; +int32_t +worm_release(xlator_t *this, fd_t *fd) +{ + dict_t *dict = NULL; + int ret = -1; + dict = dict_new(); + uint64_t value = 0; + loc_t loc = { + 0, + }; + read_only_priv_t *priv = NULL; + priv = this->private; + + if (priv->worm_file) { + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); + goto out; + } + + ret = fd_ctx_get(fd, this, &value); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "Failed to get the fd ctx"); + } + if (!value) { + goto out; + } + + ret = dict_set_int8(dict, "trusted.worm_file", 1); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Error in setting " + "the dict"); + goto out; + } + + loc.inode = inode_ref(fd->inode); + gf_uuid_copy(loc.gfid, fd->inode->gfid); + ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error setting xattr"); + goto out; + } + + gf_worm_state_transition(this, _gf_false, &loc, GF_FOP_WRITE); + } + +out: + loc_wipe(&loc); + if (dict) + dict_unref(dict); + return 0; +} + +struct xlator_cbks cbks = { + .release = worm_release, +}; struct volume_options options[] = { {.key = {"worm"}, diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index c59e244429a..e5f93063943 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -513,6 +513,9 @@ shard_local_wipe(shard_local_t *local) loc_wipe(&local->int_entrylk.loc); loc_wipe(&local->newloc); + if (local->name) + GF_FREE(local->name); + if (local->int_entrylk.basename) 
GF_FREE(local->int_entrylk.basename); if (local->fd) @@ -1001,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) } int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this); + +int shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, shard_post_resolve_fop_handler_t post_res_handler) { @@ -1017,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, inode_t *fsync_inode = NULL; shard_priv_t *priv = NULL; shard_local_t *local = NULL; + uint64_t resolve_count = 0; priv = this->private; local = frame->local; local->call_count = 0; shard_idx_iter = local->first_block; res_inode = local->resolver_base_inode; + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + /* If this prealloc FOP is for fresh file creation, then the size of the + * file will be 0. Then there will be no shards associated with this file. + * So we can skip the lookup process for the shards which do not exist + * and directly issue mknod to create shards. 
+ * + * In case the prealloc fop is to extend the preallocated file to bigger + * size then just lookup and populate inodes of existing shards and + * update the create count + */ + if (local->fop == GF_FOP_FALLOCATE) { + if (!local->prebuf.ia_size) { + local->inode_list[0] = inode_ref(res_inode); + local->create_count = local->last_block; + shard_common_inode_write_post_lookup_shards_handler(frame, this); + return 0; + } + if (local->prebuf.ia_size < local->total_size) + local->create_count = local->last_block - + ((local->prebuf.ia_size - 1) / + local->block_size); + } + + resolve_count = local->last_block - local->create_count; + if (res_inode) gf_uuid_copy(gfid, res_inode->gfid); else gf_uuid_copy(gfid, local->base_gfid); - if ((local->op_ret < 0) || (local->resolve_not)) - goto out; - - while (shard_idx_iter <= local->last_block) { + while (shard_idx_iter <= resolve_count) { i++; if (shard_idx_iter == 0) { local->inode_list[i] = inode_ref(res_inode); @@ -1659,26 +1692,24 @@ err: } int -shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this, + inode_t *inode, int32_t op_ret, + int32_t op_errno, struct iatt *buf, + dict_t *xdata) { int ret = -1; int32_t mask = SHARD_INODE_WRITE_MASK; - shard_local_t *local = NULL; + shard_local_t *local = frame->local; shard_inode_ctx_t ctx = { 0, }; - local = frame->local; - if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" " failed : %s", - loc_gfid_utoa(&(local->loc))); + uuid_utoa(inode->gfid)); local->op_ret = op_ret; local->op_errno = op_errno; goto unwind; @@ -1712,18 +1743,57 @@ unwind: } int -shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, - shard_post_fop_handler_t handler) +shard_fstat_base_file_cbk(call_frame_t *frame, 
void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + shard_local_t *local = frame->local; + + shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret, + op_errno, buf, xdata); + return 0; +} + +int +shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + /* In case of op_ret < 0, inode passed to this function will be NULL + ex: in case of op_errno = ENOENT. So refer prefilled inode data + which is part of local. + Note: Reassigning/overriding the inode passed to this cbk with inode + which is part of *struct shard_local_t* won't cause any issue as + both inodes have same reference/address as of the inode passed */ + inode = ((shard_local_t *)frame->local)->loc.inode; + + shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno, + buf, xdata); + return 0; +} + +/* This function decides whether to make file based lookup or + * fd based lookup (fstat) depending on the 3rd and 4th arg. + * If fd != NULL and loc == NULL then call is for fstat + * If fd == NULL and loc != NULL then call is for file based + * lookup. Please pass args based on the requirement. + */ +int +shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, shard_post_fop_handler_t handler) { int ret = -1; + inode_t *inode = NULL; shard_local_t *local = NULL; dict_t *xattr_req = NULL; gf_boolean_t need_refresh = _gf_false; local = frame->local; local->handler = handler; + inode = fd ? fd->inode : loc->inode; - ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, + ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf, &need_refresh); /* By this time, inode ctx should have been created either in create, * mknod, readdirp or lookup. If not it is a bug! 
@@ -1732,7 +1802,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" "Serving prebuf off the inode ctx cache", - uuid_utoa(loc->gfid)); + uuid_utoa(inode->gfid)); goto out; } @@ -1743,10 +1813,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, goto out; } - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out); - STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + if (fd) + STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xattr_req); + else + STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); dict_unref(xattr_req); return 0; @@ -2015,8 +2089,8 @@ shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) */ if (!inode) { gf_msg_debug(this->name, 0, - "Last shard to be truncated absent in backend: %d of " - "gfid %s. Directly proceeding to update file size", + "Last shard to be truncated absent in backend: %" PRIu64 + " of gfid %s. 
Directly proceeding to update file size", local->first_block, uuid_utoa(local->loc.inode->gfid)); shard_update_file_size(frame, this, NULL, &local->loc, shard_post_update_size_truncate_handler); @@ -2399,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, int count = 0; int call_count = 0; int32_t shard_idx_iter = 0; - int last_block = 0; + int lookup_count = 0; char path[PATH_MAX] = { 0, }; @@ -2419,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, local = frame->local; count = call_count = local->call_count; shard_idx_iter = local->first_block; - last_block = local->last_block; + lookup_count = local->last_block - local->create_count; local->pls_fop_handler = handler; if (local->lookup_shards_barriered) local->barrier.waitfor = local->call_count; @@ -2429,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, else gf_uuid_copy(gfid, local->base_gfid); - while (shard_idx_iter <= last_block) { + while (shard_idx_iter <= lookup_count) { if (local->inode_list[i]) { i++; shard_idx_iter++; @@ -2574,6 +2648,7 @@ shard_truncate_begin(call_frame_t *frame, xlator_t *this) local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; @@ -2723,8 +2798,8 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, local->resolver_base_inode = loc->inode; GF_ATOMIC_INIT(local->delta_blocks, 0); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_truncate_handler); return 0; err: @@ -2779,8 +2854,8 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, local->resolver_base_inode = fd->inode; GF_ATOMIC_INIT(local->delta_blocks, 0); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_truncate_handler); return 0; err: shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); @@ -2924,8 +2999,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!local->xattr_req) goto err; - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_link_handler); + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_link_handler); return 0; err: shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); @@ -2939,13 +3014,20 @@ int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) { shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; local = frame->local; + if (local->resolver_base_inode) + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, - "failed to delete shards of %s", - uuid_utoa(local->resolver_base_inode->gfid)); + "failed to delete shards of %s", uuid_utoa(gfid)); return 0; } local->op_ret = 0; @@ -4247,8 +4329,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) switch (local->fop) { case GF_FOP_UNLINK: 
case GF_FOP_RENAME: - shard_lookup_base_file(frame, this, &local->int_inodelk.loc, - shard_post_lookup_base_shard_rm_handler); + shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL, + shard_post_lookup_base_shard_rm_handler); break; default: gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, @@ -4503,8 +4585,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->block_size) { local->tmp_loc.inode = inode_new(this->itable); gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); - shard_lookup_base_file(frame, this, &local->tmp_loc, - shard_post_rename_lookup_handler); + shard_refresh_base_file(frame, this, &local->tmp_loc, NULL, + shard_post_rename_lookup_handler); } else { shard_rename_cbk(frame, this); } @@ -5143,6 +5225,7 @@ shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->resolver_base_inode = local->loc.inode; local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), @@ -5239,8 +5322,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_readv_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_readv_handler); return 0; err: shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); @@ -5603,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame, shard_common_lookup_shards( frame, this, local->resolver_base_inode, shard_common_inode_write_post_lookup_shards_handler); + } else if (local->create_count) { + shard_common_inode_write_post_lookup_shards_handler(frame, this); } else { shard_common_inode_write_do(frame, this); } @@ -5633,6 +5718,7 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame, 
local->last_block = get_highest_block(local->offset, local->total_size, local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); if (!local->inode_list) { @@ -5641,9 +5727,9 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame, } gf_msg_trace(this->name, 0, - "%s: gfid=%s first_block=%" PRIu32 + "%s: gfid=%s first_block=%" PRIu64 " " - "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64 + "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), @@ -6038,8 +6124,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_fsync_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_fsync_handler); return 0; err: shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); @@ -6231,48 +6317,210 @@ shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } int32_t -shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local, + char *key) { - int op_errno = EINVAL; + int ret = 0; + struct iatt *tmpbuf = NULL; + struct iatt *stbuf = NULL; + data_t *data = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + if (!xdata) + return 0; + + data = dict_get(xdata, key); + if (!data) + return 0; + + tmpbuf = data_to_iatt(data, key); + stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); + if (stbuf == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; } + 
*stbuf = *tmpbuf; + stbuf->ia_size = local->prebuf.ia_size; + stbuf->ia_blocks = local->prebuf.ia_blocks; + ret = dict_set_iatt(xdata, key, stbuf, false); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + return 0; - if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +err: + GF_FREE(stbuf); + return -1; +} + +int32_t +shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int ret = -1; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + xdata); return 0; -out: - shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); + +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; } int32_t -shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; + + local = frame->local; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + if (local->fd) + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, local->fd, + local->name, 
local->xattr_req); + else + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, &local->loc, + local->name, local->xattr_req); + return 0; +} + +int32_t +shard_common_remove_xattr(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop, loc_t *loc, fd_t *fd, + const char *name, dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + /* If shard's special xattrs are attempted to be removed, + * fail the fop with EPERM (except if the client is gsyncd). + */ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err); } + /* Repeat the same check for bulk-removexattr */ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + 
local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + + if (name) { + local->name = gf_strdup(name); + if (!local->name) + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_remove_xattr_handler); return 0; -out: - shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name, + xdata); + return 0; +} + +int32_t +shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name, + xdata); return 0; } @@ -6353,38 +6601,164 @@ out: } int32_t -shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int op_errno = EINVAL; + int ret = -1; + shard_local_t *local = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, 
local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + xdata); return 0; -out: - shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); + +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; } int32_t -shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + if (local->fd) + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, local->fd, + local->xattr_req, local->flags, local->xattr_rsp); + else + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &local->loc, + local->xattr_req, local->flags, local->xattr_rsp); + return 0; +} + +int32_t +shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, + loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags, + dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; + } + + /* Sharded or not, if shard's special xattrs are attempted to be set, + * fail the fop with EPERM (except if the client is gsyncd. 
+ */ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err); } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + local->flags = flags; + /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict + * and the xdata dict + */ + if (dict) + local->xattr_req = dict_ref(dict); + if (xdata) + local->xattr_rsp = dict_ref(xdata); + + shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_set_xattr_handler); return 0; -out: - shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, + xdata); + return 0; +} + +int32_t +shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t 
flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, + xdata); return 0; } @@ -6647,8 +7021,8 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_common_inode_write_post_lookup_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_common_inode_write_post_lookup_handler); return 0; out: shard_common_failure_unwind(fop, frame, -1, ENOMEM); diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index 04abd62c21c..4fe181b64d5 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame, typedef struct shard_local { int op_ret; int op_errno; - int first_block; - int last_block; - int num_blocks; + uint64_t first_block; + uint64_t last_block; + uint64_t num_blocks; int call_count; int eexist_count; int create_count; @@ -318,6 +318,7 @@ typedef struct shard_local { uint32_t deletion_rate; gf_boolean_t cleanup_required; uuid_t base_gfid; + char *name; } shard_local_t; typedef struct shard_inode_ctx { diff --git a/xlators/features/snapview-client/src/snapview-client-messages.h b/xlators/features/snapview-client/src/snapview-client-messages.h index f6b8f48ef72..c02fb154930 100644 --- a/xlators/features/snapview-client/src/snapview-client-messages.h +++ b/xlators/features/snapview-client/src/snapview-client-messages.h @@ -33,6 +33,39 @@ GLFS_MSGID(SNAPVIEW_CLIENT, SVC_MSG_NO_MEMORY, SVC_MSG_MEM_ACNT_FAILED, SVC_MSG_RENAME_SNAPSHOT_ENTRY, SVC_MSG_LINK_SNAPSHOT_ENTRY, SVC_MSG_COPY_ENTRY_POINT_FAILED, SVC_MSG_ENTRY_POINT_SPECIAL_DIR, SVC_MSG_STR_LEN, SVC_MSG_INVALID_ENTRY_POINT, SVC_MSG_NULL_PRIV, - SVC_MSG_PRIV_DESTROY_FAILED); + SVC_MSG_PRIV_DESTROY_FAILED, SVC_MSG_ALLOC_FD_FAILED, + 
SVC_MSG_ALLOC_INODE_FAILED, SVC_MSG_NULL_SPECIAL_DIR, + SVC_MSG_MEM_POOL_GET_FAILED); +#define SVC_MSG_ALLOC_FD_FAILED_STR "failed to allocate new fd context" +#define SVC_MSG_SET_FD_CONTEXT_FAILED_STR "failed to set fd context" +#define SVC_MSG_STR_LEN_STR \ + "destination buffer size is less than the length of entry point name" +#define SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL_STR "lookup failed on normal graph" +#define SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL_STR "lookup failed on snapview graph" +#define SVC_MSG_SET_INODE_CONTEXT_FAILED_STR "failed to set inode context" +#define SVC_MSG_NO_MEMORY_STR "failed to allocate memory" +#define SVC_MSG_COPY_ENTRY_POINT_FAILED_STR \ + "failed to copy the entry point string" +#define SVC_MSG_GET_FD_CONTEXT_FAILED_STR "fd context not found" +#define SVC_MSG_GET_INODE_CONTEXT_FAILED_STR "failed to get inode context" +#define SVC_MSG_ALLOC_INODE_FAILED_STR "failed to allocate new inode" +#define SVC_MSG_DICT_SET_FAILED_STR "failed to set dict" +#define SVC_MSG_RENAME_SNAPSHOT_ENTRY_STR \ + "rename happening on a entry residing in snapshot" +#define SVC_MSG_DELETE_INODE_CONTEXT_FAILED_STR "failed to delete inode context" +#define SVC_MSG_NULL_PRIV_STR "priv NULL" +#define SVC_MSG_INVALID_ENTRY_POINT_STR "not a valid entry point" +#define SVC_MSG_MEM_ACNT_FAILED_STR "Memory accouting init failed" +#define SVC_MSG_NO_CHILD_FOR_XLATOR_STR "configured without any child" +#define SVC_MSG_XLATOR_CHILDREN_WRONG_STR \ + "snap-view-client has got wrong subvolumes. 
It can have only 2" +#define SVC_MSG_ENTRY_POINT_SPECIAL_DIR_STR \ + "entry point directory cannot be part of special directory" +#define SVC_MSG_NULL_SPECIAL_DIR_STR "null special directory" +#define SVC_MSG_MEM_POOL_GET_FAILED_STR \ + "could not get mem pool for frame->local" +#define SVC_MSG_PRIV_DESTROY_FAILED_STR "failed to destroy private" +#define SVC_MSG_LINK_SNAPSHOT_ENTRY_STR \ + "link happening on a entry residin gin snapshot" #endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */ diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c index f22d4eb70a8..486c5179d5b 100644 --- a/xlators/features/snapview-client/src/snapview-client.c +++ b/xlators/features/snapview-client/src/snapview-client.c @@ -198,16 +198,15 @@ __svc_fd_ctx_get_or_new(xlator_t *this, fd_t *fd) svc_fd = svc_fd_new(); if (!svc_fd) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_NO_MEMORY, - "failed to allocate new fd context for gfid %s", - uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_FD_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); goto out; } ret = __svc_fd_ctx_set(this, fd, svc_fd); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED, - "failed to set fd context for gfid %s", uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); ret = -1; } @@ -268,11 +267,9 @@ gf_svc_get_entry_point(xlator_t *this, char *entry_point, size_t dest_size) LOCK(&priv->lock); { if (dest_size <= strlen(priv->path)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_STR_LEN, - "destination buffer " - "size %zu is less than the length %zu of " - "the entry point name %s", - dest_size, strlen(priv->path), priv->path); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_STR_LEN, + "dest-size=%zu", dest_size, "priv-path-len=%zu", + strlen(priv->path), "path=%s", priv->path, NULL); } else { 
snprintf(entry_point, dest_size, "%s", priv->path); ret = 0; @@ -321,19 +318,17 @@ gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, */ if (op_ret) { if (subvolume == FIRST_CHILD(this)) { - gf_msg(this->name, - (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG - : GF_LOG_ERROR, - op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, - "lookup failed on normal graph with error %s", - strerror(op_errno)); + gf_smsg(this->name, + (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, "error=%s", + strerror(op_errno), NULL); } else { - gf_msg(this->name, - (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG - : GF_LOG_ERROR, - op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, - "lookup failed on snapview graph with error %s", - strerror(op_errno)); + gf_smsg(this->name, + (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, "error=%s", + strerror(op_errno), NULL); goto out; } @@ -364,10 +359,8 @@ gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode type in the inode context " - "(gfid: %s)", - uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); out: if (do_unwind) { @@ -416,8 +409,7 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (!local) { op_ret = -1; op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, - "failed to allocate local"); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL); goto out; } @@ -457,9 +449,8 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - 
gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -540,9 +531,8 @@ gf_svc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (path_len >= snap_len && inode_type == VIRTUAL_INODE) { path = &loc->path[path_len - snap_len]; if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string "); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -703,8 +693,8 @@ gf_svc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (special_dir) { svc_fd = svc_fd_ctx_get_or_new(this, fd); if (!svc_fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "fd context not found for %s", uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -749,10 +739,9 @@ gf_svc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, local = mem_get0(this->local_pool); if (!local) { op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, - "failed to allocate memory for local " - "(path: %s, gfid: %s)", - loc->path, uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, + "path=%s", loc->path, "gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); goto out; } loc_copy(&local->loc, loc); @@ -793,11 +782,9 @@ gf_svc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s " - "(gfid: %s)", - loc->path, uuid_utoa(loc->inode->gfid)); + 
gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path, + "gfid= %s", uuid_utoa(loc->inode->gfid), NULL); goto out; } @@ -908,9 +895,8 @@ gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (!strcmp(attrname, GF_XATTR_GET_REAL_FILENAME_KEY)) { if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1006,11 +992,9 @@ gf_svc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get inode context for %s " - "(gfid: %s)", - loc->name, uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); goto out; } @@ -1052,10 +1036,9 @@ gf_svc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get inode context for %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -1097,11 +1080,9 @@ gf_svc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s " - "(gfid: %s)", - loc->name, uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name, + "gfid=%s", 
uuid_utoa(loc->inode->gfid), NULL); goto out; } @@ -1137,8 +1118,8 @@ gf_svc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = NORMAL_INODE; ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: SVC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent, @@ -1168,17 +1149,15 @@ gf_svc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1215,8 +1194,8 @@ gf_svc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = NORMAL_INODE; ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: SVC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, @@ -1246,17 +1225,15 @@ gf_svc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s", - uuid_utoa(loc->parent->gfid)); + 
gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1341,8 +1318,8 @@ gf_svc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = NORMAL_INODE; ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: SVC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, @@ -1374,17 +1351,15 @@ gf_svc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1422,8 +1397,8 @@ gf_svc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = NORMAL_INODE; ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: 
SVC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent, @@ -1454,17 +1429,15 @@ gf_svc_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1506,11 +1479,9 @@ gf_svc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context " - "for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } @@ -1658,10 +1629,8 @@ gf_svc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * reconfigure while this is accessing it. */ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string. 
" - "Proceeding."); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1699,9 +1668,8 @@ gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, svc_fd = svc_fd_ctx_get_or_new(this, fd); if (!svc_fd) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for inode %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); else { if (svc_fd->entry_point_handled && off == svc_fd->last_offset) { op_ret = 0; @@ -1715,9 +1683,8 @@ gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local = mem_get0(this->local_pool); if (!local) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, - "failed to allocate local (inode: %s)", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, + "inode-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } local->subvolume = subvolume; @@ -1798,17 +1765,16 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, svc_fd = svc_fd_ctx_get(this, local->fd); if (!svc_fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for the inode %s", - uuid_utoa(local->fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL); op_ret = 0; op_errno = ENOENT; goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); op_ret = 0; op_errno = ENOENT; goto out; @@ -1816,8 +1782,8 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, entry = 
gf_dirent_for_name(entry_point); if (!entry) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, - "failed to allocate memory for the entry %s", entry_point); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "entry-point=%s", entry_point, NULL); op_ret = 0; op_errno = ENOMEM; goto out; @@ -1831,9 +1797,8 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = VIRTUAL_INODE; ret = svc_inode_ctx_set(this, entry->inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set the inode context for the entry %s", - entry->d_name); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + "entry-name=%s", entry->d_name, NULL); list_add_tail(&entry->list, &entries.list); op_ret = 1; @@ -1878,14 +1843,14 @@ gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this, inode_unref(loc->inode); loc->inode = inode_new(loc->parent->table); if (!loc->inode) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_NO_MEMORY, - "failed to allocate new inode"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_INODE_FAILED, + NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); goto out; } @@ -1915,8 +1880,7 @@ gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this, ret = dict_set_str(tmp_xdata, "entry-point", "true"); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, - "failed to set dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL); goto out; } @@ -1960,9 +1924,8 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, fd = local->fd; svc_fd = svc_fd_ctx_get(this, fd); if (!svc_fd) { - gf_msg(this->name, 
GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for inode %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -1979,8 +1942,8 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, strcmp(private->special_dir, "") && svc_fd->special_dir && local->subvolume == FIRST_CHILD(this)) { if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, 0, + SVC_MSG_GET_FD_CONTEXT_FAILED, NULL); goto out; } @@ -1988,8 +1951,8 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, if (!inode) { inode = inode_new(fd->inode->table); if (!inode) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, - "failed to allocate new inode"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ALLOC_INODE_FAILED, + NULL); goto out; } } @@ -2019,8 +1982,7 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, goto out; ret = dict_set_str(tmp_xdata, "entry-point", "true"); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_SET_FAILED, - "failed to set dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL); goto out; } @@ -2070,9 +2032,8 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, svc_fd = svc_fd_ctx_get(this, local->fd); if (!svc_fd) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for gfid %s", - uuid_utoa(local->fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL); } if (local->subvolume == FIRST_CHILD(this)) @@ -2088,8 +2049,8 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t 
*this, * condition where, priv->path is changed in reconfigure */ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); goto out; } @@ -2110,9 +2071,8 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ret = svc_inode_ctx_set(this, entry->inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, - SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_SET_INODE_CONTEXT_FAILED, NULL); if (svc_fd) svc_fd->last_offset = entry->d_off; } @@ -2151,8 +2111,7 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local = mem_get0(this->local_pool); if (!local) { op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, - "failed to allocate local"); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL); goto out; } @@ -2167,9 +2126,8 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, */ svc_fd = svc_fd_ctx_get_or_new(this, fd); if (!svc_fd) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for the inode %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); else { if (svc_fd->entry_point_handled && off == svc_fd->last_offset) { op_ret = 0; @@ -2224,22 +2182,17 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the context for the inode " - "%s", - uuid_utoa(oldloc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, 
"gfid=%s", + uuid_utoa(oldloc->inode->gfid), NULL); goto out; } if (src_inode_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_RENAME_SNAPSHOT_ENTRY, - "rename happening on a entry %s " - "residing in snapshot", - oldloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "name=%s", oldloc->name, NULL); goto out; } @@ -2248,11 +2201,9 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!ret && dst_inode_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_RENAME_SNAPSHOT_ENTRY, - "rename of %s happening to a entry " - "%s residing in snapshot", - oldloc->name, newloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s", + oldloc->name, "newloc-name=%s", newloc->name, NULL); goto out; } } @@ -2262,11 +2213,9 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!ret && dst_parent_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_RENAME_SNAPSHOT_ENTRY, - "rename of %s happening to a entry %s " - "residing in snapshot", - oldloc->name, newloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s", + oldloc->name, "newloc-name=%s", newloc->name, NULL); goto out; } } @@ -2308,9 +2257,8 @@ gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!ret && src_inode_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, - "link happening on a entry %s residing in snapshot", - oldloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, + "oldloc-name=%s", oldloc->name, NULL); goto out; } @@ -2318,10 +2266,9 @@ gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t 
*oldloc, loc_t *newloc, if (!ret && dst_parent_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, - "link of %s happening to a entry %s " - "residing in snapshot", - oldloc->name, newloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, + "oldloc-name=%s", oldloc->name, "newloc-name=%s", newloc->name, + NULL); goto out; } @@ -2356,11 +2303,9 @@ gf_svc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get inode context for %s " - "(gfid: %s)", - loc->path, uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); goto out; } @@ -2401,10 +2346,9 @@ gf_svc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get inode context for %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -2488,8 +2432,9 @@ gf_svc_forget(xlator_t *this, inode_t *inode) ret = inode_ctx_del(inode, this, &value); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DELETE_INODE_CONTEXT_FAILED, - "failed to delete inode context for %s", uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_DELETE_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(inode->gfid), NULL); goto out; } @@ -2503,7 +2448,7 @@ gf_svc_priv_destroy(xlator_t *this, svc_private_t *priv) int ret = -1; if (!priv) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_NULL_PRIV, "priv NULL"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_NULL_PRIV, 
NULL); goto out; } @@ -2558,10 +2503,8 @@ reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("snapshot-directory", path, options, str, out); if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, - "%s is not a " - "valid entry point", - path); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, + "path=%s", path, NULL); goto out; } @@ -2621,9 +2564,7 @@ mem_acct_init(xlator_t *this) ret = xlator_mem_acct_init(this, gf_svc_mt_end + 1); if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED, - "Memory accounting" - " init failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED, NULL); } return ret; @@ -2640,8 +2581,7 @@ init(xlator_t *this) char *special_dir = NULL; if (!this->children) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR, - "configured without any child"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR, NULL); goto out; } @@ -2652,11 +2592,8 @@ init(xlator_t *this) } if (children != 2) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG, - "snap-view-client " - "has got %d subvolumes. 
It can have only 2 " - "subvolumes.", - children); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG, + "subvol-num=%d", children, NULL); goto out; } @@ -2676,41 +2613,36 @@ init(xlator_t *this) GF_OPTION_INIT("snapshot-directory", path, str, out); if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, - "%s is not a valid entry point", path); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, + "path=%s", path, NULL); goto out; } private ->path = gf_strdup(path); if (!private->path) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_NO_MEMORY, - "failed to allocate memory " - "for the entry point path %s", - path); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "entry-point-path=%s", path, NULL); goto out; } GF_OPTION_INIT("snapdir-entry-path", special_dir, str, out); if (!special_dir || strstr(special_dir, path)) { if (special_dir) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ENTRY_POINT_SPECIAL_DIR, - "entry point directory %s cannot be part of " - "the special directory %s", - path, special_dir); + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_ENTRY_POINT_SPECIAL_DIR, "path=%s", path, + "special-dir=%s", special_dir); else - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ENTRY_POINT_SPECIAL_DIR, - "null special directory"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NULL_SPECIAL_DIR, + NULL); goto out; } private ->special_dir = gf_strdup(special_dir); if (!private->special_dir) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_NO_MEMORY, - "failed to allocate memory " - "for the special directory %s", - special_dir); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "special-directory=%s", special_dir, NULL); goto out; } @@ -2719,8 +2651,7 @@ init(xlator_t *this) this->local_pool = mem_pool_new(svc_local_t, 128); if (!this->local_pool) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, - "could not get mem pool for 
frame->local"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_MEM_POOL_GET_FAILED, NULL); goto out; } @@ -2752,8 +2683,8 @@ fini(xlator_t *this) * set this->priv to NULL. */ if (gf_svc_priv_destroy(this, priv)) - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_PRIV_DESTROY_FAILED, - "failed to destroy private"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_PRIV_DESTROY_FAILED, + NULL); this->private = NULL; diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c index f44b11c6872..7d09cba3e9c 100644 --- a/xlators/features/trash/src/trash.c +++ b/xlators/features/trash/src/trash.c @@ -212,11 +212,11 @@ void append_time_stamp(char *name, size_t name_size) { int i; - char timestr[64] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; - gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS); + gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS); /* removing white spaces in timestamp */ for (i = 0; i < strlen(timestr); i++) { diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c index 978825f6b56..c641bd6f432 100644 --- a/xlators/features/upcall/src/upcall-internal.c +++ b/xlators/features/upcall/src/upcall-internal.c @@ -316,7 +316,7 @@ upcall_reaper_thread(void *data) priv = this->private; GF_ASSERT(priv); - time_now = time(NULL); + time_now = gf_time(); while (!priv->fini) { list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list, inode_ctx_list) @@ -344,7 +344,7 @@ upcall_reaper_thread(void *data) /* don't do a very busy loop */ timeout = get_cache_invalidation_timeout(this); sleep(timeout / 2); - time_now = time(NULL); + time_now = gf_time(); } return NULL; @@ -533,7 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, goto out; } - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&up_inode_ctx->client_list_lock); { list_for_each_entry_safe(up_client_entry, tmp, @@ -670,13 +670,13 @@ 
upcall_cache_forget(xlator_t *this, inode_t *inode, return; } - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&up_inode_ctx->client_list_lock); { list_for_each_entry_safe(up_client_entry, tmp, &up_inode_ctx->client_list, client_list) { - /* Set the access time to time(NULL) + /* Set the access time to gf_time() * to send notify */ up_client_entry->access_time = time_now; diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c index 3b8dadd3191..2acc63e6a05 100644 --- a/xlators/features/utime/src/utime.c +++ b/xlators/features/utime/src/utime.c @@ -147,6 +147,7 @@ gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie, } frame->local = NULL; call_resume(stub); + STACK_DESTROY(frame->root); return 0; } @@ -162,6 +163,7 @@ gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, loc_t loc = { 0, }; + call_frame_t *new_frame = NULL; if (!op_ret && dict_get(xdata, GF_XATTR_MDATA_KEY) == NULL) { dict = dict_new(); @@ -181,19 +183,32 @@ gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, "dict set of key for set-ctime-mdata failed"); goto err; } - frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk, op_ret, - op_errno, inode, stbuf, xdata, - postparent); - if (!frame->local) { + new_frame = copy_frame(frame); + if (!new_frame) { + op_errno = ENOMEM; + goto stub_err; + } + + new_frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk, + op_ret, op_errno, inode, stbuf, + xdata, postparent); + if (!new_frame->local) { gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY, "lookup_cbk stub allocation failed"); + op_errno = ENOMEM; + STACK_DESTROY(new_frame->root); goto stub_err; } loc.inode = inode_ref(inode); gf_uuid_copy(loc.gfid, stbuf->ia_gfid); - STACK_WIND(frame, gf_utime_set_mdata_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, &loc, dict, 0, NULL); + + new_frame->root->uid = 0; + new_frame->root->gid = 0; + 
new_frame->root->pid = GF_CLIENT_PID_SET_UTIME; + STACK_WIND(new_frame, gf_utime_set_mdata_setxattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, &loc, + dict, 0, NULL); dict_unref(dict); inode_unref(loc.inode); |