diff options
Diffstat (limited to 'xlators/features')
119 files changed, 6689 insertions, 2444 deletions
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index 194634b003d..c57897f11ea 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC CLOUDSYNC_DIR = cloudsync endif +if BUILD_METADISP + METADISP_DIR = metadisp +endif + SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ compress changelog gfid-access snapview-client snapview-server trash \ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ - utime + utime $(METADISP_DIR) CLEANFILES = diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c index 4f8fa211d0b..852bbacb99d 100644 --- a/xlators/features/barrier/src/barrier.c +++ b/xlators/features/barrier/src/barrier.c @@ -461,7 +461,7 @@ out: int notify(xlator_t *this, int event, void *data, ...) { - barrier_priv_t *priv = NULL; + barrier_priv_t *priv = this->private; dict_t *dict = NULL; int ret = -1; int barrier_enabled = _gf_false; @@ -469,7 +469,6 @@ notify(xlator_t *this, int event, void *data, ...) 0, }; - priv = this->private; GF_ASSERT(priv); INIT_LIST_HEAD(&queue); @@ -491,19 +490,23 @@ notify(xlator_t *this, int event, void *data, ...) 
if (barrier_enabled) { ret = __barrier_enable(this, priv); } else { + UNLOCK(&priv->lock); gf_log(this->name, GF_LOG_ERROR, "Already disabled."); + goto post_unlock; } } else { if (!barrier_enabled) { __barrier_disable(this, &queue); ret = 0; } else { + UNLOCK(&priv->lock); gf_log(this->name, GF_LOG_ERROR, "Already enabled"); + goto post_unlock; } } } UNLOCK(&priv->lock); - + post_unlock: if (!list_empty(&queue)) barrier_dequeue_all(this, &queue); @@ -726,10 +729,10 @@ barrier_dump_priv(xlator_t *this) gf_proc_dump_build_key(key, "xlator.features.barrier", "priv"); gf_proc_dump_add_section("%s", key); + gf_proc_dump_build_key(key, "barrier", "enabled"); LOCK(&priv->lock); { - gf_proc_dump_build_key(key, "barrier", "enabled"); gf_proc_dump_write(key, "%d", priv->barrier_enabled); gf_proc_dump_build_key(key, "barrier", "timeout"); gf_proc_dump_write(key, "%ld", priv->timeout.tv_sec); diff --git a/xlators/features/barrier/src/barrier.h b/xlators/features/barrier/src/barrier.h index e5977084f21..1337f311f7d 100644 --- a/xlators/features/barrier/src/barrier.h +++ b/xlators/features/barrier/src/barrier.h @@ -65,11 +65,12 @@ typedef struct { gf_timer_t *timer; - gf_boolean_t barrier_enabled; gf_lock_t lock; struct list_head queue; struct timespec timeout; uint32_t queue_size; + gf_boolean_t barrier_enabled; + char _pad[3]; /* manual padding */ } barrier_priv_t; int diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h index 3d40089dc95..5bc5103a27c 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h @@ -47,6 +47,55 @@ GLFS_MSGID(BITROT_BITD, BRB_MSG_FD_CREATE_FAILED, BRB_MSG_READV_FAILED, BRB_MSG_SCRUB_THREAD_CLEANUP, BRB_MSG_SCRUBBER_CLEANED, BRB_MSG_GENERIC_SSM_INFO, BRB_MSG_ZERO_TIMEOUT_BUG, BRB_MSG_BAD_OBJ_READDIR_FAIL, BRB_MSG_SSM_FAILED, - BRB_MSG_SCRUB_WAIT_FAILED); + BRB_MSG_SCRUB_WAIT_FAILED, 
BRB_MSG_TRIGGER_SIGN_FAILED, + BRB_MSG_EVENT_UNHANDLED, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, + BRB_MSG_THREAD_CREATION_FAILED, BRB_MSG_MEM_POOL_ALLOC, + BRB_MSG_SAVING_HASH_FAILED); +#define BRB_MSG_FD_CREATE_FAILED_STR "failed to create fd for the inode" +#define BRB_MSG_READV_FAILED_STR "readv failed" +#define BRB_MSG_BLOCK_READ_FAILED_STR "reading block failed" +#define BRB_MSG_NO_MEMORY_STR "failed to allocate memory" +#define BRB_MSG_CALC_CHECKSUM_FAILED_STR "calculating checksum failed" +#define BRB_MSG_GET_SIGN_FAILED_STR "failed to get the signature" +#define BRB_MSG_SET_SIGN_FAILED_STR "signing failed" +#define BRB_MSG_OP_FAILED_STR "failed on object" +#define BRB_MSG_TRIGGER_SIGN_FAILED_STR "Could not trigger signing" +#define BRB_MSG_READ_AND_SIGN_FAILED_STR "reading and signing of object failed" +#define BRB_MSG_SET_TIMER_FAILED_STR "Failed to allocate object expiry timer" +#define BRB_MSG_GET_SUBVOL_FAILED_STR \ + "failed to get the subvolume for the brick" +#define BRB_MSG_PATH_FAILED_STR "path failed" +#define BRB_MSG_SKIP_OBJECT_STR "Entry is marked corrupted. 
skipping" +#define BRB_MSG_PARTIAL_VERSION_PRESENCE_STR \ + "PArtial version xattr presence detected, ignoring" +#define BRB_MSG_TRIGGER_SIGN_STR "Triggering signing" +#define BRB_MSG_CRAWLING_START_STR \ + "Crawling brick, scanning for unsigned objects" +#define BRB_MSG_CRAWLING_FINISH_STR "Completed crawling brick" +#define BRB_MSG_REGISTER_FAILED_STR "Register to changelog failed" +#define BRB_MSG_SPAWN_FAILED_STR "failed to spawn" +#define BRB_MSG_CONNECTED_TO_BRICK_STR "Connected to brick" +#define BRB_MSG_LOOKUP_FAILED_STR "lookup on root failed" +#define BRB_MSG_GET_INFO_FAILED_STR "failed to get stub info" +#define BRB_MSG_SCRUB_THREAD_CLEANUP_STR "Error cleaning up scanner thread" +#define BRB_MSG_SCRUBBER_CLEANED_STR "clened up scrubber for brick" +#define BRB_MSG_SUBVOL_CONNECT_FAILED_STR \ + "callback handler for subvolume failed" +#define BRB_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" +#define BRB_MSG_EVENT_UNHANDLED_STR "Event unhandled for child" +#define BRB_MSG_INVALID_SUBVOL_STR "Got event from invalid subvolume" +#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED_STR \ + "on demand scrub schedule failed. Scrubber is not in pending state." +#define BRB_MSG_COULD_NOT_SCHEDULE_SCRUB_STR \ + "Could not schedule ondemand scrubbing. Scrubbing will continue " \ + "according to old frequency." 
+#define BRB_MSG_THREAD_CREATION_FAILED_STR "thread creation failed" +#define BRB_MSG_RATE_LIMIT_INFO_STR "Rate Limit Info" +#define BRB_MSG_MEM_POOL_ALLOC_STR "failed to allocate mem-pool for timer" +#define BRB_MSG_NO_CHILD_STR "FATAL: no children" +#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE_STR "global timer wheel unavailable" +#define BRB_MSG_BITROT_LOADED_STR "bit-rot xlator loaded" +#define BRB_MSG_SAVING_HASH_FAILED_STR \ + "failed to allocate memory for saving hash of the object" #endif /* !_BITROT_BITD_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c index 34e20f9df11..5cef2ffa5e5 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c @@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat) } void -br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv) +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time) { if (!scrub_stat) return; pthread_mutex_lock(&scrub_stat->lock); { - scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec; + scrub_stat->scrub_start_time = time; } pthread_mutex_unlock(&scrub_stat->lock); } void br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, - struct timeval *tv) + time_t time) { int lst_size = 0; @@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, pthread_mutex_lock(&scrub_stat->lock); { - scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec; + scrub_stat->scrub_end_time = time; - scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec - - scrub_stat->scrub_start_tv.tv_sec; + scrub_stat->scrub_duration = scrub_stat->scrub_end_time - + scrub_stat->scrub_start_time; snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr); } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h index 
24128b90a66..f022aa831eb 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h @@ -15,20 +15,22 @@ #include <sys/time.h> #include <pthread.h> +#include <glusterfs/common-utils.h> + struct br_scrub_stats { - uint64_t scrubbed_files; /* Total number of scrubbed file */ + uint64_t scrubbed_files; /* Total number of scrubbed files. */ - uint64_t unsigned_files; /* Total number of unsigned file */ + uint64_t unsigned_files; /* Total number of unsigned files. */ - uint64_t scrub_duration; /* Duration of last scrub */ + uint64_t scrub_duration; /* Duration of last scrub. */ - char last_scrub_time[1024]; /*last scrub completion time */ + char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */ - struct timeval scrub_start_tv; /* Scrubbing starting time*/ + time_t scrub_start_time; /* Scrubbing starting time. */ - struct timeval scrub_end_tv; /* Scrubbing finishing time */ + time_t scrub_end_time; /* Scrubbing finishing time. */ - int8_t scrub_running; /* Scrub running or not */ + int8_t scrub_running; /* Whether scrub running or not. 
*/ pthread_mutex_t lock; }; @@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat); void br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat); void -br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv); +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time); void br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, - struct timeval *tv); + time_t time); #endif /* __BIT_ROT_SCRUB_STATUS_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 35318dcfa4e..289dd53f610 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -601,25 +601,23 @@ br_fsscan_deactivate(xlator_t *this) static void br_scrubber_log_time(xlator_t *this, const char *sfx) { - char timestr[1024] = { - 0, - }; - struct timeval tv = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; br_private_t *priv = NULL; + time_t now = 0; + now = gf_time(); priv = this->private; - gettimeofday(&tv, NULL); - gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); if (strcasecmp(sfx, "started") == 0) { - br_update_scrub_start_time(&priv->scrub_stat, &tv); + br_update_scrub_start_time(&priv->scrub_stat, now); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, "Scrubbing %s at %s", sfx, timestr); } else { - br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv); + br_update_scrub_finish_time(&priv->scrub_stat, timestr, now); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, "Scrubbing %s at %s", sfx, timestr); } @@ -628,15 +626,13 @@ br_scrubber_log_time(xlator_t *this, const char *sfx) static void br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx) { - char timestr[1024] = { - 0, - }; - struct timeval tv = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; - gettimeofday(&tv, NULL); - 
gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT); + now = gf_time(); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); if (strcasecmp(sfx, "started") == 0) { gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s", @@ -720,8 +716,10 @@ br_scrubber_exit_control(xlator_t *this) if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) { (void)br_fsscan_activate(this); } else { + UNLOCK(&scrub_monitor->lock); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Volume waiting to get rescheduled.."); + return; } } UNLOCK(&scrub_monitor->lock); @@ -917,10 +915,7 @@ br_fsscan_schedule(xlator_t *this) { uint32_t timo = 0; br_private_t *priv = NULL; - struct timeval tv = { - 0, - }; - char timestr[1024] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; struct br_scrubber *fsscrub = NULL; @@ -931,8 +926,7 @@ br_fsscan_schedule(xlator_t *this) fsscrub = &priv->fsscrub; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&tv, NULL); - scrub_monitor->boot = tv.tv_sec; + scrub_monitor->boot = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { @@ -973,12 +967,10 @@ int32_t br_fsscan_activate(xlator_t *this) { uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; br_private_t *priv = NULL; struct br_scrubber *fsscrub = NULL; struct br_monitor *scrub_monitor = NULL; @@ -987,7 +979,7 @@ br_fsscan_activate(xlator_t *this) fsscrub = &priv->fsscrub; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&now, NULL); + now = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, @@ -1001,7 +993,7 @@ br_fsscan_activate(xlator_t *this) } pthread_mutex_unlock(&scrub_monitor->donelock); - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); 
(void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo); _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING); @@ -1018,12 +1010,10 @@ br_fsscan_reschedule(xlator_t *this) { int32_t ret = 0; uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; br_private_t *priv = NULL; struct br_scrubber *fsscrub = NULL; struct br_monitor *scrub_monitor = NULL; @@ -1035,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this) if (!fsscrub->frequency_reconf) return 0; - (void)gettimeofday(&now, NULL); + now = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, @@ -1043,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this) return -1; } - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); pthread_mutex_lock(&scrub_monitor->donelock); { @@ -1071,23 +1061,19 @@ br_fsscan_ondemand(xlator_t *this) { int32_t ret = 0; uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; br_private_t *priv = NULL; struct br_monitor *scrub_monitor = NULL; priv = this->private; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&now, NULL); - + now = gf_time(); timo = BR_SCRUB_ONDEMAND; - - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); pthread_mutex_lock(&scrub_monitor->donelock); { @@ -1655,7 +1641,7 @@ br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd, int32_t ret = -1; off_t offset = 0; int32_t count = 0; - char key[PATH_MAX] = { + char key[32] = { 0, }; dict_t *out_dict = NULL; @@ -1693,7 +1679,7 @@ br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd, } ret = count; - ret = dict_set_int32(dict, "count", 
count); + ret = dict_set_int32_sizen(dict, "count", count); out: return ret; @@ -1775,10 +1761,10 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict, { int32_t ret = -1; int32_t count = 0; - char key[PATH_MAX] = { + char key[32] = { 0, }; - char main_key[PATH_MAX] = { + char main_key[32] = { 0, }; int32_t j = 0; @@ -1790,15 +1776,15 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict, char *path = NULL; int32_t len = 0; - ret = dict_get_int32(child_dict, "count", &count); + ret = dict_get_int32_sizen(child_dict, "count", &count); if (ret) goto out; tmp_count = total_count; for (j = 0; j < count; j++) { - snprintf(key, PATH_MAX, "quarantine-%d", j); - ret = dict_get_str(child_dict, key, &entry); + len = snprintf(key, sizeof(key), "quarantine-%d", j); + ret = dict_get_strn(child_dict, key, len, &entry); if (ret) continue; @@ -1808,7 +1794,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict, if ((len < 0) || (len >= PATH_MAX)) { continue; } - snprintf(main_key, PATH_MAX, "quarantine-%d", tmp_count); + snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count); ret = dict_set_dynstr_with_alloc(dict, main_key, tmp); if (!ret) diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 7b1c5dcdab6..a2f1c343a1d 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -9,10 +9,7 @@ */ #include <ctype.h> -#include <sys/uio.h> -#include <glusterfs/glusterfs.h> -#include <glusterfs/xlator.h> #include <glusterfs/logging.h> #include <glusterfs/compat-errno.h> @@ -244,8 +241,8 @@ br_object_open(xlator_t *this, br_object_t *object, inode_t *inode, ret = -EINVAL; fd = fd_create(inode, 0); if (!fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, - "failed to create fd for the inode %s", uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, 
BRB_MSG_FD_CREATE_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); goto out; } @@ -299,8 +296,8 @@ br_object_read_block_and_sign(xlator_t *this, fd_t *fd, br_child_t *child, NULL, NULL, NULL); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED, - "readv on %s failed", uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); ret = -1; goto out; } @@ -350,9 +347,9 @@ br_calculate_obj_checksum(unsigned char *md, br_child_t *child, fd_t *fd, ret = br_object_read_block_and_sign(this, fd, child, offset, block, &sha256); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED, - "reading block with offset %" PRIu64 " of object %s failed", - offset, uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED, + "offset=%" PRIu64, offset, "object-gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); break; } @@ -394,28 +391,23 @@ br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object, md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char); if (!md) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate memory for saving hash of the " - "object %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_SAVING_HASH_FAILED, + "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } ret = br_object_checksum(md, object, fd, iatt); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED, - "calculating checksum " - "for the object %s failed", - uuid_utoa(linked_inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED, + "object-gfid=%s", uuid_utoa(linked_inode->gfid), NULL); goto free_signature; } sign = br_prepare_signature(md, SHA256_DIGEST_LENGTH, BR_SIGNATURE_TYPE_SHA256, object); if (!sign) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, - "failed to get the 
signature for the object %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, + "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto free_signature; } @@ -423,17 +415,16 @@ br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object, signature_size(SHA256_DIGEST_LENGTH), _gf_true); if (!xattr) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, - "dict allocation for signing failed for the object %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "dict-allocation object-gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); goto free_isign; } ret = syncop_fsetxattr(object->child->xl, fd, xattr, 0, NULL, NULL); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, - "fsetxattr of signature to the object %s failed", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "fsetxattr object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto unref_dict; } @@ -466,8 +457,8 @@ br_log_object(xlator_t *this, char *op, uuid_t gfid, int32_t op_errno) "[reason: %s]", op, uuid_utoa(gfid), strerror(op_errno)); } else { - gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, - "%s() failed on object %s", op, uuid_utoa(gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s", + op, "gfid=%s", uuid_utoa(gfid), NULL); } } @@ -481,8 +472,8 @@ br_log_object_path(xlator_t *this, char *op, const char *path, int32_t op_errno) "[reason: %s]", op, path, strerror(op_errno)); } else { - gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, - "%s() failed on object %s", op, path); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s", + op, "path=%s", path, NULL); } } @@ -511,8 +502,8 @@ br_trigger_sign(xlator_t *this, br_child_t *child, inode_t *linked_inode, ret = -1; fd = fd_create(linked_inode, 0); if (!fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, 
BRB_MSG_FD_CREATE_FAILED, - "Failed to create fd [GFID %s]", uuid_utoa(linked_inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), NULL); goto cleanup_dict; } @@ -536,9 +527,9 @@ cleanup_dict: dict_unref(dict); out: if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN, - "Could not trigger signingd for %s (reopen hint: %d)", - uuid_utoa(linked_inode->gfid), val); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), "reopen-hint-val=%d", + val, NULL); } } @@ -618,10 +609,8 @@ br_sign_object(br_object_t *object) ret = br_object_read_sign(linked_inode, fd, object, &iatt); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED, - "reading and signing of " - "the object %s failed", - uuid_utoa(linked_inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), NULL); goto unref_fd; } @@ -675,8 +664,8 @@ br_process_object(void *arg) ret = br_sign_object(object); if (ret && !br_object_sign_softerror(-ret)) - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SIGN_FAILED, - "SIGNING FAILURE [%s]", uuid_utoa(object->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "gfid=%s", uuid_utoa(object->gfid), NULL); GF_FREE(object); } @@ -778,9 +767,8 @@ br_schedule_object_reopen(xlator_t *this, br_object_t *object, timer = br_initialize_timer(this, object, child, ev); if (!timer) - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED, - "Failed to allocate object expiry timer [GFID: %s]", - uuid_utoa(object->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED, + "gfid=%s", uuid_utoa(object->gfid), NULL); return timer ? 
0 : -1; } @@ -827,15 +815,15 @@ br_brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev) child = br_get_child_from_brick_path(this, brick); if (!child) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED, - "failed to get the subvolume for the brick %s", brick); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED, + "brick=%s", brick, NULL); goto out; } object = br_initialize_object(this, child, ev); if (!object) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate object memory [GFID: %s]", uuid_utoa(gfid)); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, + "object-gfid=%s", uuid_utoa(gfid), NULL); goto out; } @@ -887,8 +875,8 @@ br_check_object_need_sign(xlator_t *this, dict_t *xattr, br_child_t *child) ret = dict_get_ptr(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)&sign); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, - "failed to get object signature info"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, + "object-info", NULL); goto out; } @@ -927,9 +915,9 @@ br_prepare_loc(xlator_t *this, br_child_t *child, loc_t *parent, ret = inode_path(parent->inode, entry->d_name, (char **)&loc->path); if (ret < 0 || !loc->path) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED, - "inode_path on %s (parent: %s) failed", entry->d_name, - uuid_utoa(parent->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED, + "inode_path=%s", entry->d_name, "parent-gfid=%s", + uuid_utoa(parent->inode->gfid), NULL); goto out; } @@ -973,6 +961,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, int32_t ret = -1; inode_t *linked_inode = NULL; gf_boolean_t need_signing = _gf_false; + gf_boolean_t need_reopen = _gf_true; GF_VALIDATE_OR_GOTO("bit-rot", subvol, out); GF_VALIDATE_OR_GOTO("bit-rot", data, out); @@ -1020,8 +1009,8 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, 
*/ if (bitd_is_bad_file(this, child, &loc, NULL)) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, - "Entry [%s] is marked corrupted.. skipping.", loc.path); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, "path=%s", + loc.path, NULL); goto unref_inode; } @@ -1038,23 +1027,32 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (op_errno == ENODATA && (iatt.ia_size != 0)) need_signing = _gf_true; if (op_errno == EINVAL) - gf_msg(this->name, GF_LOG_WARNING, 0, - BRB_MSG_PARTIAL_VERSION_PRESENCE, - "Partial " - "version xattr presence detected, ignoring " - "[GFID: %s]", - uuid_utoa(linked_inode->gfid)); + gf_smsg(this->name, GF_LOG_WARNING, 0, + BRB_MSG_PARTIAL_VERSION_PRESENCE, "gfid=%s", + uuid_utoa(linked_inode->gfid), NULL); } else { need_signing = br_check_object_need_sign(this, xattr, child); + + /* + * If we are here means, bitrot daemon has started. Is it just + * a simple restart of the daemon or is it started because the + * feature is enabled is something hard to determine. Hence, + * if need_signing is false (because bit-rot version and signature + * are present), then still go ahead and sign it. 
+ */ + if (!need_signing) { + need_signing = _gf_true; + need_reopen = _gf_true; + } } if (!need_signing) goto unref_dict; - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, - "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path, - uuid_utoa(linked_inode->gfid), child->brick_path); - br_trigger_sign(this, child, linked_inode, &loc, _gf_true); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, "path=%s", + loc.path, "gfid=%s", uuid_utoa(linked_inode->gfid), "Brick-path=%s", + child->brick_path, NULL); + br_trigger_sign(this, child, linked_inode, &loc, need_reopen); ret = 0; @@ -1086,17 +1084,16 @@ br_oneshot_signer(void *arg) THIS = this; - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, - "Crawling brick [%s], scanning for unsigned objects", - child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, "brick-path=%s", + child->brick_path, NULL); loc.inode = child->table->root; (void)syncop_ftw_throttle(child->xl, &loc, GF_CLIENT_PID_BITD, child, bitd_oneshot_crawl, BR_CRAWL_THROTTLE_COUNT, BR_CRAWL_THROTTLE_ZZZ); - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH, - "Completed crawling brick [%s]", child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH, + "brick-path=%s", child->brick_path, NULL); return NULL; } @@ -1140,9 +1137,7 @@ br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub) ret = gf_changelog_register_generic(brick, 1, 1, this->ctx->cmd_args.log_file, -1, this); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, - "Register to changelog " - "failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, NULL); goto dealloc; } @@ -1150,8 +1145,8 @@ br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub) ret = gf_thread_create(&child->thread, NULL, br_oneshot_signer, child, "brosign"); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED, - "failed to spawn FS 
crawler thread"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED, + "FS-crawler-thread", NULL); else child->threadrunning = 1; @@ -1179,9 +1174,9 @@ br_launch_scrubber(xlator_t *this, br_child_t *child, struct br_scanfs *fsscan, ret = gf_thread_create(&child->thread, NULL, br_fsscanner, child, "brfsscan"); if (ret != 0) { - gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, - "failed to spawn bitrot scrubber daemon [Brick: %s]", - child->brick_path); + gf_smsg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, + "bitrot-scrubber-daemon Brick-path=%s", child->brick_path, + NULL); goto error_return; } @@ -1269,8 +1264,8 @@ br_child_enaction(xlator_t *this, br_child_t *child, br_stub_init_t *stub) if (!ret) { child->witnessed = 1; _br_set_child_state(child, BR_CHILD_STATE_CONNECTED); - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK, - "Connected to brick %s..", child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK, + "brick-path=%s", child->brick_path, NULL); } } pthread_mutex_unlock(&child->lock); @@ -1317,8 +1312,8 @@ br_brick_connect(xlator_t *this, br_child_t *child) if (ret) { op_errno = -ret; ret = -1; - gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED, - "lookup on root failed"); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED, + NULL); goto wipeloc; } @@ -1327,15 +1322,14 @@ br_brick_connect(xlator_t *this, br_child_t *child) if (ret) { op_errno = -ret; ret = -1; - gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED, - "failed to get stub info"); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED, + NULL); goto wipeloc; } ret = dict_get_ptr(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, (void **)&stub); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, - "failed to extract stub information"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, NULL); goto free_dict; } @@ -1405,11 +1399,10 @@ 
br_cleanup_scrubber(xlator_t *this, br_child_t *child) */ ret = gf_thread_cleanup_xint(child->thread); if (ret) - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, - "Error cleaning up scanner thread"); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, NULL); - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED, - "Cleaned up scrubber for brick [%s]", child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED, + "brick-path=%s", child->brick_path, NULL); return 0; } @@ -1494,9 +1487,8 @@ br_handle_events(void *arg) child = childev->child; ret = childev->call(this, child); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED, - "callback handler for subvolume [%s] failed", - child->xl->name); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED, + "name=%s", child->xl->name, NULL); GF_FREE(childev); } @@ -1514,8 +1506,7 @@ mem_acct_init(xlator_t *this) ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1); if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, - "Memory accounting init failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, NULL); return ret; } @@ -1532,8 +1523,8 @@ _br_qchild_event(xlator_t *this, br_child_t *child, br_child_handler *call) childev = GF_CALLOC(1, sizeof(*childev), gf_br_mt_br_child_event_t); if (!childev) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "Event unhandled for child.. [Brick: %s]", child->xl->name); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_EVENT_UNHANDLED, + "Brick-name=%s", child->xl->name, NULL); return; } @@ -1628,10 +1619,8 @@ notify(xlator_t *this, int32_t event, void *data, ...) 
switch (event) { case GF_EVENT_CHILD_UP: if (idx < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL, - "Got event %d from " - "invalid subvolume", - event); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL, + "event=%d", event, NULL); goto out; } @@ -1659,9 +1648,8 @@ notify(xlator_t *this, int32_t event, void *data, ...) case GF_EVENT_CHILD_DOWN: if (idx < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, - BRB_MSG_INVALID_SUBVOL_CHILD, - "Got event %d from invalid subvolume", event); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL, + "event=%d", event, NULL); goto out; } @@ -1702,11 +1690,9 @@ notify(xlator_t *this, int32_t event, void *data, ...) "called"); if (scrub_monitor->state != BR_SCRUB_STATE_PENDING) { - gf_msg(this->name, GF_LOG_ERROR, 0, - BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, - "on demand scrub schedule failed. Scrubber is " - "not in pending state. Current state is %d", - scrub_monitor->state); + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, "Current-state=%d", + scrub_monitor->state, NULL); return -2; } @@ -1718,11 +1704,8 @@ notify(xlator_t *this, int32_t event, void *data, ...) pthread_mutex_unlock(&priv->lock); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, - "Could not schedule ondemand scrubbing. " - "Scrubbing will continue according to " - "old frequency."); + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, NULL); } gf_msg_debug(this->name, 0, "returning %d", ret); break; @@ -1734,22 +1717,26 @@ out: return 0; } -/** - * Initialize signer specific structures, spawn worker threads. 
- */ - static void br_fini_signer(xlator_t *this, br_private_t *priv) { int i = 0; - for (; i < BR_WORKERS; i++) { + if (priv == NULL) + return; + + for (; i < priv->signer_th_count; i++) { (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); } + GF_FREE(priv->obj_queue->workers); pthread_cond_destroy(&priv->object_cond); } +/** + * Initialize signer specific structures, spawn worker threads. + */ + static int32_t br_init_signer(xlator_t *this, br_private_t *priv) { @@ -1769,13 +1756,17 @@ br_init_signer(xlator_t *this, br_private_t *priv) goto cleanup_cond; INIT_LIST_HEAD(&priv->obj_queue->objects); - for (i = 0; i < BR_WORKERS; i++) { + priv->obj_queue->workers = GF_CALLOC( + priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t); + if (!priv->obj_queue->workers) + goto cleanup_obj_queue; + + for (i = 0; i < priv->signer_th_count; i++) { ret = gf_thread_create(&priv->obj_queue->workers[i], NULL, br_process_object, this, "brpobj"); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED, - "thread creation" - " failed"); + gf_smsg(this->name, GF_LOG_ERROR, -ret, + BRB_MSG_THREAD_CREATION_FAILED, NULL); ret = -1; goto cleanup_threads; } @@ -1787,7 +1778,9 @@ cleanup_threads: for (i--; i >= 0; i--) { (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); } + GF_FREE(priv->obj_queue->workers); +cleanup_obj_queue: GF_FREE(priv->obj_queue); cleanup_cond: @@ -1840,18 +1833,17 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) if (contribution == 0) contribution = 1; spec.rate = BR_HASH_CALC_READ_SIZE * contribution; - spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE; + spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE; #endif if (!spec.rate) - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, - "[Rate Limit Info] \"FULL THROTTLE\""); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, + "FULL THROTTLE", NULL); else - gf_msg(this->name, GF_LOG_INFO, 0, 
BRB_MSG_RATE_LIMIT_INFO, - "[Rate Limit Info] \"tokens/sec (rate): %lu, " - "maxlimit: %lu\"", - spec.rate, spec.maxlimit); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, + "tokens/sec-rate=%lu", spec.rate, "maxlimit=%lu", spec.maxlimit, + NULL); priv->tbf = tbf_init(&spec, 1); return priv->tbf ? 0 : -1; @@ -1860,11 +1852,16 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) static int32_t br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options) { - if (options) + if (options) { GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32, error_return); - else + GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options, + uint32, error_return); + } else { GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); + GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, + error_return); + } return 0; @@ -1880,6 +1877,8 @@ br_signer_init(xlator_t *this, br_private_t *priv) GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); GF_OPTION_INIT("brick-count", numbricks, int32, error_return); + GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, + error_return); ret = br_rate_limit_signer(this, priv->child_count, numbricks); if (ret) @@ -1966,8 +1965,8 @@ br_init_children(xlator_t *this, br_private_t *priv) child->timer_pool = mem_pool_new(struct gf_tw_timer_list, 4096); if (!child->timer_pool) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate mem-pool for timer"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_MEM_POOL_ALLOC, + NULL); errno = ENOMEM; goto freechild; } @@ -1993,15 +1992,13 @@ init(xlator_t *this) br_private_t *priv = NULL; if (!this->children) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, - "FATAL: no children"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, NULL); goto out; } priv = GF_CALLOC(1, sizeof(*priv), gf_br_mt_br_private_t); if (!priv) { - gf_msg(this->name, 
GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate memory (->priv)"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, NULL); goto out; } @@ -2019,8 +2016,8 @@ init(xlator_t *this) priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx); if (!priv->timer_wheel) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE, - "global timer wheel unavailable"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE, + NULL); goto cleanup; } @@ -2042,15 +2039,14 @@ init(xlator_t *this) ret = gf_thread_create(&priv->thread, NULL, br_handle_events, this, "brhevent"); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED, - "thread creation failed"); + gf_smsg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_THREAD_CREATION_FAILED, + NULL); ret = -1; } if (!ret) { - gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, - "bit-rot xlator loaded in \"%s\" mode", - (priv->iamscrubber) ? "SCRUBBER" : "SIGNER"); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, "mode=%s", + (priv->iamscrubber) ? "SCRUBBER" : "SIGNER", NULL); return 0; } @@ -2097,9 +2093,8 @@ br_reconfigure_monitor(xlator_t *this) ret = br_scrub_state_machine(this, _gf_false); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, - "Could not reschedule scrubber for the volume. Scrubbing " - "will continue according to old frequency."); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, + NULL); } } @@ -2210,6 +2205,15 @@ struct volume_options options[] = { .description = "Pause/Resume scrub. Upon resume, scrubber " "continues from where it left off.", }, + { + .key = {"signer-threads"}, + .type = GF_OPTION_TYPE_INT, + .default_value = BR_WORKERS, + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Number of signing process threads. 
As a best " + "practice, set this to the number of processor cores", + }, {.key = {NULL}}, }; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index a4d4fd74198..8ac7dcdac3d 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -30,12 +30,6 @@ #include <openssl/sha.h> -/** - * TODO: make this configurable. As a best practice, set this to the - * number of processor cores. - */ -#define BR_WORKERS 4 - typedef enum scrub_throttle { BR_SCRUB_THROTTLE_VOID = -1, BR_SCRUB_THROTTLE_LAZY = 0, @@ -108,12 +102,12 @@ struct br_child { typedef struct br_child br_child_t; struct br_obj_n_workers { - struct list_head objects; /* queue of objects expired from the - timer wheel and ready to be picked - up for signing */ - pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects - from the above queue and start - signing each object */ + struct list_head objects; /* queue of objects expired from the + timer wheel and ready to be picked + up for signing */ + pthread_t *workers; /* Threads which pick up the objects + from the above queue and start + signing each object */ }; struct br_scrubber { @@ -209,6 +203,8 @@ struct br_private { uint32_t expiry_time; /* objects "wait" time */ + uint32_t signer_th_count; /* Number of signing process threads */ + tbf_t *tbf; /* token bucket filter */ gf_boolean_t iamscrubber; /* function as a fs scrubber */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c index cb567297b60..8ac13a09941 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c @@ -133,8 +133,8 @@ br_stub_add(xlator_t *this, uuid_t gfid) * show up less number of objects. That's fine as we'll have * the log files that will have the missing information. 
*/ - gf_msg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL, - "failed to record gfid [%s]", uuid_utoa(gfid)); + gf_smsg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL, "gfid=%s", + uuid_utoa(gfid), NULL); } return 0; @@ -157,10 +157,8 @@ br_stub_del(xlator_t *this, uuid_t gfid) uuid_utoa(gfid)); ret = sys_unlink(gfid_path); if (ret && (errno != ENOENT)) { - gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJ_UNLINK_FAIL, - "%s: failed to delete bad object link from quarantine " - "directory", - gfid_path); + gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJ_UNLINK_FAIL, + "path=%s", gfid_path, NULL); ret = -errno; goto out; } @@ -200,13 +198,13 @@ br_stub_check_stub_directory(xlator_t *this, char *fullpath) } if (ret) - gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, - "failed to create stub directory [%s]", fullpath); + gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, + "create-path=%s", fullpath, NULL); return ret; error_return: - gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, - "Failed to verify stub directory [%s]", fullpath); + gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, + "verify-path=%s", fullpath, NULL); return -1; } @@ -231,8 +229,8 @@ br_stub_check_stub_file(xlator_t *this, char *path) goto error_return; fd = sys_creat(path, 0); if (fd < 0) - gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, - "Failed to create stub file [%s]", path); + gf_smsg(this->name, GF_LOG_ERROR, errno, + BRS_MSG_BAD_OBJECT_DIR_FAIL, "create-path=%s", path, NULL); } if (fd >= 0) { @@ -243,8 +241,8 @@ br_stub_check_stub_file(xlator_t *this, char *path) return ret; error_return: - gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, - "Failed to verify stub file [%s]", path); + gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, + "verify-path=%s", path, NULL); return -1; } @@ -463,12 +461,9 @@ br_stub_fill_readdir(fd_t 
*fd, br_stub_fd_t *fctx, DIR *dir, off_t off, seekdir(dir, off); #ifndef GF_LINUX_HOST_OS if ((u_long)telldir(dir) != off && off != fctx->bad_object.dir_eof) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, - BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, - "seekdir(0x%llx) failed on dir=%p: " - "Invalid argument (offset reused from " - "another DIR * structure?)", - off, dir); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, + BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "off=(0x%llx)", off, + "dir=%p", dir, NULL); errno = EINVAL; count = -1; goto out; @@ -480,9 +475,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off, in_case = (u_long)telldir(dir); if (in_case == -1) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, - BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL, - "telldir failed on dir=%p: %s", dir, strerror(errno)); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, + BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL, "dir=%p", dir, "err=%s", + strerror(errno), NULL); goto out; } @@ -490,9 +485,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off, entry = sys_readdir(dir, scratch); if (!entry || errno != 0) { if (errno == EBADF) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, - BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, - "readdir failed on dir=%p: %s", dir, strerror(errno)); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, + BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, "dir=%p", dir, + "err=%s", strerror(errno), NULL); goto out; } break; @@ -514,12 +509,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off, #ifndef GF_LINUX_HOST_OS if ((u_long)telldir(dir) != in_case && in_case != fctx->bad_object.dir_eof) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, - BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, - "seekdir(0x%llx) failed on dir=%p: " - "Invalid argument (offset reused from " - "another DIR * structure?)", - in_case, dir); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, + BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "in_case=(0x%llx)", + in_case, "dir=%p", dir, NULL); errno = EINVAL; count = -1; goto out; @@ -531,9 +523,9 @@ br_stub_fill_readdir(fd_t 
*fd, br_stub_fd_t *fctx, DIR *dir, off_t off, this_entry = gf_dirent_for_name(entry->d_name); if (!this_entry) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY, - "could not create gf_dirent for entry %s: (%s)", - entry->d_name, strerror(errno)); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, + BRS_MSG_CREATE_GF_DIRENT_FAILED, "entry-name=%s", + entry->d_name, "err=%s", strerror(errno), NULL); goto out; } /* @@ -580,8 +572,8 @@ br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, fctx = br_stub_fd_ctx_get(this, fd); if (!fctx) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED, - "pfd is NULL, fd=%p", fd); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED, + "fd=%p", fd, NULL); op_errno = -ret; goto done; } @@ -589,8 +581,8 @@ br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, dir = fctx->bad_object.dir; if (!dir) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL, - "dir is NULL for fd=%p", fd); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL, + "fd=%p", fd, NULL); op_errno = EINVAL; goto done; } @@ -680,10 +672,7 @@ br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries, * be shown. */ if (!tmp_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY, - "failed to allocate new dict for saving the paths " - "of the corrupted objects. 
Scrub status will only " - "display the gfid"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_FAILED, NULL); goto out; } } @@ -707,9 +696,8 @@ br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries, uuid_utoa(gfid), hpath); br_stub_entry_xattr_fill(this, hpath, entry, tmp_dict); } else - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, - "failed to get the path for the inode %s", - uuid_utoa_r(gfid, str_gfid)); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, + "gfid=%s", uuid_utoa_r(gfid, str_gfid), NULL); inode = NULL; hpath = NULL; @@ -744,10 +732,8 @@ br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode, ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid, inode, path, _gf_true); if (ret < 0) - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, - "failed to get the path xattr from disk for the " - " gfid %s. Trying to get path from the memory", - uuid_utoa_r(gfid, gfid_str)); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, + "gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL); /* * Try with soft resolution of path if hard resolve fails. 
Because @@ -768,9 +754,8 @@ br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode, ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid, inode, path, _gf_false); if (ret < 0) - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, - "failed to get the path from the memory for gfid %s", - uuid_utoa_r(gfid, gfid_str)); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, + "from-memory gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL); } out: @@ -804,10 +789,8 @@ br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry, ret = dict_set_dynstr(dict, entry->d_name, hpath); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED, - "failed to set the actual path %s as the value in the " - "dict for the corrupted object %s", - hpath, entry->d_name); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED, + "path=%s", hpath, "object-name=%s", entry->d_name, NULL); out: return; } diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h index 40bcda110e6..9d93caf069f 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h @@ -29,6 +29,7 @@ enum br_mem_types { gf_br_stub_mt_sigstub_t, gf_br_mt_br_child_event_t, gf_br_stub_mt_misc, + gf_br_mt_br_worker_t, gf_br_stub_mt_end, }; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 7f07f2929ad..6c15a166f18 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -39,6 +39,79 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL, BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED, - 
BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL); + BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL, + BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD, + BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL, + BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED, + BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED, + BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED, + BRS_MSG_VERSION_PREPARE_FAIL); +#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" +#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed" +#define BRS_MSG_USING_DEFAULT_THREAD_SIZE_STR "Using default thread stack size" +#define BRS_MSG_NO_CHILD_STR "FATAL: no children" +#define BRS_MSG_SPAWN_SIGN_THRD_FAILED_STR \ + "failed to create the new thread for signer" +#define BRS_MSG_BAD_CONTAINER_FAIL_STR \ + "failed to launch the thread for storing bad gfids" +#define BRS_MSG_CANCEL_SIGN_THREAD_FAILED_STR \ + "Could not cancel sign serializer thread" +#define BRS_MSG_KILL_SIGN_THREAD_STR "killed the signer thread" +#define BRS_MSG_GET_INODE_CONTEXT_FAILED_STR \ + "failed to init the inode context for the inode" +#define BRS_MSG_ADD_FD_TO_INODE_STR "failed to add fd to the inode" +#define BRS_MSG_NO_MEMORY_STR "local allocation failed" +#define BRS_MSG_BAD_OBJECT_ACCESS_STR "bad object accessed. Returning" +#define BRS_MSG_SIGN_VERSION_ERROR_STR "Signing version exceeds current version" +#define BRS_MSG_NON_BITD_PID_STR \ + "PID from where signature request came, does not belong to bit-rot " \ + "daemon. Unwinding the fop" +#define BRS_MSG_SIGN_PREPARE_FAIL_STR \ + "failed to prepare the signature. Unwinding the fop" +#define BRS_MSG_VERSION_PREPARE_FAIL_STR \ + "failed to prepare the version. 
Unwinding the fop" +#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding" +#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad" +#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \ + "bad object marking is not from the scrubber" +#define BRS_MSG_ALLOC_MEM_FAILED_STR "failed to allocate memory" +#define BRS_MSG_SET_INTERNAL_XATTR_STR "called on the internal xattr" +#define BRS_MSG_REMOVE_INTERNAL_XATTR_STR "removexattr called on internal xattr" +#define BRS_MSG_CREATE_ANONYMOUS_FD_FAILED_STR \ + "failed to create anonymous fd for the inode" +#define BRS_MSG_ADD_FD_TO_LIST_FAILED_STR "failed add fd to the list" +#define BRS_MSG_SET_FD_CONTEXT_FAILED_STR \ + "failed to set the fd context for the file" +#define BRS_MSG_NULL_LOCAL_STR "local is NULL" +#define BRS_MSG_DICT_ALLOC_FAILED_STR \ + "dict allocation failed: cannot send IPC FOP to changelog" +#define BRS_MSG_SET_EVENT_FAILED_STR "cannot set release event in dict" +#define BRS_MSG_CREATE_FRAME_FAILED_STR "create_frame() failure" +#define BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL_STR "closedir error" +#define BRS_MSG_LINK_FAIL_STR "failed to record gfid" +#define BRS_MSG_BAD_OBJ_UNLINK_FAIL_STR \ + "failed to delete bad object link from quaratine directory" +#define BRS_MSG_BAD_OBJECT_DIR_FAIL_STR "failed stub directory" +#define BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL_STR \ + "seekdir failed. Invalid argument (offset reused from another DIR * " \ + "structure)" +#define BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL_STR "telldir failed on dir" +#define BRS_MSG_BAD_OBJECT_DIR_READ_FAIL_STR "readdir failed on dir" +#define BRS_MSG_CREATE_GF_DIRENT_FAILED_STR "could not create gf_dirent" +#define BRS_MSG_GET_FD_CONTEXT_FAILED_STR "pfd is NULL" +#define BRS_MSG_BAD_HANDLE_DIR_NULL_STR "dir if NULL" +#define BRS_MSG_ALLOC_FAILED_STR \ + "failed to allocate new dict for saving the paths of the corrupted " \ + "objects. 
Scrub status will only display the gfid" +#define BRS_MSG_PATH_GET_FAILED_STR "failed to get the path" +#define BRS_MSG_PATH_XATTR_GET_FAILED_STR \ + "failed to get the path xattr from disk for the gfid. Trying to get path " \ + "from the memory" +#define BRS_MSG_DICT_SET_FAILED_STR \ + "failed to set the actual path as the value in the dict for the " \ + "corrupted object" +#define BRS_MSG_SET_CONTEXT_FAILED_STR \ + "could not set fd context for release callback" +#define BRS_MSG_CHANGE_VERSION_FAILED_STR "change version failed" #endif /* !_BITROT_STUB_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 58021089ff6..447dd47ff41 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -13,7 +13,6 @@ #include <signal.h> #include <glusterfs/glusterfs.h> -#include <glusterfs/xlator.h> #include <glusterfs/logging.h> #include "changelog.h" #include <glusterfs/compat-errno.h> @@ -26,6 +25,15 @@ #define BR_STUB_REQUEST_COOKIE 0x1 +void +br_stub_lock_cleaner(void *arg) +{ + pthread_mutex_t *clean_mutex = arg; + + pthread_mutex_unlock(clean_mutex); + return; +} + void * br_stub_signth(void *); @@ -48,8 +56,7 @@ mem_acct_init(xlator_t *this) ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1); if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED, - "Memory accounting init failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED, NULL); return ret; } @@ -64,29 +71,29 @@ br_stub_bad_object_container_init(xlator_t *this, br_stub_private_t *priv) ret = pthread_cond_init(&priv->container.bad_cond, NULL); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, - "pthread_cond_init failed (%d)", ret); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, + "cond_init ret=%d", ret, NULL); goto out; } ret = pthread_mutex_init(&priv->container.bad_lock, 
NULL); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, - "pthread_mutex_init failed (%d)", ret); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, + "mutex_init ret=%d", ret, NULL); goto cleanup_cond; } ret = pthread_attr_init(&w_attr); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, - "pthread_attr_init failed (%d)", ret); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, + "attr_init ret=%d", ret, NULL); goto cleanup_lock; } ret = pthread_attr_setstacksize(&w_attr, BAD_OBJECT_THREAD_STACK_SIZE); if (ret == EINVAL) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, - "Using default thread stack size"); + gf_smsg(this->name, GF_LOG_WARNING, 0, + BRS_MSG_USING_DEFAULT_THREAD_SIZE, NULL); } INIT_LIST_HEAD(&priv->container.bad_queue); @@ -122,8 +129,7 @@ init(xlator_t *this) br_stub_private_t *priv = NULL; if (!this->children) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD, - "FATAL: no children"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD, NULL); goto error_return; } @@ -161,16 +167,20 @@ init(xlator_t *this) * assigned inside the thread. So setting this->private here. 
*/ this->private = priv; + if (!priv->do_versioning) + return 0; ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, "brssign"); - if (ret != 0) + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED, + NULL); goto cleanup_lock; + } ret = br_stub_bad_object_container_init(this, priv); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, - "failed to launch the thread for storing bad gfids"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, NULL); goto cleanup_lock; } @@ -183,6 +193,7 @@ cleanup_lock: pthread_mutex_destroy(&priv->lock); free_mempool: mem_pool_destroy(priv->local_pool); + priv->local_pool = NULL; free_priv: GF_FREE(priv); this->private = NULL; @@ -211,10 +222,62 @@ reconfigure(xlator_t *this, dict_t *options) priv = this->private; - GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, out); + GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err); + if (priv->do_versioning && !priv->signth) { + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, 0, + BRS_MSG_SPAWN_SIGN_THRD_FAILED, NULL); + goto err; + } + + ret = br_stub_bad_object_container_init(this, priv); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, + NULL); + goto err; + } + } else { + if (priv->signth) { + if (gf_thread_cleanup_xint(priv->signth)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL); + } else { + gf_smsg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD, + NULL); + priv->signth = 0; + } + } + + if (priv->container.thread) { + if (gf_thread_cleanup_xint(priv->container.thread)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL); + } + priv->container.thread = 0; + } + } ret = 0; -out: + return ret; +err: + if (priv->signth) { + if (gf_thread_cleanup_xint(priv->signth)) { + 
gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL); + } + priv->signth = 0; + } + + if (priv->container.thread) { + if (gf_thread_cleanup_xint(priv->container.thread)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL); + } + priv->container.thread = 0; + } + ret = -1; return ret; } @@ -245,10 +308,13 @@ fini(xlator_t *this) if (!priv) return; + if (!priv->do_versioning) + goto cleanup; + ret = gf_thread_cleanup_xint(priv->signth); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED, - "Could not cancel sign serializer thread"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED, + NULL); goto out; } priv->signth = 0; @@ -262,13 +328,10 @@ fini(xlator_t *this) GF_FREE(sigstub); } - pthread_mutex_destroy(&priv->lock); - pthread_cond_destroy(&priv->cond); - ret = gf_thread_cleanup_xint(priv->container.thread); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED, - "Could not cancel sign serializer thread"); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED, + NULL); goto out; } @@ -280,14 +343,18 @@ fini(xlator_t *this) call_stub_destroy(stub); } + pthread_mutex_destroy(&priv->container.bad_lock); + pthread_cond_destroy(&priv->container.bad_cond); + +cleanup: + pthread_mutex_destroy(&priv->lock); + pthread_cond_destroy(&priv->cond); + if (priv->local_pool) { mem_pool_destroy(priv->local_pool); priv->local_pool = NULL; } - pthread_mutex_destroy(&priv->container.bad_lock); - pthread_cond_destroy(&priv->container.bad_cond); - this->private = NULL; GF_FREE(priv); @@ -357,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict, priv = this->private; br_set_ongoingversion(obuf, oversion, priv->boot); - return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, - sizeof(br_version_t)); + return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, + 
sizeof(br_version_t)); } static int @@ -369,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf, br_set_signature(sbuf, sign, signaturelen, &size); - return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, - size); + return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size); } /** @@ -510,11 +576,9 @@ br_stub_need_versioning(xlator_t *this, fd_t *fd, gf_boolean_t *versioning, ret = br_stub_init_inode_versions(this, fd, fd->inode, version, _gf_true, _gf_false, &ctx_addr); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - BRS_MSG_GET_INODE_CONTEXT_FAILED, - "failed to " - " init the inode context for the inode %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); goto error_return; } } @@ -548,10 +612,8 @@ br_stub_anon_fd_ctx(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) if (!br_stub_fd) { ret = br_stub_add_fd_to_inode(this, fd, ctx); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE, - "failed to add fd to " - "the inode (gfid: %s)", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } } @@ -571,9 +633,8 @@ br_stub_versioning_prep(call_frame_t *frame, xlator_t *this, fd_t *fd, local = br_stub_alloc_local(this); if (!local) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, - "local allocation failed (gfid: %s)", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); goto error_return; } @@ -643,8 +704,8 @@ br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret, ret = br_stub_is_bad_object(this, inode); if (ret == -2) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS, - "%s is a bad object. 
Returning", uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS, + "gfid=%s", uuid_utoa(inode->gfid), NULL); *op_ret = -1; *op_errno = EIO; } @@ -653,9 +714,9 @@ br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret, ret = br_stub_init_inode_versions(this, NULL, inode, version, _gf_true, _gf_false, NULL); if (ret) { - gf_msg( - this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, - "failed to init inode context for %s", uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(inode->gfid), NULL); *op_ret = -1; *op_errno = EINVAL; } @@ -792,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame, op_errno = ENOMEM; dict = dict_new(); if (!dict) - goto done; + goto out; ret = br_stub_alloc_versions(&obuf, NULL, 0); - if (ret) - goto dealloc_dict; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + br_stub_dealloc_versions(obuf); + goto out; + } ret = br_stub_fd_versioning( this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk, writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE); - -dealloc_versions: - br_stub_dealloc_versions(obuf); -dealloc_dict: - dict_unref(dict); -done: +out: + if (dict) + dict_unref(dict); if (ret) { if (local) frame->local = NULL; @@ -846,6 +911,24 @@ br_stub_signth(void *arg) THIS = this; while (1) { + /* + * Disabling bit-rot feature leads to this particular thread + * getting cleaned up by reconfigure via a call to the function + * gf_thread_cleanup_xint (which in turn calls pthread_cancel + * and pthread_join). 
But, if this thread had held the mutex + * &priv->lock at the time of cancellation, then it leads to + * deadlock in future when bit-rot feature is enabled (which + * again spawns this thread which cant hold the lock as the + * mutex is still held by the previous instance of the thread + * which got killed). Also, the br_stub_handle_object_signature + * function which is called whenever file has to be signed + * also gets blocked as it too attempts to acquire &priv->lock. + * + * So, arrange for the lock to be unlocked as part of the + * cleanup of this thread using pthread_cleanup_push and + * pthread_cleanup_pop. + */ + pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock); pthread_mutex_lock(&priv->lock); { while (list_empty(&priv->squeue)) @@ -856,6 +939,7 @@ br_stub_signth(void *arg) list_del_init(&sigstub->list); } pthread_mutex_unlock(&priv->lock); + pthread_cleanup_pop(0); call_resume(sigstub->stub); @@ -931,10 +1015,9 @@ br_stub_compare_sign_version(xlator_t *this, inode_t *inode, if (invalid) { ret = -1; - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR, - "Signing version exceeds " - "current version [%lu > %lu]", - sbuf->signedversion, ctx->currentversion); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR, + "Signing-ver=%lu", sbuf->signedversion, "current-ver=%lu", + ctx->currentversion, NULL); } out: @@ -945,31 +1028,36 @@ static int br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode, br_isignature_t *sign, int *fakesuccess) { - int32_t ret = 0; + int32_t ret = -1; size_t signaturelen = 0; br_signature_t *sbuf = NULL; if (!br_is_signature_type_valid(sign->signaturetype)) - goto error_return; + goto out; signaturelen = sign->signaturelen; ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen); - if (ret) - goto error_return; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + goto out; + } ret = 
br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + br_stub_dealloc_versions(sbuf); + goto out; + } + /* At this point sbuf has been added to dict, so the memory will be freed + * when the data from the dict is destroyed + */ ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess); - if (ret) - goto dealloc_versions; - - return 0; - -dealloc_versions: - br_stub_dealloc_versions(sbuf); -error_return: - return -1; +out: + return ret; } static void @@ -986,12 +1074,18 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; - if (frame->root->pid != GF_CLIENT_PID_BITD) + if (frame->root->pid != GF_CLIENT_PID_BITD) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID, + "PID=%d", frame->root->pid, NULL); goto dofop; + } ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto dofop; + } if (fakesuccess) { op_ret = op_errno = 0; goto dofop; @@ -1141,10 +1235,8 @@ br_stub_handle_object_reopen(call_frame_t *frame, xlator_t *this, fd_t *fd, stub = fop_fsetxattr_cbk_stub(frame, br_stub_fsetxattr_resume, 0, 0, NULL); if (!stub) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, - "failed to allocate stub for fsetxattr fop (gfid: %s)," - " unwinding", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, + "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto cleanup_local; } @@ -1198,9 +1290,8 @@ br_stub_fsetxattr_bad_object_cbk(call_frame_t *frame, void *cookie, */ ret = br_stub_mark_object_bad(this, local->u.context.inode); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, 
BRS_MSG_BAD_OBJ_MARK_FAIL, - "failed to mark object %s as bad", - uuid_utoa(local->u.context.inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL, + "gfid=%s", uuid_utoa(local->u.context.inode->gfid), NULL); ret = br_stub_add(this, local->u.context.inode->gfid); @@ -1220,18 +1311,15 @@ br_stub_handle_bad_object_key(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t op_errno = EINVAL; if (frame->root->pid != GF_CLIENT_PID_SCRUB) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, - "bad object marking " - "on %s is not from the scrubber", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto unwind; } local = br_stub_alloc_local(this); if (!local) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY, - "failed to allocate memory for fsetxattr on %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL); op_ret = -1; op_errno = ENOMEM; goto unwind; @@ -1270,10 +1358,9 @@ br_stub_handle_internal_xattr(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t op_ret = -1; int32_t op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR, - "setxattr called" - " on the internal xattr %s for inode %s", - key, uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR, + "setxattr key=%s", key, "inode-gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL); return 0; @@ -1291,10 +1378,8 @@ br_stub_dump_xattr(xlator_t *this, dict_t *dict, int *op_errno) goto out; } dict_dump_to_str(dict, dump, BR_STUB_DUMP_STR_SIZE, format); - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR, - "fsetxattr called on " - "internal xattr %s", - dump); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR, 
+ "fsetxattr dump=%s", dump, NULL); out: if (dump) { GF_FREE(dump); @@ -1331,6 +1416,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, /* object signature request */ ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign); if (!ret) { + gf_msg_debug(this->name, 0, "got SIGNATURE request on %s", + uuid_utoa(fd->inode->gfid)); br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata); goto done; } @@ -1423,10 +1510,8 @@ br_stub_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (!strcmp(BITROT_OBJECT_BAD_KEY, name) || !strcmp(BITROT_SIGNING_VERSION_KEY, name) || !strcmp(BITROT_CURRENT_VERSION_KEY, name)) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR, - "removexattr called" - " on internal xattr %s for file %s", - name, loc->path); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR, + "name=%s", name, "file-path=%s", loc->path, NULL); goto unwind; } @@ -1448,10 +1533,9 @@ br_stub_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, if (!strcmp(BITROT_OBJECT_BAD_KEY, name) || !strcmp(BITROT_SIGNING_VERSION_KEY, name) || !strcmp(BITROT_CURRENT_VERSION_KEY, name)) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR, - "removexattr called" - " on internal xattr %s for inode %s", - name, uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR, + "name=%s", name, "inode-gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); goto unwind; } @@ -1537,10 +1621,8 @@ br_stub_is_object_stale(xlator_t *this, call_frame_t *frame, inode_t *inode, ret = br_stub_get_inode_ctx(this, inode, &ctx_addr); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the " - "inode context for %s", - uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); goto out; } @@ -1711,9 +1793,7 @@ 
br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { void *cookie = NULL; - uuid_t rootgfid = { - 0, - }; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk; int32_t op_ret = -1; int32_t op_errno = EINVAL; @@ -1725,8 +1805,6 @@ br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, GF_VALIDATE_OR_GOTO(this->name, this->private, unwind); GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind); - rootgfid[15] = 1; - if (!name) { cbk = br_stub_listxattr_cbk; goto wind; @@ -1796,16 +1874,13 @@ br_stub_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { void *cookie = NULL; - uuid_t rootgfid = { - 0, - }; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk; int32_t op_ret = -1; int32_t op_errno = EINVAL; br_stub_local_t *local = NULL; br_stub_private_t *priv = NULL; - rootgfid[15] = 1; priv = this->private; if (!name) { @@ -2025,10 +2100,8 @@ br_stub_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, offset, flags, iobref, xdata); if (!stub) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, - "failed to allocate stub for write fop (gfid: %s), " - "unwinding", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, + "write gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto cleanup_local; } @@ -2141,10 +2214,8 @@ br_stub_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, stub = fop_ftruncate_stub(frame, br_stub_ftruncate_resume, fd, offset, xdata); if (!stub) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, - "failed to allocate stub for ftruncate fop (gfid: %s)," - " unwinding", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, + "ftruncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto 
cleanup_local; } @@ -2248,10 +2319,8 @@ br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, fd = fd_anonymous(loc->inode); if (!fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED, - "failed to create " - "anonymous fd for the inode %s", - uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED, + "inode-gfid=%s", uuid_utoa(loc->inode->gfid), NULL); goto unwind; } @@ -2281,10 +2350,8 @@ br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, stub = fop_truncate_stub(frame, br_stub_truncate_resume, loc, offset, xdata); if (!stub) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, - "failed to allocate stub for truncate fop (gfid: %s), " - "unwinding", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, + "truncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto cleanup_local; } @@ -2357,11 +2424,9 @@ br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ret = br_stub_init_inode_versions(this, fd, fd->inode, version, _gf_true, _gf_false, &ctx_addr); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - BRS_MSG_GET_INODE_CONTEXT_FAILED, - "failed to init the inode context for " - "the file %s (gfid: %s)", - loc->path, uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto unwind; } } @@ -2380,9 +2445,8 @@ br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ret = br_stub_add_fd_to_inode(this, fd, ctx); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED, - "failed add fd to the list (gfid: %s)", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto unwind; } @@ -2413,10 +2477,8 @@ 
br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) ret = br_stub_require_release_call(this, fd, &br_stub_fd); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED, - "failed to set the fd " - "context for the file (gfid: %s)", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -3125,8 +3187,7 @@ br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; if (!local) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL, - "local is NULL"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL, NULL); goto unwind; } inode = local->u.context.inode; @@ -3144,9 +3205,8 @@ br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * has to be removed manually. Its not a good idea to fail * the fop, as the object has already been deleted. */ - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the context for the inode %s", - uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "inode-gfid=%s", uuid_utoa(inode->gfid), NULL); goto unwind; } @@ -3189,9 +3249,9 @@ br_stub_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, if (!local) { op_ret = -1; op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, - "failed to allocate memory for local (path: %s, gfid: %s)", - loc->path, uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_ALLOC_MEM_FAILED, + "local path=%s", loc->path, "gfid=%s", + uuid_utoa(loc->inode->gfid), NULL); goto unwind; } @@ -3266,23 +3326,21 @@ br_stub_send_ipc_fop(xlator_t *this, fd_t *fd, unsigned long releaseversion, xdata = dict_new(); if (!xdata) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_NO_MEMORY, - "dict allocation failed: cannot send IPC FOP " - "to changelog"); + 
gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_DICT_ALLOC_FAILED, + NULL); goto out; } ret = dict_set_static_bin(xdata, "RELEASE-EVENT", &ev, CHANGELOG_EV_SIZE); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED, - "cannot set release event in dict"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED, NULL); goto dealloc_dict; } frame = create_frame(this, this->ctx->pool); if (!frame) { - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED, - "create_frame() failure"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED, + NULL); goto dealloc_dict; } @@ -3417,8 +3475,8 @@ br_stub_releasedir(xlator_t *this, fd_t *fd) if (fctx->bad_object.dir) { ret = sys_closedir(fctx->bad_object.dir); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, - "closedir error: %s", strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, + "error=%s", strerror(errno), NULL); } GF_FREE(fctx); diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h index e3afa29889a..edd79a77e4f 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h @@ -222,8 +222,8 @@ br_stub_require_release_call(xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx) ret = br_stub_fd_ctx_set(this, fd, br_stub_fd); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED, - "could not set fd context (for release callback"); + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED, + NULL); else *fd_ctx = br_stub_fd; @@ -273,10 +273,9 @@ __br_stub_set_ongoing_version(br_stub_inode_ctx_t *ctx, unsigned long version) if (ctx->currentversion < version) ctx->currentversion = version; else - gf_msg("bit-rot-stub", GF_LOG_WARNING, 0, BRS_MSG_CHANGE_VERSION_FAILED, - "current version: %lu" - "new version: %lu", - ctx->currentversion, version); + 
gf_smsg("bit-rot-stub", GF_LOG_WARNING, 0, + BRS_MSG_CHANGE_VERSION_FAILED, "current version=%lu", + ctx->currentversion, "new version=%lu", version, NULL); } static inline int @@ -398,9 +397,8 @@ br_stub_is_bad_object(xlator_t *this, inode_t *inode) ret = br_stub_get_inode_ctx(this, inode, &ctx_addr); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for the inode %s", - uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "inode-gfid=%s", uuid_utoa(inode->gfid), NULL); bad_object = -1; goto out; } @@ -428,10 +426,8 @@ br_stub_mark_object_bad(xlator_t *this, inode_t *inode) ret = br_stub_get_inode_ctx(this, inode, &ctx_addr); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the " - "inode context for the inode %s", - uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "inode-gfid=%s", uuid_utoa(inode->gfid), NULL); goto out; } diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py index 2cdbf1152b9..2da9f2d2a8c 100644 --- a/xlators/features/changelog/lib/examples/python/libgfchangelog.py +++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py @@ -1,8 +1,10 @@ import os from ctypes import * +from ctypes.util import find_library class Changes(object): - libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL, use_errno=True) + libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, + use_errno=True) @classmethod def geterrno(cls): diff --git a/xlators/features/changelog/lib/src/changelog-lib-messages.h b/xlators/features/changelog/lib/src/changelog-lib-messages.h index 7695944b676..d7fe7274353 100644 --- a/xlators/features/changelog/lib/src/changelog-lib-messages.h +++ b/xlators/features/changelog/lib/src/changelog-lib-messages.h @@ -34,7 +34,7 @@ 
GLFS_MSGID( CHANGELOG_LIB_MSG_MMAP_FAILED, CHANGELOG_LIB_MSG_MUNMAP_FAILED, CHANGELOG_LIB_MSG_ASCII_ERROR, CHANGELOG_LIB_MSG_STAT_FAILED, CHANGELOG_LIB_MSG_GET_XATTR_FAILED, CHANGELOG_LIB_MSG_PUBLISH_ERROR, - CHANGELOG_LIB_MSG_PARSE_ERROR, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, + CHANGELOG_LIB_MSG_PARSE_ERROR, CHANGELOG_LIB_MSG_MIN_MAX_INFO, CHANGELOG_LIB_MSG_CLEANUP_ERROR, CHANGELOG_LIB_MSG_UNLINK_FAILED, CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED, CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED, CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, @@ -43,6 +43,32 @@ GLFS_MSGID( CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING, CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED, - CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, CHANGELOG_LIB_MSG_HIST_FAILED); + CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, CHANGELOG_LIB_MSG_HIST_FAILED, + CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED, + CHANGELOG_LIB_MSG_REQUESTING_INFO, CHANGELOG_LIB_MSG_FINAL_INFO); + +#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO_STR "Registering brick" +#define CHANGELOG_LIB_MSG_RENAME_FAILED_STR "error moving changelog file" +#define CHANGELOG_LIB_MSG_OPEN_FAILED_STR "cannot open changelog file" +#define CHANGELOG_LIB_MSG_UNLINK_FAILED_STR "failed to unlink" +#define CHANGELOG_LIB_MSG_FAILED_TO_RMDIR_STR "failed to rmdir" +#define CHANGELOG_LIB_MSG_STAT_FAILED_STR "stat failed on changelog file" +#define CHANGELOG_LIB_MSG_PARSE_ERROR_STR "could not parse changelog" +#define CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED_STR \ + "parsing error, ceased publishing..." 
+#define CHANGELOG_LIB_MSG_HTIME_ERROR_STR "fop failed on htime file" +#define CHANGELOG_LIB_MSG_GET_XATTR_FAILED_STR \ + "error extracting max timstamp from htime file" +#define CHANGELOG_LIB_MSG_MIN_MAX_INFO_STR "changelogs min max" +#define CHANGELOG_LIB_MSG_REQUESTING_INFO_STR "Requesting historical changelogs" +#define CHANGELOG_LIB_MSG_FINAL_INFO_STR "FINAL" +#define CHANGELOG_LIB_MSG_HIST_FAILED_STR \ + "Requested changelog range is not available" +#define CHANGELOG_LIB_MSG_GET_TIME_ERROR_STR "wrong result" +#define CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO_STR \ + "Cleaning brick entry for brick" +#define CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO_STR "Draining event" +#define CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO_STR "Drained event" +#define CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO_STR "freeing entry" #endif /* !_CHANGELOG_MESSAGES_H_ */ diff --git a/xlators/features/changelog/lib/src/gf-changelog-api.c b/xlators/features/changelog/lib/src/gf-changelog-api.c index 219ce7d560a..81a5cbfec10 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-api.c +++ b/xlators/features/changelog/lib/src/gf-changelog-api.c @@ -56,8 +56,8 @@ gf_changelog_done(char *file) ret = sys_rename(buffer, to_path); if (ret) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_RENAME_FAILED, "cannot move changelog file", - "from=%s", file, "to=%s", to_path, NULL); + CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s", + to_path, NULL); goto out; } diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c index 03dac5e4729..75f8a6dfc08 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-helpers.c +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c @@ -13,12 +13,6 @@ #include "changelog-lib-messages.h" #include <glusterfs/syscall.h> -ssize_t -gf_changelog_read_path(int fd, char *buffer, size_t bufsize) -{ - return sys_read(fd, buffer, bufsize); -} - size_t gf_changelog_write(int 
fd, char *buffer, size_t len) { @@ -64,20 +58,7 @@ gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr) * made a part of libglusterfs. */ -static pthread_key_t rl_key; -static pthread_once_t rl_once = PTHREAD_ONCE_INIT; - -static void -readline_destructor(void *ptr) -{ - GF_FREE(ptr); -} - -static void -readline_once(void) -{ - pthread_key_create(&rl_key, readline_destructor); -} +static __thread read_line_t thread_tsd = {}; static ssize_t my_read(read_line_t *tsd, int fd, char *ptr) @@ -97,27 +78,6 @@ my_read(read_line_t *tsd, int fd, char *ptr) return 1; } -static int -gf_readline_init_once(read_line_t **tsd) -{ - if (pthread_once(&rl_once, readline_once) != 0) - return -1; - - *tsd = pthread_getspecific(rl_key); - if (*tsd) - goto out; - - *tsd = GF_CALLOC(1, sizeof(**tsd), gf_changelog_mt_libgfchangelog_rl_t); - if (!*tsd) - return -1; - - if (pthread_setspecific(rl_key, *tsd) != 0) - return -1; - -out: - return 0; -} - ssize_t gf_readline(int fd, void *vptr, size_t maxlen) { @@ -125,10 +85,7 @@ gf_readline(int fd, void *vptr, size_t maxlen) size_t rc = 0; char c = ' '; char *ptr = NULL; - read_line_t *tsd = NULL; - - if (gf_readline_init_once(&tsd)) - return -1; + read_line_t *tsd = &thread_tsd; ptr = vptr; for (n = 1; n < maxlen; n++) { @@ -151,10 +108,7 @@ off_t gf_lseek(int fd, off_t offset, int whence) { off_t off = 0; - read_line_t *tsd = NULL; - - if (gf_readline_init_once(&tsd)) - return -1; + read_line_t *tsd = &thread_tsd; off = sys_lseek(fd, offset, whence); if (off == -1) @@ -169,10 +123,7 @@ gf_lseek(int fd, off_t offset, int whence) int gf_ftruncate(int fd, off_t length) { - read_line_t *tsd = NULL; - - if (gf_readline_init_once(&tsd)) - return -1; + read_line_t *tsd = &thread_tsd; if (sys_ftruncate(fd, 0)) return -1; diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h index afa0549bdad..9c609d33172 100644 --- 
a/xlators/features/changelog/lib/src/gf-changelog-helpers.h +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h @@ -205,9 +205,6 @@ typedef struct gf_private { void * gf_changelog_process(void *data); -ssize_t -gf_changelog_read_path(int fd, char *buffer, size_t bufsize); - void gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr); diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c index d2ac7efd7c7..7f6e2329e71 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c +++ b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c @@ -526,9 +526,8 @@ gf_changelog_publish(xlator_t *this, gf_changelog_journal_t *jnl, ret = sys_rename(to_path, dest); if (ret) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_RENAME_FAILED, - "error moving changelog to processing dir", "path=%s", to_path, - NULL); + CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path, "to=%s", + dest, NULL); } out: @@ -564,14 +563,14 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl, if (ret || !S_ISREG(stbuf.st_mode)) { ret = -1; gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_STAT_FAILED, - "stat failed on changelog file", "path=%s", from_path, NULL); + "path=%s", from_path, NULL); goto out; } fd1 = open(from_path, O_RDONLY); if (fd1 < 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED, - "cannot open changelog file", "path=%s", from_path, NULL); + "path=%s", from_path, NULL); goto out; } @@ -579,7 +578,7 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fd2 < 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED, - "cannot create ascii changelog file", "path=%s", to_path, NULL); + "path=%s", to_path, NULL); goto close_fd; } else { ret = gf_changelog_decode(this, jnl, fd1, fd2, &stbuf, &zerob); 
@@ -594,9 +593,8 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl, ret = sys_rename(to_path, dest); if (ret) gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_RENAME_FAILED, - "error moving changelog to processing dir", "path=%s", - to_path, NULL); + CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path, + "to=%s", dest, NULL); } /* remove it from .current if it's an empty file */ @@ -605,9 +603,8 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl, ret = sys_unlink(to_path); if (ret) gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_UNLINK_FAILED, - "could not unlink empty changelog", "path=%s", to_path, - NULL); + CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=empty changelog", + "path=%s", to_path, NULL); } } @@ -828,7 +825,7 @@ gf_changelog_open_dirs(xlator_t *this, gf_changelog_journal_t *jnl) ret = recursive_rmdir(jnl->jnl_current_dir); if (ret) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "Failed to rmdir", "path=%s", + CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s", jnl->jnl_current_dir, NULL); goto out; } @@ -849,7 +846,7 @@ gf_changelog_open_dirs(xlator_t *this, gf_changelog_journal_t *jnl) ret = recursive_rmdir(jnl->jnl_processing_dir); if (ret) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "Failed to rmdir", "path=%s", + CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s", jnl->jnl_processing_dir, NULL); goto out; } diff --git a/xlators/features/changelog/lib/src/gf-changelog-reborp.c b/xlators/features/changelog/lib/src/gf-changelog-reborp.c index 91f189d13ab..56b11cbb705 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-reborp.c +++ b/xlators/features/changelog/lib/src/gf-changelog-reborp.c @@ -22,7 +22,7 @@ * initiator is PROBER, data transfer is REBORP. 
*/ -struct rpcsvc_program *gf_changelog_reborp_programs[]; +static struct rpcsvc_program *gf_changelog_reborp_programs[]; void * gf_changelog_connection_janitor(void *arg) @@ -55,9 +55,8 @@ gf_changelog_connection_janitor(void *arg) ev = &entry->event; gf_smsg(this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO, - "Cleaning brick entry for brick", "brick=%s", entry->brick, - NULL); + CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO, "brick=%s", + entry->brick, NULL); /* 0x0: disable rpc-clnt */ rpc_clnt_disable(RPC_PROBER(entry)); @@ -71,21 +70,19 @@ gf_changelog_connection_janitor(void *arg) while (!list_empty(&ev->events)) { event = list_first_entry(&ev->events, struct gf_event, list); gf_smsg(this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "Draining event", - "seq=%lu", event->seq, "payload=%d", event->count, NULL); + CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "seq=%lu", + event->seq, "payload=%d", event->count, NULL); GF_FREE(event); drained++; } gf_smsg(this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "Drained events", - "num=%lu", drained, NULL); + CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, "num=%lu", drained, NULL); /* 0x3: freeup brick entry */ gf_smsg(this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, "freeing entry", - "entry=%p", entry, NULL); + CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, "entry=%p", entry, NULL); LOCK_DESTROY(&entry->statelock); GF_FREE(entry); } @@ -112,9 +109,7 @@ gf_changelog_reborp_rpcsvc_notify(rpcsvc_t *rpc, void *mydata, ret = sys_unlink(RPC_SOCK(entry)); if (ret != 0) gf_smsg(this->name, GF_LOG_WARNING, errno, - CHANGELOG_LIB_MSG_UNLINK_FAILED, - "failed to unlink " - "reverse socket", + CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=reverse socket", "path=%s", RPC_SOCK(entry), NULL); if (entry->connected) GF_CHANGELOG_INVOKE_CBK(this, entry->connected, entry->brick, @@ -391,11 +386,10 @@ gf_changelog_reborp_handle_event(rpcsvc_request_t *req) return 
gf_changelog_event_handler(req, this, entry); } -rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = { +static rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = { [CHANGELOG_REV_PROC_EVENT] = {"CHANGELOG EVENT HANDLER", - CHANGELOG_REV_PROC_EVENT, - gf_changelog_reborp_handle_event, NULL, 0, - DRC_NA}, + gf_changelog_reborp_handle_event, NULL, + CHANGELOG_REV_PROC_EVENT, DRC_NA, 0}, }; /** @@ -404,7 +398,7 @@ rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = { * and that's required to invoke the callback with the appropriate * brick path and it's private data. */ -struct rpcsvc_program gf_changelog_reborp_prog = { +static struct rpcsvc_program gf_changelog_reborp_prog = { .progname = "LIBGFCHANGELOG REBORP", .prognum = CHANGELOG_REV_RPC_PROCNUM, .progver = CHANGELOG_REV_RPC_PROCVER, @@ -413,7 +407,7 @@ struct rpcsvc_program gf_changelog_reborp_prog = { .synctask = _gf_false, }; -struct rpcsvc_program *gf_changelog_reborp_programs[] = { +static struct rpcsvc_program *gf_changelog_reborp_programs[] = { &gf_changelog_reborp_prog, NULL, }; diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c index 7ed9e553e68..57c3d39ef76 100644 --- a/xlators/features/changelog/lib/src/gf-changelog.c +++ b/xlators/features/changelog/lib/src/gf-changelog.c @@ -102,8 +102,8 @@ gf_changelog_ctx_defaults_init(glusterfs_ctx_t *ctx) if (!ctx->iobuf_pool) goto free_pool; - ctx->event_pool = event_pool_new(GF_CHANGELOG_EVENT_POOL_SIZE, - GF_CHANGELOG_EVENT_THREAD_COUNT); + ctx->event_pool = gf_event_pool_new(GF_CHANGELOG_EVENT_POOL_SIZE, + GF_CHANGELOG_EVENT_THREAD_COUNT); if (!ctx->event_pool) goto free_pool; @@ -237,9 +237,8 @@ gf_changelog_init_master() { int ret = 0; - mem_pools_init_early(); ret = gf_changelog_init_context(); - mem_pools_init_late(); + mem_pools_init(); return ret; } @@ -574,9 +573,8 @@ gf_changelog_register_generic(struct gf_brick_spec *bricks, int count, 
brick = bricks; while (count--) { gf_smsg(this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, "Registering brick", - "brick=%s", brick->brick_path, "notify_filter=%d", - brick->filter, NULL); + CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, "brick=%s", + brick->brick_path, "notify_filter=%d", brick->filter, NULL); ret = gf_changelog_register_brick(this, brick, need_order, xl); if (ret != 0) { diff --git a/xlators/features/changelog/lib/src/gf-history-changelog.c b/xlators/features/changelog/lib/src/gf-history-changelog.c index 3e384ea0784..a16219f3664 100644 --- a/xlators/features/changelog/lib/src/gf-history-changelog.c +++ b/xlators/features/changelog/lib/src/gf-history-changelog.c @@ -79,8 +79,8 @@ gf_history_changelog_done(char *file) ret = sys_rename(buffer, to_path); if (ret) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_RENAME_FAILED, "cannot move changelog file", - "from=%s", file, "to=%s", to_path, NULL); + CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s", + to_path, NULL); goto out; } @@ -522,8 +522,7 @@ gf_changelog_consume_wrap(void *data) _gf_true); if (ret) { gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_PARSE_ERROR, - "could not parse changelog", "name=%s", ccd->changelog, - NULL); + "name=%s", ccd->changelog, NULL); goto out; } } @@ -564,9 +563,6 @@ gf_history_consume(void *data) {0}, }; gf_changelog_consume_data_t *curr = NULL; - char thread_name[GF_THREAD_NAMEMAX] = { - 0, - }; hist_data = (gf_changelog_history_data_t *)data; if (hist_data == NULL) { @@ -612,12 +608,10 @@ gf_history_consume(void *data) curr->retval = 0; memset(curr->changelog, '\0', PATH_MAX); - snprintf(thread_name, sizeof(thread_name), "clogc%03hx", - ((iter + 1) & 0x3ff)); ret = gf_thread_create(&th_id[iter], NULL, gf_changelog_consume_wrap, curr, - thread_name); + "clogc%03hx", (iter + 1) & 0x3ff); if (ret) { gf_msg(this->name, GF_LOG_ERROR, ret, CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED, @@ -647,9 +641,8 @@ gf_history_consume(void 
*data) curr = &ccd[iter]; if (ccd->retval) { publish = _gf_false; - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_PARSE_ERROR, - "parsing error, ceased publishing..."); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED, NULL); continue; } @@ -728,7 +721,7 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd, if (ret) { ret = -1; gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR, - "stat() failed on htime file", "path=%s", htime_file, NULL); + "op=stat", "path=%s", htime_file, NULL); goto out; } @@ -742,7 +735,7 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd, if (*fd < 0) { ret = -1; gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR, - "open() failed for htime file", "path=%s", htime_file, NULL); + "op=open", "path=%s", htime_file, NULL); goto out; } @@ -751,17 +744,15 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd, if (ret < 0) { ret = -1; gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_GET_XATTR_FAILED, - "error extracting max timstamp from htime file" - "path=%s", - htime_file, NULL); + CHANGELOG_LIB_MSG_GET_XATTR_FAILED, "path=%s", htime_file, + NULL); goto out; } sscanf(x_value, "%lu:%lu", max_ts, total); - gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, - "changelogs min max", "min=%lu", *min_ts, "max=%lu", *max_ts, - "total_changelogs=%lu", *total, NULL); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_MIN_MAX_INFO, + "min=%lu", *min_ts, "max=%lu", *max_ts, "total_changelogs=%lu", + *total, NULL); ret = 0; @@ -842,15 +833,14 @@ gf_history_changelog(char *changelog_dir, unsigned long start, goto out; } - gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, - "Requesting historical changelogs", "start=%lu", start, "end=%lu", - end, NULL); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_REQUESTING_INFO, + "start=%lu", 
start, "end=%lu", end, NULL); /* basic sanity check */ if (start > end || n_parallel <= 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HIST_FAILED, - "Sanity check failed", "start=%lu", start, "end=%lu", end, - "thread_count=%d", n_parallel, NULL); + "start=%lu", start, "end=%lu", end, "thread_count=%d", + n_parallel, NULL); ret = -1; goto out; } @@ -864,7 +854,7 @@ gf_history_changelog(char *changelog_dir, unsigned long start, dirp = sys_opendir(htime_dir); if (dirp == NULL) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR, - "open dir on htime failed", "path=%s", htime_dir, NULL); + "op=opendir", "path=%s", htime_dir, NULL); ret = -1; goto out; } @@ -876,9 +866,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start, if (!entry || errno != 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_HIST_FAILED, - "Requested changelog range is not availbale", "start=%lu", - start, "end=%lu", end, NULL); + CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start, + "end=%lu", end, NULL); ret = -2; break; } @@ -916,9 +905,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start, if (gf_history_check(fd, from, start, len) != 0) { ret = -1; gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_GET_TIME_ERROR, - "wrong result for start", "start=%lu", start, "idx=%lu", - from, NULL); + CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=start", + "start=%lu", start, "idx=%lu", from, NULL); goto out; } @@ -949,9 +937,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start, if (gf_history_check(fd, to, end2, len) != 0) { ret = -1; gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_GET_TIME_ERROR, - "wrong result for end", "start=%lu", end2, "idx=%lu", - to, NULL); + CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=end", + "start=%lu", end2, "idx=%lu", to, NULL); goto out; } @@ -963,9 +950,9 @@ gf_history_changelog(char *changelog_dir, unsigned long start, if (ret == -1) goto out; - gf_smsg(this->name, GF_LOG_INFO, 
0, - CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, "FINAL", "from=%lu", ts1, - "to=%lu", ts2, "changes=%lu", (to - from + 1), NULL); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_FINAL_INFO, + "from=%lu", ts1, "to=%lu", ts2, "changes=%lu", + (to - from + 1), NULL); hist_data = GF_CALLOC(1, sizeof(gf_changelog_history_data_t), gf_changelog_mt_history_data_t); @@ -1003,11 +990,9 @@ gf_history_changelog(char *changelog_dir, unsigned long start, } else { /* end of range check */ gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_HIST_FAILED, - "Requested changelog range is not " - "available. Retrying next HTIME", - "start=%lu", start, "end=%lu", end, "chlog_min=%lu", min_ts, - "chlog_max=%lu", max_ts, NULL); + CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start, + "end=%lu", end, "chlog_min=%lu", min_ts, "chlog_max=%lu", + max_ts, NULL); } } /* end of readdir() */ diff --git a/xlators/features/changelog/src/changelog-barrier.c b/xlators/features/changelog/src/changelog-barrier.c index be7384ecd0f..0fb89ddb127 100644 --- a/xlators/features/changelog/src/changelog-barrier.c +++ b/xlators/features/changelog/src/changelog-barrier.c @@ -53,14 +53,14 @@ chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue) { call_stub_t *stub = NULL; - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, - "Dequeuing all the changelog barriered fops"); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, + NULL); while ((stub = __chlog_barrier_dequeue(this, queue))) call_resume(stub); - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, - "Dequeuing changelog barriered fops is finished"); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, NULL); return; } @@ -80,8 +80,7 @@ chlog_barrier_timeout(void *data) INIT_LIST_HEAD(&queue); - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_ERROR, - "Disabling changelog barrier because of the timeout."); + gf_smsg(this->name, GF_LOG_ERROR, 0, 
CHANGELOG_MSG_BARRIER_TIMEOUT, NULL); LOCK(&priv->lock); { @@ -120,8 +119,8 @@ __chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv) priv->timer = gf_timer_call_after(this->ctx, priv->timeout, chlog_barrier_timeout, (void *)this); if (!priv->timer) { - gf_msg(this->name, GF_LOG_CRITICAL, 0, CHANGELOG_MSG_BARRIER_ERROR, - "Couldn't add changelog barrier timeout event."); + gf_smsg(this->name, GF_LOG_CRITICAL, 0, + CHANGELOG_MSG_TIMEOUT_ADD_FAILED, NULL); goto out; } diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c index f48dd63870a..aa94459de5a 100644 --- a/xlators/features/changelog/src/changelog-ev-handle.c +++ b/xlators/features/changelog/src/changelog-ev-handle.c @@ -225,8 +225,8 @@ changelog_ev_connector(void *data) changelog_rpc_notify); if (!crpc->rpc) { gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_RPC_CONNECT_ERROR, - "failed to connect back", "path=%s", crpc->sock, NULL); + CHANGELOG_MSG_RPC_CONNECT_ERROR, "path=%s", crpc->sock, + NULL); crpc->cleanup(crpc); goto mutex_unlock; } @@ -378,9 +378,8 @@ changelog_ev_dispatch(void *data) ret = rbuf_wait_for_completion(c_clnt->rbuf, opaque, _dispatcher, c_clnt); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_PUT_BUFFER_FAILED, - "failed to put buffer after consumption"); + gf_smsg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_PUT_BUFFER_FAILED, NULL); } return NULL; diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index 0be8f411164..e561997d858 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -22,6 +22,7 @@ #include "changelog-encoders.h" #include "changelog-rpc-common.h" #include <pthread.h> +#include <time.h> static void changelog_cleanup_free_mutex(void *arg_mutex) @@ -41,16 +42,15 @@ changelog_thread_cleanup(xlator_t *this, pthread_t thr_id) /* send a cancel 
request to the thread */ ret = pthread_cancel(thr_id); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, "could not cancel thread"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL); goto out; } ret = pthread_join(thr_id, &retval); if ((ret != 0) || (retval != PTHREAD_CANCELED)) { - gf_msg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, - "cancel request not adhered as expected"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL); } out: @@ -153,27 +153,6 @@ changelog_init_event_selection(xlator_t *this, return 0; } -int -changelog_cleanup_event_selection(xlator_t *this, - changelog_ev_selector_t *selection) -{ - int j = CHANGELOG_EV_SELECTION_RANGE; - - LOCK(&selection->reflock); - { - while (j--) { - if (selection->ref[j] > 0) - gf_msg(this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF, - "changelog event selection cleaning up " - " on active references"); - } - } - UNLOCK(&selection->reflock); - - return LOCK_DESTROY(&selection->reflock); -} - static void changelog_perform_dispatch(xlator_t *this, changelog_priv_t *priv, void *mem, size_t size) @@ -263,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len) } int -htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, - char *buffer) +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer) { char changelog_path[PATH_MAX + 1] = { 0, @@ -277,8 +255,8 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, int ret = 0; if (priv->htime_fd == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, - "Htime fd not available for updation"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, + "reason=fd not available", NULL); ret = -1; goto out; } @@ -288,13 +266,13 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, goto out; } if 
(changelog_write(priv->htime_fd, (void *)changelog_path, len + 1) < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, - "Htime file content write failed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, + "reason=write failed", NULL); ret = -1; goto out; } - len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts, + len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts, priv->rollover_count); if (len >= sizeof(x_value)) { ret = -1; @@ -303,12 +281,12 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, XATTR_REPLACE)) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR, - "Htime xattr updation failed with XATTR_REPLACE", + "reason=xattr updation failed", "XATTR_REPLACE=true", "changelog=%s", changelog_path, NULL); if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, 0)) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR, - "Htime xattr updation failed", "changelog=%s", + "reason=xattr updation failed", "changelog=%s", changelog_path, NULL); ret = -1; goto out; @@ -346,15 +324,15 @@ cl_is_empty(xlator_t *this, int fd) ret = sys_fstat(fd, &stbuf); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED, - "Could not stat (CHANGELOG)"); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED, + NULL); goto out; } ret = sys_lseek(fd, 0, SEEK_SET); if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED, - "Could not lseek (CHANGELOG)"); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED, + NULL); goto out; } @@ -390,8 +368,8 @@ update_path(xlator_t *this, char *cl_path) found = strstr(cl_path, up_cl); if (found == NULL) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED, - "Could not find CHANGELOG in changelog path"); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PATH_NOT_FOUND, + 
NULL); goto out; } else { memcpy(found, low_cl, sizeof(low_cl) - 1); @@ -403,18 +381,22 @@ out: } static int -changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, - unsigned long ts) +changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ret = -1; int notify = 0; int cl_empty_flag = 0; + struct tm *gmt; + char yyyymmdd[40]; char ofile[PATH_MAX] = { 0, }; char nfile[PATH_MAX] = { 0, }; + char nfile_dir[PATH_MAX] = { + 0, + }; changelog_event_t ev = { 0, }; @@ -422,33 +404,37 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, if (priv->changelog_fd != -1) { ret = sys_fsync(priv->changelog_fd); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); } ret = cl_is_empty(this, priv->changelog_fd); if (ret == 1) { cl_empty_flag = 1; } else if (ret == -1) { /* Log error but proceed as usual */ - gf_msg(this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, - "Error detecting empty changelog"); + gf_smsg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, NULL); } sys_close(priv->changelog_fd); priv->changelog_fd = -1; } + /* Get GMT time. 
*/ + gmt = gmtime(&ts); + + strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt); + (void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME, priv->changelog_dir); - (void)snprintf(nfile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME ".%lu", - priv->changelog_dir, ts); + (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld", + priv->changelog_dir, yyyymmdd, ts); + (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd); if (cl_empty_flag == 1) { ret = sys_unlink(ofile); if (ret) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_UNLINK_OP_FAILED, - "error unlinking empty changelog", "path=%s", ofile, NULL); + CHANGELOG_MSG_UNLINK_OP_FAILED, "path=%s", ofile, NULL); ret = 0; /* Error in unlinking empty changelog should not break further changelog operation, so reset return value to 0*/ @@ -456,13 +442,26 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, } else { ret = sys_rename(ofile, nfile); + /* Changelog file rename gets ENOENT when parent dir doesn't exist */ + if (errno == ENOENT) { + ret = mkdir_p(nfile_dir, 0600, _gf_true); + + if ((ret == -1) && (EEXIST != errno)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_MKDIR_ERROR, "%s", nfile_dir, NULL); + goto out; + } + + ret = sys_rename(ofile, nfile); + } + if (ret && (errno == ENOENT)) { ret = 0; goto out; } if (ret) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_RENAME_ERROR, - "error renaming", "from=%s", ofile, "to=%s", nfile, NULL); + "from=%s", ofile, "to=%s", nfile, NULL); } } @@ -476,8 +475,8 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, } ret = htime_update(this, priv, ts, nfile); if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, - "could not update htime file"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, + NULL); goto out; } } @@ -501,15 +500,10 @@ out: { if (ret) { priv->bn.bnotify_error = _gf_true; - gf_msg(this->name, GF_LOG_ERROR, 0, - 
CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, - "Fail snapshot because of " - "previous errors"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, NULL); } else { gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO, - "Explicit " - "rollover changelog signaling " - "bnotify", "changelog=%s", nfile, NULL); } priv->bn.bnotify = _gf_false; @@ -556,8 +550,8 @@ find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname) cnt = scandir(ht_dir_path, &namelist, filter_cur_par_dirs, alphasort); if (cnt < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED, - "scandir failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED, + NULL); } else if (cnt > 0) { if (snprintf(ht_file_bname, NAME_MAX, "%s", namelist[cnt - 1]->d_name) >= NAME_MAX) { @@ -566,16 +560,15 @@ find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname) } if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, strlen(ht_file_bname), 0)) { - gf_msg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSETXATTR_FAILED, - "fsetxattr failed: HTIME_CURRENT"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSETXATTR_FAILED, "HTIME_CURRENT", NULL); ret = -1; goto out; } if (sys_fsync(ht_dir_fd) < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); ret = -1; goto out; } @@ -596,7 +589,7 @@ out: * returns -1 on failure or error */ int -htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ht_file_fd = -1; int ht_dir_fd = -1; @@ -632,7 +625,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) ht_dir_fd = open(ht_dir_path, O_RDONLY); if (ht_dir_fd == -1) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, - "open failed", "path=%s", 
ht_dir_path, NULL); + "path=%s", ht_dir_path, NULL); ret = -1; goto out; } @@ -640,9 +633,8 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) size = sys_fgetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, sizeof(ht_file_bname)); if (size < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED, - "Error extracting" - " HTIME_CURRENT."); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED, + "name=HTIME_CURRENT", NULL); /* If upgrade scenario, find the latest HTIME.TSTAMP file * and use the same. If error, create a new HTIME.TSTAMP @@ -650,20 +642,18 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) */ cnt = find_current_htime(ht_dir_fd, ht_dir_path, ht_file_bname); if (cnt <= 0) { - gf_msg(this->name, GF_LOG_INFO, errno, CHANGELOG_MSG_HTIME_INFO, - "HTIME_CURRENT not found. Changelog enabled" - " before init"); + gf_smsg(this->name, GF_LOG_INFO, errno, + CHANGELOG_MSG_NO_HTIME_CURRENT, NULL); sys_close(ht_dir_fd); return htime_create(this, priv, ts); } - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR, - "Error extracting" - " HTIME_CURRENT."); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_HTIME_CURRENT_ERROR, NULL); } - gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO, - "HTIME_CURRENT", "path=%s", ht_file_bname, NULL); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_CURRENT, "path=%s", + ht_file_bname, NULL); len = snprintf(ht_file_path, PATH_MAX, "%s/%s", ht_dir_path, ht_file_bname); if ((len < 0) || (len >= PATH_MAX)) { ret = -1; @@ -676,7 +666,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (ht_file_fd < 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, - "unable to open htime file", "path=%s", ht_file_path, NULL); + "path=%s", ht_file_path, NULL); ret = -1; goto out; } @@ -686,8 +676,8 @@ htime_open(xlator_t *this, 
changelog_priv_t *priv, unsigned long ts) ret = sys_fstat(ht_file_fd, &stat_buf); if (ret < 0) { - gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR, - "unable to stat htime file", "path=%s", ht_file_path, NULL); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_STAT_ERROR, + "path=%s", ht_file_path, NULL); ret = -1; goto out; } @@ -696,9 +686,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) size = sys_fgetxattr(ht_file_fd, HTIME_KEY, x_value, sizeof(x_value)); if (size < 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED, - "error extracting max" - " timstamp from htime file", - "path=%s", ht_file_path, NULL); + "name=%s", HTIME_KEY, "path=%s", ht_file_path, NULL); ret = -1; goto out; } @@ -710,14 +698,11 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) total1 = stat_buf.st_size / record_len; if (total != total1) { gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, - "Mismatch of changelog count. 
" - "INIT CASE", "xattr_total=%lu", total, "size_total=%lu", total1, NULL); } - gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, - "INIT CASE", "min=%lu", min_ts, "max=%lu", max_ts, - "total_changelogs=%lu", total, NULL); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, "min=%lu", + min_ts, "max=%lu", max_ts, "total_changelogs=%lu", total, NULL); if (total < total1) priv->rollover_count = total1 + 1; @@ -734,7 +719,7 @@ out: * returns -1 on failure or error */ int -htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ht_file_fd = -1; int ht_dir_fd = -1; @@ -751,15 +736,13 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) int flags = 0; int32_t len = 0; - gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO, - "Changelog enable: Creating new " - "HTIME file", - "name=%lu", ts, NULL); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE, + "name=%ld", ts, NULL); CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); /* get the htime file name in ht_file_path */ - len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%lu", ht_dir_path, + len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path, HTIME_FILE_NAME, ts); if ((len < 0) || (len >= PATH_MAX)) { ret = -1; @@ -771,23 +754,23 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (ht_file_fd < 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, - "unable to create htime file", "path=%s", ht_file_path, NULL); + "path=%s", ht_file_path, NULL); ret = -1; goto out; } if (sys_fsetxattr(ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE, sizeof(HTIME_INITIAL_VALUE) - 1, 0)) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED, - "Htime xattr initialization failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_XATTR_INIT_FAILED, NULL); 
ret = -1; goto out; } ret = sys_fsync(ht_file_fd); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED, - "fsync failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED, + NULL); goto out; } @@ -800,26 +783,25 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) ht_dir_fd = open(ht_dir_path, O_RDONLY); if (ht_dir_fd == -1) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, - "open failed", "path=%s", ht_dir_path, NULL); + "path=%s", ht_dir_path, NULL); ret = -1; goto out; } - (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu", + (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld", HTIME_FILE_NAME, ts); if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, strlen(ht_file_bname), 0)) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED, - "fsetxattr failed:" - " HTIME_CURRENT"); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED, + " HTIME_CURRENT", NULL); ret = -1; goto out; } ret = sys_fsync(ht_dir_fd); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED, - "fsync failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED, + NULL); goto out; } @@ -873,7 +855,7 @@ changelog_snap_open(xlator_t *this, changelog_priv_t *priv) fd = open(c_snap_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fd < 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, - "unable to open file", "path=%s", c_snap_path, NULL); + "path=%s", c_snap_path, NULL); ret = -1; goto out; } @@ -905,8 +887,8 @@ changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv) int ret = 0; ret = changelog_snap_open(this, priv); - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, - "Now starting to log in call path"); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "starting", + NULL); return ret; } @@ -926,8 +908,8 @@ 
changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv) sys_close(priv->c_snap_fd); priv->c_snap_fd = -1; - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, - "Stopped to log in call path"); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "Stopped", + NULL); return ret; } @@ -955,9 +937,6 @@ changelog_open_journal(xlator_t *this, changelog_priv_t *priv) fd = open(changelog_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fd < 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, - "unable to open/create changelog file." - " change-logging will be" - " inactive", "path=%s", changelog_path, NULL); goto out; } @@ -980,8 +959,8 @@ out: } int -changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale) +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale) { int ret = -1; @@ -1002,21 +981,12 @@ changelog_entry_length() return sizeof(changelog_log_data_t); } -int +void changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last) { - struct timeval tv = { - 0, - }; - cld->cld_type = CHANGELOG_TYPE_ROLLOVER; - - if (gettimeofday(&tv, NULL)) - return -1; - - cld->cld_roll_time = (unsigned long)tv.tv_sec; + cld->cld_roll_time = gf_time(); cld->cld_finale = is_last; - return 0; } int @@ -1074,11 +1044,10 @@ changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld) ret = changelog_snap_write_change(priv, buffer, off); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED, - "error writing csnap to disk"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED, + "csnap", NULL); } - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, - "Successfully wrote to csnap"); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_WROTE_TO_CSNAP, NULL); ret = 0; out: return ret; @@ -1095,9 +1064,8 @@ changelog_handle_change(xlator_t *this, 
changelog_priv_t *priv, ret = changelog_start_next_change(this, priv, cld->cld_roll_time, cld->cld_finale); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_GET_TIME_OP_FAILED, - "Problem rolling over changelog(s)"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_GET_TIME_OP_FAILED, NULL); goto out; } @@ -1111,16 +1079,16 @@ changelog_handle_change(xlator_t *this, changelog_priv_t *priv, if (CHANGELOG_TYPE_IS_FSYNC(cld->cld_type)) { ret = sys_fsync(priv->changelog_fd); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); } goto out; } ret = priv->ce->encode(this, cld); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED, - "error writing changelog to disk"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED, + "changelog", NULL); } out: @@ -1143,6 +1111,7 @@ changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_INODE_NOT_FOUND, "inode needed for version checking !!!"); + goto out; } @@ -1211,7 +1180,7 @@ changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv) ret = pthread_mutex_lock(&priv->dm.drain_black_mutex); if (ret) gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, - "pthread error", "error=%d", ret, NULL); + "error=%d", ret, NULL); while (priv->dm.black_fop_cnt > 0) { gf_msg_debug(this->name, 0, "Conditional wait on black fops: %ld", priv->dm.black_fop_cnt); @@ -1220,14 +1189,14 @@ changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv) &priv->dm.drain_black_mutex); if (ret) gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, - "pthread cond wait failed", "error=%d", ret, NULL); + CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret, + NULL); } priv->dm.drain_wait_black = _gf_false; ret = 
pthread_mutex_unlock(&priv->dm.drain_black_mutex); if (ret) gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, - "pthread error", "error=%d", ret, NULL); + "error=%d", ret, NULL); pthread_cleanup_pop(0); gf_msg_debug(this->name, 0, "Woke up: Conditional wait on black fops"); } @@ -1247,7 +1216,7 @@ changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv) ret = pthread_mutex_lock(&priv->dm.drain_white_mutex); if (ret) gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, - "pthread error", "error=%d", ret, NULL); + "error=%d", ret, NULL); while (priv->dm.white_fop_cnt > 0) { gf_msg_debug(this->name, 0, "Conditional wait on white fops : %ld", priv->dm.white_fop_cnt); @@ -1256,14 +1225,14 @@ changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv) &priv->dm.drain_white_mutex); if (ret) gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, - "pthread cond wait failed", "error=%d", ret, NULL); + CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret, + NULL); } priv->dm.drain_wait_white = _gf_false; ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); if (ret) gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, - "pthread error", "error=%d", ret, NULL); + "error=%d", ret, NULL); pthread_cleanup_pop(0); gf_msg_debug(this->name, 0, "Woke up: Conditional wait on white fops"); } @@ -1292,7 +1261,7 @@ changelog_rollover(void *data) while (1) { (void)pthread_testcancel(); - tv.tv_sec = time(NULL) + priv->rollover_time; + tv.tv_sec = gf_time() + priv->rollover_time; tv.tv_nsec = 0; ret = 0; /* Reset ret to zero */ @@ -1315,12 +1284,12 @@ changelog_rollover(void *data) pthread_cleanup_pop(0); if (ret == 0) { - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, - "Explicit wakeup on barrier notify"); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, + NULL); priv->explicit_rollover = _gf_true; } else if (ret && ret != ETIMEDOUT) { - 
gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SELECT_FAILED, - "pthread_cond_timedwait failed"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_SELECT_FAILED, NULL); continue; } else if (ret && ret == ETIMEDOUT) { gf_msg_debug(this->name, 0, "Wokeup on timeout"); @@ -1373,13 +1342,7 @@ changelog_rollover(void *data) if (priv->explicit_rollover == _gf_true) sleep(1); - ret = changelog_fill_rollover_data(&cld, _gf_false); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_GET_TIME_OP_FAILED, - "failed to fill rollover data"); - continue; - } + changelog_fill_rollover_data(&cld, _gf_false); _mask_cancellation(); @@ -1427,9 +1390,8 @@ changelog_fsync_thread(void *data) ret = changelog_inject_single_event(this, priv, &cld); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_INJECT_FSYNC_FAILED, - "failed to inject fsync event"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_INJECT_FSYNC_FAILED, NULL); _unmask_cancellation(); } @@ -1851,23 +1813,21 @@ changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc, parent = inode_parent(loc->inode, 0, 0); if (!parent) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_INODE_NOT_FOUND, - "Parent inode not found", "gfid=%s", - uuid_utoa(loc->inode->gfid), NULL); + "type=parent", "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); goto err; } CHANGELOG_INIT_NOCHECK(this, *local, loc->inode, loc->inode->gfid, 5); if (!(*local)) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED, - "changelog local" - " initiatilization failed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED, + NULL); goto err; } co = changelog_get_usable_buffer(*local); if (!co) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_NO_MEMORY, - "Failed to get buffer"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_GET_BUFFER_FAILED, + NULL); goto err; } diff --git a/xlators/features/changelog/src/changelog-helpers.h 
b/xlators/features/changelog/src/changelog-helpers.h index 517c4dc4883..38fa7590c32 100644 --- a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -31,7 +31,7 @@ */ typedef struct changelog_log_data { /* rollover related */ - unsigned long cld_roll_time; + time_t cld_roll_time; /* reopen changelog? */ gf_boolean_t cld_finale; @@ -97,12 +97,6 @@ struct changelog_encoder { typedef struct changelog_time_slice { /** - * just in case we need nanosecond granularity some day. - * field is unused as of now (maybe we'd need it later). - */ - struct timeval tv_start; - - /** * version of changelog file, incremented each time changes * rollover. */ @@ -190,8 +184,12 @@ typedef struct changelog_ev_selector { /* changelog's private structure */ struct changelog_priv { + /* changelog journalling */ gf_boolean_t active; + /* changelog live notifications */ + gf_boolean_t rpc_active; + /* to generate unique socket file per brick */ char *changelog_brick; @@ -419,11 +417,11 @@ changelog_local_t * changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, int xtra_records, gf_boolean_t update_flag); int -changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale); +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale); int changelog_open_journal(xlator_t *this, changelog_priv_t *priv); -int +void changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last); int changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv, @@ -447,12 +445,11 @@ changelog_fsync_thread(void *data); int changelog_forget(xlator_t *this, inode_t *inode); int -htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, - char *buffer); +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer); int -htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_open(xlator_t 
*this, changelog_priv_t *priv, time_t ts); int -htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts); /* Geo-Rep snapshot dependency changes */ void @@ -510,8 +507,6 @@ changelog_deselect_event(xlator_t *, changelog_ev_selector_t *, unsigned int); int changelog_init_event_selection(xlator_t *, changelog_ev_selector_t *); int -changelog_cleanup_event_selection(xlator_t *, changelog_ev_selector_t *); -int changelog_ev_selected(xlator_t *, changelog_ev_selector_t *, unsigned int); void changelog_dispatch_event(xlator_t *, changelog_priv_t *, changelog_event_t *); @@ -674,8 +669,8 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path, #define CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, label) \ do { \ if (!priv->active) { \ - gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_NOT_ACTIVE, \ - "Changelog is not active, return success"); \ + gf_smsg(this->name, GF_LOG_WARNING, 0, \ + CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, NULL); \ ret = 0; \ goto label; \ } \ @@ -686,7 +681,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path, do { \ if (ret) { \ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ - "pthread error", "error=%d", ret, NULL); \ + "error=%d", ret, NULL); \ ret = -1; \ goto label; \ } \ @@ -697,7 +692,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path, do { \ if (ret) { \ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ - "pthread error", "error=%d", ret, NULL); \ + "error=%d", ret, NULL); \ ret = -1; \ flag = _gf_true; \ goto label; \ @@ -709,7 +704,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path, do { \ if (ret) { \ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ - "pthread error", "error=%d", ret, NULL); \ + "error=%d", ret, NULL); \ ret = -1; \ pthread_mutex_unlock(&mutex); \ goto label; \ diff --git 
a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h index ca50ccb149e..cb0e16c85d8 100644 --- a/xlators/features/changelog/src/changelog-messages.h +++ b/xlators/features/changelog/src/changelog-messages.h @@ -24,7 +24,7 @@ */ GLFS_MSGID( - CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_NO_MEMORY, + CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_BARRIER_FOP_FAILED, CHANGELOG_MSG_VOL_MISCONFIGURED, CHANGELOG_MSG_RENAME_ERROR, CHANGELOG_MSG_READ_ERROR, CHANGELOG_MSG_HTIME_ERROR, CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, @@ -37,11 +37,11 @@ GLFS_MSGID( CHANGELOG_MSG_FSYNC_OP_FAILED, CHANGELOG_MSG_TOTAL_LOG_INFO, CHANGELOG_MSG_SNAP_INFO, CHANGELOG_MSG_SELECT_FAILED, CHANGELOG_MSG_FCNTL_FAILED, CHANGELOG_MSG_BNOTIFY_INFO, - CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_NOT_ACTIVE, + CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, CHANGELOG_MSG_LOCAL_INIT_FAILED, CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, CHANGELOG_MSG_HANDLE_PROBE_ERROR, CHANGELOG_MSG_SET_FD_CONTEXT, CHANGELOG_MSG_FREEUP_FAILED, - CHANGELOG_MSG_HTIME_INFO, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, + CHANGELOG_MSG_RECONFIGURE, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, CHANGELOG_MSG_RPC_BUILD_ERROR, CHANGELOG_MSG_RPC_CONNECT_ERROR, CHANGELOG_MSG_RPC_START_ERROR, CHANGELOG_MSG_BUFFER_STARVATION_ERROR, CHANGELOG_MSG_SCAN_DIR_FAILED, CHANGELOG_MSG_FSETXATTR_FAILED, @@ -53,6 +53,120 @@ GLFS_MSGID( CHANGELOG_MSG_STRSTR_OP_FAILED, CHANGELOG_MSG_UNLINK_OP_FAILED, CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, CHANGELOG_MSG_READLINK_OP_FAILED, CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, - CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED); + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, CHANGELOG_MSG_MEMORY_INIT_FAILED, + CHANGELOG_MSG_NO_MEMORY, CHANGELOG_MSG_HTIME_STAT_ERROR, + CHANGELOG_MSG_HTIME_CURRENT_ERROR, CHANGELOG_MSG_BNOTIFY_COND_INFO, + CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT, + 
CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR, + CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED, + CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0, + CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY, + CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, + CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE, + CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET, + CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, + CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED, + CHANGELOG_MSG_CLEANUP_ALREADY_SET); +#define CHANGELOG_MSG_BARRIER_FOP_FAILED_STR \ + "failed to barrier FOPs, disabling changelog barrier" +#define CHANGELOG_MSG_MEMORY_INIT_FAILED_STR "memory accounting init failed" +#define CHANGELOG_MSG_NO_MEMORY_STR "failed to create local memory pool" +#define CHANGELOG_MSG_ENTRY_BUF_INFO_STR \ + "Entry cannot be captured for gfid, Capturing DATA entry." +#define CHANGELOG_MSG_PTHREAD_ERROR_STR "pthread error" +#define CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED_STR "pthread_mutex_init failed" +#define CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED_STR "pthread_cond_init failed" +#define CHANGELOG_MSG_HTIME_ERROR_STR "failed to update HTIME file" +#define CHANGELOG_MSG_HTIME_STAT_ERROR_STR "unable to stat htime file" +#define CHANGELOG_MSG_HTIME_CURRENT_ERROR_STR "Error extracting HTIME_CURRENT." 
+#define CHANGELOG_MSG_UNLINK_OP_FAILED_STR "error unlinking empty changelog" +#define CHANGELOG_MSG_RENAME_ERROR_STR "error renaming" +#define CHANGELOG_MSG_MKDIR_ERROR_STR "unable to create directory" +#define CHANGELOG_MSG_BNOTIFY_INFO_STR \ + "Explicit rollover changelog signaling bnotify" +#define CHANGELOG_MSG_BNOTIFY_COND_INFO_STR "Woke up: bnotify conditional wait" +#define CHANGELOG_MSG_RECONFIGURE_STR "Reconfigure: Changelog Enable" +#define CHANGELOG_MSG_NO_HTIME_CURRENT_STR \ + "HTIME_CURRENT not found. Changelog enabled before init" +#define CHANGELOG_MSG_HTIME_CURRENT_STR "HTIME_CURRENT" +#define CHANGELOG_MSG_NEW_HTIME_FILE_STR \ + "Changelog enable: Creating new HTIME file" +#define CHANGELOG_MSG_FGETXATTR_FAILED_STR "fgetxattr failed" +#define CHANGELOG_MSG_TOTAL_LOG_INFO_STR "changelog info" +#define CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED_STR "pthread cond wait failed" +#define CHANGELOG_MSG_INODE_NOT_FOUND_STR "inode not found" +#define CHANGELOG_MSG_READLINK_OP_FAILED_STR \ + "could not read the link from the gfid handle" +#define CHANGELOG_MSG_OPEN_FAILED_STR "unable to open file" +#define CHANGELOG_MSG_RPC_CONNECT_ERROR_STR "failed to connect back" +#define CHANGELOG_MSG_BUFFER_STARVATION_ERROR_STR \ + "Failed to get buffer for RPC dispatch" +#define CHANGELOG_MSG_PTHREAD_CANCEL_FAILED_STR "could not cancel thread" +#define CHANGELOG_MSG_FSTAT_OP_FAILED_STR "Could not stat (CHANGELOG)" +#define CHANGELOG_MSG_LSEEK_OP_FAILED_STR "Could not lseek (changelog)" +#define CHANGELOG_MSG_PATH_NOT_FOUND_STR \ + "Could not find CHANGELOG in changelog path" +#define CHANGELOG_MSG_FSYNC_OP_FAILED_STR "fsync failed" +#define CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED_STR \ + "Error detecting empty changelog" +#define CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED_STR \ + "Fail snapshot because of previous errors" +#define CHANGELOG_MSG_SCAN_DIR_FAILED_STR "scandir failed" +#define CHANGELOG_MSG_FSETXATTR_FAILED_STR "fsetxattr failed" +#define 
CHANGELOG_MSG_XATTR_INIT_FAILED_STR "Htime xattr initialization failed" +#define CHANGELOG_MSG_SNAP_INFO_STR "log in call path" +#define CHANGELOG_MSG_WRITE_FAILED_STR "error writing to disk" +#define CHANGELOG_MSG_WROTE_TO_CSNAP_STR "Successfully wrote to csnap" +#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)" +#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify" +#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed" +#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event" +#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \ + "changelog local initialization failed" +#define CHANGELOG_MSG_GET_BUFFER_FAILED_STR "Failed to get buffer" +#define CHANGELOG_MSG_SET_FD_CONTEXT_STR \ + "could not set fd context(for release cbk)" +#define CHANGELOG_MSG_DICT_GET_FAILED_STR "Barrier failed" +#define CHANGELOG_MSG_BARRIER_STATE_NOTIFY_STR "Barrier notification" +#define CHANGELOG_MSG_BARRIER_ERROR_STR \ + "Received another barrier off notification while already off" +#define CHANGELOG_MSG_BARRIER_DISABLED_STR "disabled changelog barrier" +#define CHANGELOG_MSG_BARRIER_ALREADY_DISABLED_STR \ + "Changelog barrier already disabled" +#define CHANGELOG_MSG_BARRIER_ON_ERROR_STR \ + "Received another barrier on notification when last one is not served yet" +#define CHANGELOG_MSG_BARRIER_ENABLE_STR "Enabled changelog barrier" +#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found" +#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \ + "Something went wrong in dict_get_str_boolean" +#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set" +#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper" +#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \ + "translator needs a single subvolume" +#define CHANGELOG_MSG_VOL_MISCONFIGURED_STR \ + "dangling volume. 
please check volfile" +#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_STR \ + "Dequeuing all the changelog barriered fops" +#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED_STR \ + "Dequeuing changelog barriered fops is finished" +#define CHANGELOG_MSG_BARRIER_TIMEOUT_STR \ + "Disabling changelog barrier because of the timeout" +#define CHANGELOG_MSG_TIMEOUT_ADD_FAILED_STR \ + "Couldn't add changelog barrier timeout event" +#define CHANGELOG_MSG_RPC_BUILD_ERROR_STR "failed to build rpc options" +#define CHANGELOG_MSG_NOTIFY_REGISTER_FAILED_STR "failed to register notify" +#define CHANGELOG_MSG_RPC_START_ERROR_STR "failed to start rpc" +#define CHANGELOG_MSG_CREATE_FRAME_FAILED_STR "failed to create frame" +#define CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED_STR "failed to serialize reply" +#define CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED_STR "cannot register program" +#define CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE_STR \ + "Changelog is not active, return success" +#define CHANGELOG_MSG_PUT_BUFFER_FAILED_STR \ + "failed to put buffer after consumption" +#define CHANGELOG_MSG_CLEANUP_ALREADY_SET_STR \ + "cleanup_starting flag is already set for xl" +#define CHANGELOG_MSG_HANDLE_PROBE_ERROR_STR "xdr decoding error" #endif /* !_CHANGELOG_MESSAGES_H_ */ diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c index cf35175c3bc..125246a17e1 100644 --- a/xlators/features/changelog/src/changelog-rpc-common.c +++ b/xlators/features/changelog/src/changelog-rpc-common.c @@ -28,7 +28,7 @@ changelog_rpc_poller(void *arg) { xlator_t *this = arg; - (void)event_dispatch(this->ctx->event_pool); + (void)gf_event_dispatch(this->ctx->event_pool); return NULL; } @@ -47,10 +47,10 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile, if (!options) goto error_return; - ret = rpc_transport_unix_options_build(&options, sockfile, 0); + ret = rpc_transport_unix_options_build(options, sockfile, 0); if (ret) { - 
gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR, - "failed to build rpc options"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR, + NULL); goto dealloc_dict; } @@ -60,19 +60,19 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile, ret = rpc_clnt_register_notify(rpc, fn, cbkdata); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, - "failed to register notify"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL); goto dealloc_rpc_clnt; } ret = rpc_clnt_start(rpc); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR, - "failed to start rpc"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR, + NULL); goto dealloc_rpc_clnt; } + dict_unref(options); return rpc; dealloc_rpc_clnt: @@ -164,8 +164,8 @@ changelog_invoke_rpc(xlator_t *this, struct rpc_clnt *rpc, frame = create_frame(this, this->ctx->pool); if (!frame) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED, - "failed to create frame"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED, + NULL); goto error_return; } @@ -238,8 +238,8 @@ changelog_rpc_sumbit_reply(rpcsvc_request_t *req, void *arg, iob = __changelog_rpc_serialize_reply(req, arg, &iov, xdrproc); if (!iob) - gf_msg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, - "failed to serialize reply"); + gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, + NULL); else iobref_add(iobref, iob); @@ -262,6 +262,9 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile, struct rpcsvc_program *prog = NULL; rpc_transport_t *trans = NULL; + if (!rpc) + return; + while (*progs) { prog = *progs; (void)rpcsvc_program_unregister(rpc, prog); @@ -303,22 +306,25 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata, if (!cbkdata) cbkdata = this; - ret = 
rpcsvc_transport_unix_options_build(&options, sockfile); + options = dict_new(); + if (!options) + return NULL; + + ret = rpcsvc_transport_unix_options_build(options, sockfile); if (ret) goto dealloc_dict; rpc = rpcsvc_init(this, this->ctx, options, 8); if (rpc == NULL) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR, - "failed to init rpc"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR, + NULL); goto dealloc_dict; } ret = rpcsvc_register_notify(rpc, fn, cbkdata); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, - "failed to register notify function"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL); goto dealloc_rpc; } @@ -332,11 +338,10 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata, prog = *progs; ret = rpcsvc_program_register(rpc, prog, _gf_false); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, - "cannot register program " - "(name: %s, prognum: %d, pogver: %d)", - prog->progname, prog->prognum, prog->progver); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, "name%s", + prog->progname, "prognum=%d", prog->prognum, "pogver=%d", + prog->progver, NULL); goto dealloc_rpc; } diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c index 28974fe0999..440b88091a6 100644 --- a/xlators/features/changelog/src/changelog-rpc.c +++ b/xlators/features/changelog/src/changelog-rpc.c @@ -13,7 +13,7 @@ #include "changelog-mem-types.h" #include "changelog-ev-handle.h" -struct rpcsvc_program *changelog_programs[]; +static struct rpcsvc_program *changelog_programs[]; static void changelog_cleanup_dispatchers(xlator_t *this, changelog_priv_t *priv, int count) @@ -69,9 +69,6 @@ changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf, int j = 0; int ret = 0; changelog_clnt_t *conn = NULL; 
- char thread_name[GF_THREAD_NAMEMAX] = { - 0, - }; conn = &priv->connections; @@ -111,9 +108,9 @@ changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf, /* spawn dispatcher threads */ for (; j < nr_dispatchers; j++) { - snprintf(thread_name, sizeof(thread_name), "clogd%03hx", (j & 0x3ff)); ret = gf_thread_create(&priv->ev_dispatcher[j], NULL, - changelog_ev_dispatch, conn, thread_name); + changelog_ev_dispatch, conn, "clogd%03hx", + j & 0x3ff); if (ret != 0) { changelog_cleanup_dispatchers(this, priv, j); break; @@ -382,16 +379,15 @@ changelog_handle_probe(rpcsvc_request_t *req) this = req->trans->xl; if (this->cleanup_starting) { - gf_msg(this->name, GF_LOG_DEBUG, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR, - "cleanup_starting flag is already set for xl"); + gf_smsg(this->name, GF_LOG_DEBUG, 0, CHANGELOG_MSG_CLEANUP_ALREADY_SET, + NULL); return 0; } ret = xdr_to_generic(req->msg[0], &rpc_req, (xdrproc_t)xdr_changelog_probe_req); if (ret < 0) { - gf_msg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR, - "xdr decoding error"); + gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR, NULL); req->rpc_err = GARBAGE_ARGS; goto handle_xdr_error; } @@ -423,13 +419,13 @@ submit_rpc: * RPC declarations */ -rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = { +static rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = { [CHANGELOG_RPC_PROBE_FILTER] = {"CHANGELOG PROBE FILTER", - CHANGELOG_RPC_PROBE_FILTER, - changelog_handle_probe, NULL, 0, DRC_NA}, + changelog_handle_probe, NULL, + CHANGELOG_RPC_PROBE_FILTER, DRC_NA, 0}, }; -struct rpcsvc_program changelog_svc_prog = { +static struct rpcsvc_program changelog_svc_prog = { .progname = CHANGELOG_RPC_PROGNAME, .prognum = CHANGELOG_RPC_PROGNUM, .progver = CHANGELOG_RPC_PROGVER, @@ -438,7 +434,7 @@ struct rpcsvc_program changelog_svc_prog = { .synctask = _gf_true, }; -struct rpcsvc_program *changelog_programs[] = { +static struct rpcsvc_program *changelog_programs[] = { 
&changelog_svc_prog, NULL, }; diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index 1f22a97a6e9..6a6e5af859e 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -34,6 +34,12 @@ static struct changelog_bootstrap cb_bootstrap[] = { }, }; +static int +changelog_init_rpc(xlator_t *this, changelog_priv_t *priv); + +static int +changelog_init(xlator_t *this, changelog_priv_t *priv); + /* Entry operations - TYPE III */ /** @@ -149,9 +155,8 @@ changelog_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, goto out; } if (barrier_enabled && !stub) { - gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier", - "fop=rmdir", NULL); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rmdir", NULL); chlog_barrier_dequeue_all(this, &queue); } @@ -298,9 +303,8 @@ changelog_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, goto out; } if (barrier_enabled && !stub) { - gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier", - "fop=unlink", NULL); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=unlink", NULL); chlog_barrier_dequeue_all(this, &queue); } @@ -418,9 +422,8 @@ changelog_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, goto out; } if (barrier_enabled && !stub) { - gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier", - "fop=rename", NULL); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rename", NULL); chlog_barrier_dequeue_all(this, &queue); } /* changelog barrier */ @@ -531,8 +534,7 @@ changelog_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, } if (barrier_enabled && !stub) { - gf_smsg(this->name, 
GF_LOG_ERROR, 0, CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier", + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=link", NULL); chlog_barrier_dequeue_all(this, &queue); } @@ -660,9 +662,8 @@ changelog_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, } if (barrier_enabled && !stub) { - gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier", - "fop=mkdir", NULL); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mkdir", NULL); chlog_barrier_dequeue_all(this, &queue); } @@ -782,9 +783,8 @@ changelog_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, } if (barrier_enabled && !stub) { - gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier", - "fop=symlink", NULL); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=symlink", NULL); chlog_barrier_dequeue_all(this, &queue); } @@ -929,9 +929,8 @@ changelog_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, } if (barrier_enabled && !stub) { - gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier", - "fop=mknod", NULL); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mknod", NULL); chlog_barrier_dequeue_all(this, &queue); } @@ -972,8 +971,8 @@ changelog_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, CHANGELOG_OP_TYPE_RELEASE)) { ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT, - "could not set fd context (for release cbk)"); + gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT, + NULL); } changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); @@ -1083,9 +1082,8 @@ changelog_create(call_frame_t 
*frame, xlator_t *this, loc_t *loc, int32_t flags, } if (barrier_enabled && !stub) { - gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier", - "fop=create", NULL); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=create", NULL); chlog_barrier_dequeue_all(this, &queue); } @@ -1388,9 +1386,6 @@ changelog_handle_virtual_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ret = changelog_fill_entry_buf(frame, this, loc, &local); if (ret) { gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_ENTRY_BUF_INFO, - "Entry cannot be" - " captured for gfid, Capturing DATA" - " entry.", "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); goto unwind; } @@ -1806,8 +1801,8 @@ changelog_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, CHANGELOG_OP_TYPE_RELEASE)) { ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT, - "could not set fd context (for release cbk)"); + gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT, + NULL); } unwind: @@ -2008,6 +2003,11 @@ notify(xlator_t *this, int event, void *data, ...) uint64_t clntcnt = 0; changelog_clnt_t *conn = NULL; gf_boolean_t cleanup_notify = _gf_false; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; INIT_LIST_HEAD(&queue); @@ -2021,23 +2021,40 @@ notify(xlator_t *this, int event, void *data, ...) 
"cleanup changelog rpc connection of brick %s", priv->victim->name); - this->cleanup_starting = 1; - changelog_destroy_rpc_listner(this, priv); - conn = &priv->connections; - if (conn) - changelog_ev_cleanup_connections(this, conn); - xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); - clntcnt = GF_ATOMIC_GET(priv->clntcnt); - - if (!xprtcnt && !clntcnt) { - LOCK(&priv->lock); - { - cleanup_notify = priv->notify_down; - priv->notify_down = _gf_true; + if (priv->rpc_active) { + this->cleanup_starting = 1; + changelog_destroy_rpc_listner(this, priv); + conn = &priv->connections; + if (conn) + changelog_ev_cleanup_connections(this, conn); + xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); + clntcnt = GF_ATOMIC_GET(priv->clntcnt); + if (!xprtcnt && !clntcnt) { + LOCK(&priv->lock); + { + cleanup_notify = priv->notify_down; + priv->notify_down = _gf_true; + } + UNLOCK(&priv->lock); + if (priv->rpc) { + list_for_each_entry_safe(listener, next, + &priv->rpc->listeners, list) + { + if (listener->trans) { + rpc_transport_unref(listener->trans); + } + } + rpcsvc_destroy(priv->rpc); + priv->rpc = NULL; + } + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, + UNIX_PATH_MAX); + sys_unlink(sockfile); + if (!cleanup_notify) + default_notify(this, GF_EVENT_PARENT_DOWN, data); } - UNLOCK(&priv->lock); - if (!cleanup_notify) - default_notify(this, GF_EVENT_PARENT_DOWN, data); + } else { + default_notify(this, GF_EVENT_PARENT_DOWN, data); } goto out; } @@ -2049,15 +2066,15 @@ notify(xlator_t *this, int event, void *data, ...) 
switch (barrier) { case DICT_ERROR: - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_DICT_GET_FAILED, - "Barrier dict_get_str_boolean failed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_DICT_GET_FAILED, "dict_get_str_boolean", + NULL); ret = -1; goto out; case BARRIER_OFF: - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, - "Barrier off notification"); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "off", NULL); CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out); LOCK(&priv->c_snap_lock); @@ -2074,10 +2091,8 @@ notify(xlator_t *this, int event, void *data, ...) UNLOCK(&priv->bflags.lock); if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_BARRIER_ERROR, - "Received another barrier off" - " notification while already off"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ERROR, NULL); goto out; } @@ -2095,13 +2110,11 @@ notify(xlator_t *this, int event, void *data, ...) */ if (ret == 0) { chlog_barrier_dequeue_all(this, &queue); - gf_msg(this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BARRIER_INFO, - "Disabled changelog barrier"); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_DISABLED, NULL); } else { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_BARRIER_ERROR, - "Changelog barrier already disabled"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, NULL); } LOCK(&priv->bflags.lock); @@ -2113,8 +2126,8 @@ notify(xlator_t *this, int event, void *data, ...) goto out; case BARRIER_ON: - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, - "Barrier on notification"); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "on", NULL); CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out); LOCK(&priv->c_snap_lock); @@ -2133,11 +2146,8 @@ notify(xlator_t *this, int event, void *data, ...) 
UNLOCK(&priv->bflags.lock); if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_BARRIER_ERROR, - "Received another barrier on" - "notification when last one is" - "not served yet"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ON_ERROR, NULL); goto out; } @@ -2160,14 +2170,14 @@ notify(xlator_t *this, int event, void *data, ...) goto out; } - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, - "Enabled changelog barrier"); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_ENABLE, NULL); ret = changelog_barrier_notify(priv, buf); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_WRITE_FAILED, - "Explicit roll over: write failed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_WRITE_FAILED, "Explicit roll over", + NULL); changelog_barrier_cleanup(this, priv, &queue); ret = -1; goto out; @@ -2191,21 +2201,20 @@ notify(xlator_t *this, int event, void *data, ...) } ret1 = pthread_mutex_unlock(&priv->bn.bnotify_mutex); CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret1, out, bclean_req); - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO, - "Woke up: bnotify conditional wait"); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BNOTIFY_COND_INFO, NULL); goto out; case DICT_DEFAULT: - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_DICT_GET_FAILED, "barrier key not found"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, NULL); ret = -1; goto out; default: - gf_msg(this->name, GF_LOG_ERROR, EINVAL, - CHANGELOG_MSG_DICT_GET_FAILED, - "Something went bad in dict_get_str_boolean"); + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, + CHANGELOG_MSG_ERROR_IN_DICT_GET, NULL); ret = -1; goto out; } @@ -2231,9 +2240,8 @@ mem_acct_init(xlator_t *this) ret = xlator_mem_acct_init(this, gf_changelog_mt_end + 1); if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "Memory accounting" - " init failed"); + gf_smsg(this->name, 
GF_LOG_WARNING, ENOMEM, + CHANGELOG_MSG_MEMORY_INIT_FAILED, NULL); return ret; } @@ -2244,23 +2252,11 @@ static int changelog_init(xlator_t *this, changelog_priv_t *priv) { int i = 0; - int ret = -1; - struct timeval tv = { - 0, - }; + int ret = 0; changelog_log_data_t cld = { 0, }; - ret = gettimeofday(&tv, NULL); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_GET_TIME_OP_FAILED, "gettimeofday() failure"); - goto out; - } - - priv->slice.tv_start = tv; - priv->maps[CHANGELOG_TYPE_DATA] = "D "; priv->maps[CHANGELOG_TYPE_METADATA] = "M "; priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M "; @@ -2279,9 +2275,7 @@ changelog_init(xlator_t *this, changelog_priv_t *priv) * in case there was an encoding change. so... things are kept * simple here. */ - ret = changelog_fill_rollover_data(&cld, _gf_false); - if (ret) - goto out; + changelog_fill_rollover_data(&cld, _gf_false); ret = htime_open(this, priv, cld.cld_roll_time); /* call htime open with cld's rollover_time */ @@ -2319,8 +2313,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) if ((ret = pthread_mutex_init(&priv->bn.bnotify_mutex, NULL)) != 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, - "bnotify pthread_mutex_init failed", "ret=%d", ret, NULL); + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=bnotify", + "ret=%d", ret, NULL); ret = -1; goto out; } @@ -2328,8 +2322,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) if ((ret = pthread_cond_init(&priv->bn.bnotify_cond, NULL)) != 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, - "bnotify pthread_cond_init failed", "ret=%d", ret, NULL); + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=bnotify", + "ret=%d", ret, NULL); ret = -1; goto out; } @@ -2337,8 +2331,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) if ((ret = pthread_mutex_init(&priv->dm.drain_black_mutex, NULL)) != 0) { 
gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, - "drain_black pthread_mutex_init failed", "ret=%d", ret, NULL); + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_black", + "ret=%d", ret, NULL); ret = -1; goto out; } @@ -2346,8 +2340,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) if ((ret = pthread_cond_init(&priv->dm.drain_black_cond, NULL)) != 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, - "drain_black pthread_cond_init failed", "ret=%d", ret, NULL); + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_black", + "ret=%d", ret, NULL); ret = -1; goto out; } @@ -2355,8 +2349,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) if ((ret = pthread_mutex_init(&priv->dm.drain_white_mutex, NULL)) != 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, - "drain_white pthread_mutex_init failed", "ret=%d", ret, NULL); + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_white", + "ret=%d", ret, NULL); ret = -1; goto out; } @@ -2364,8 +2358,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) if ((ret = pthread_cond_init(&priv->dm.drain_white_cond, NULL)) != 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, - "drain_white pthread_cond_init failed", "ret=%d", ret, NULL); + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_white", + "ret=%d", ret, NULL); ret = -1; goto out; } @@ -2374,7 +2368,7 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) if ((pthread_mutex_init(&priv->cr.lock, NULL)) != 0) { gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, - "changelog_rollover lock init failed", "ret=%d", ret, NULL); + "name=changelog_rollover", "ret=%d", ret, NULL); ret = -1; goto out; } @@ -2425,6 +2419,22 @@ changelog_barrier_pthread_destroy(changelog_priv_t *priv) 
LOCK_DESTROY(&priv->bflags.lock); } +static void +changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) +{ + /* terminate rpc server */ + if (!this->cleanup_starting) + changelog_destroy_rpc_listner(this, priv); + + (void)changelog_cleanup_rpc_threads(this, priv); + /* cleanup rot buffs */ + rbuf_dtor(priv->rbuf); + + /* cleanup poller thread */ + if (priv->poller) + (void)changelog_thread_cleanup(this, priv->poller); +} + int reconfigure(xlator_t *this, dict_t *options) { @@ -2433,6 +2443,9 @@ reconfigure(xlator_t *this, dict_t *options) changelog_priv_t *priv = NULL; gf_boolean_t active_earlier = _gf_true; gf_boolean_t active_now = _gf_true; + gf_boolean_t rpc_active_earlier = _gf_true; + gf_boolean_t rpc_active_now = _gf_true; + gf_boolean_t iniate_rpc = _gf_false; changelog_time_slice_t *slice = NULL; changelog_log_data_t cld = { 0, @@ -2443,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options) char csnap_dir[PATH_MAX] = { 0, }; - struct timeval tv = { - 0, - }; uint32_t timeout = 0; priv = this->private; @@ -2454,14 +2464,15 @@ reconfigure(xlator_t *this, dict_t *options) ret = -1; active_earlier = priv->active; + rpc_active_earlier = priv->rpc_active; /* first stop the rollover and the fsync thread */ changelog_cleanup_helper_threads(this, priv); GF_OPTION_RECONF("changelog-dir", tmp, options, str, out); if (!tmp) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET, - "\"changelog-dir\" option is not set"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET, + NULL); goto out; } @@ -2487,6 +2498,29 @@ reconfigure(xlator_t *this, dict_t *options) goto out; GF_OPTION_RECONF("changelog", active_now, options, bool, out); + GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool, + out); + + /* If journalling is enabled, enable rpc notifications */ + if (active_now && !active_earlier) { + if (!rpc_active_earlier) + iniate_rpc = _gf_true; + } + + if (rpc_active_now && !rpc_active_earlier) { + 
iniate_rpc = _gf_true; + } + + /* TODO: Disable of changelog-notifications is not supported for now + * as there is no clean way of cleaning up of rpc resources + */ + + if (iniate_rpc) { + ret = changelog_init_rpc(this, priv); + if (ret) + goto out; + priv->rpc_active = _gf_true; + } /** * changelog_handle_change() handles changes that could possibly @@ -2513,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options) out); if (active_now || active_earlier) { - ret = changelog_fill_rollover_data(&cld, !active_now); - if (ret) - goto out; + changelog_fill_rollover_data(&cld, !active_now); slice = &priv->slice; @@ -2532,15 +2564,9 @@ reconfigure(xlator_t *this, dict_t *options) if (active_now) { if (!active_earlier) { - gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO, - "Reconfigure: Changelog Enable"); - if (gettimeofday(&tv, NULL)) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_HTIME_ERROR, "unable to fetch htime"); - ret = -1; - goto out; - } - htime_create(this, priv, tv.tv_sec); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE, + NULL); + htime_create(this, priv, gf_time()); } ret = changelog_spawn_helper_threads(this, priv); } @@ -2565,8 +2591,7 @@ changelog_freeup_options(xlator_t *this, changelog_priv_t *priv) ret = priv->cb->dtor(this, &priv->cd); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED, - "could not cleanup bootstrapper"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED, NULL); GF_FREE(priv->changelog_brick); GF_FREE(priv->changelog_dir); } @@ -2618,6 +2643,7 @@ changelog_init_options(xlator_t *this, changelog_priv_t *priv) goto dealloc_2; GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2); + GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2); GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2); GF_OPTION_INIT("op-mode", tmp, str, dealloc_2); @@ -2656,22 +2682,6 @@ error_return: return -1; } -static void 
-changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) -{ - /* terminate rpc server */ - if (!this->cleanup_starting) - changelog_destroy_rpc_listner(this, priv); - - (void)changelog_cleanup_rpc_threads(this, priv); - /* cleanup rot buffs */ - rbuf_dtor(priv->rbuf); - - /* cleanup poller thread */ - if (priv->poller) - (void)changelog_thread_cleanup(this, priv->poller); -} - static int changelog_init_rpc(xlator_t *this, changelog_priv_t *priv) { @@ -2712,14 +2722,14 @@ init(xlator_t *this) GF_VALIDATE_OR_GOTO("changelog", this, error_return); if (!this->children || this->children->next) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED, - "translator needs a single subvolume"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED, + NULL); goto error_return; } if (!this->parents) { - gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED, - "dangling volume. please check volfile"); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED, + NULL); goto error_return; } @@ -2729,8 +2739,8 @@ init(xlator_t *this) this->local_pool = mem_pool_new(changelog_local_t, 64); if (!this->local_pool) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, - "failed to create local memory pool"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, + NULL); goto cleanup_priv; } @@ -2740,6 +2750,7 @@ init(xlator_t *this) GF_ATOMIC_INIT(priv->clntcnt, 0); GF_ATOMIC_INIT(priv->xprtcnt, 0); INIT_LIST_HEAD(&priv->xprt_list); + priv->htime_fd = -1; ret = changelog_init_options(this, priv); if (ret) @@ -2767,10 +2778,13 @@ init(xlator_t *this) INIT_LIST_HEAD(&priv->queue); priv->barrier_enabled = _gf_false; - /* RPC ball rolling.. */ - ret = changelog_init_rpc(this, priv); - if (ret) - goto cleanup_barrier; + if (priv->rpc_active || priv->active) { + /* RPC ball rolling.. 
*/ + ret = changelog_init_rpc(this, priv); + if (ret) + goto cleanup_barrier; + priv->rpc_active = _gf_true; + } ret = changelog_init(this, priv); if (ret) @@ -2782,13 +2796,16 @@ init(xlator_t *this) return 0; cleanup_rpc: - changelog_cleanup_rpc(this, priv); + if (priv->rpc_active) { + changelog_cleanup_rpc(this, priv); + } cleanup_barrier: changelog_barrier_pthread_destroy(priv); cleanup_options: changelog_freeup_options(this, priv); cleanup_mempool: mem_pool_destroy(this->local_pool); + this->local_pool = NULL; cleanup_priv: GF_FREE(priv); error_return: @@ -2807,9 +2824,11 @@ fini(xlator_t *this) priv = this->private; if (priv) { - /* terminate RPC server/threads */ - changelog_cleanup_rpc(this, priv); - + if (priv->active || priv->rpc_active) { + /* terminate RPC server/threads */ + changelog_cleanup_rpc(this, priv); + GF_FREE(priv->ev_dispatcher); + } /* call barrier_disable to cancel timer */ if (priv->barrier_enabled) __chlog_barrier_disable(this, &queue); @@ -2878,6 +2897,13 @@ struct volume_options options[] = { .flags = OPT_FLAG_SETTABLE, .level = OPT_STATUS_BASIC, .tags = {"journal", "georep", "glusterfind"}}, + {.key = {"changelog-notification"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable changelog live notification", + .op_version = {3}, + .level = OPT_STATUS_BASIC, + .tags = {"bitrot", "georep"}}, {.key = {"changelog-brick"}, .type = GF_OPTION_TYPE_PATH, .description = "brick path to generate unique socket file name." 
diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am index 0c3966c968b..e2a277e372b 100644 --- a/xlators/features/cloudsync/src/Makefile.am +++ b/xlators/features/cloudsync/src/Makefile.am @@ -21,9 +21,9 @@ cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources) nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h BUILT_SOURCES = cloudsync-autogen-fops.h -cloudsync_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS) +cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) -cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL) AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ -DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\" diff --git a/xlators/features/cloudsync/src/cloudsync-common.c b/xlators/features/cloudsync/src/cloudsync-common.c index aee1f06a82a..445a31b90e7 100644 --- a/xlators/features/cloudsync/src/cloudsync-common.c +++ b/xlators/features/cloudsync/src/cloudsync-common.c @@ -11,6 +11,20 @@ #include "cloudsync-common.h" void +cs_xattrinfo_wipe(cs_local_t *local) +{ + if (local->xattrinfo.lxattr) { + if (local->xattrinfo.lxattr->file_path) + GF_FREE(local->xattrinfo.lxattr->file_path); + + if (local->xattrinfo.lxattr->volname) + GF_FREE(local->xattrinfo.lxattr->volname); + + GF_FREE(local->xattrinfo.lxattr); + } +} + +void cs_local_wipe(xlator_t *this, cs_local_t *local) { if (!local) @@ -40,5 +54,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local) if (local->remotepath) GF_FREE(local->remotepath); + cs_xattrinfo_wipe(local); + mem_put(local); } diff --git a/xlators/features/cloudsync/src/cloudsync-common.h b/xlators/features/cloudsync/src/cloudsync-common.h index 7b3520ce9be..11d233460a4 100644 --- a/xlators/features/cloudsync/src/cloudsync-common.h +++ 
b/xlators/features/cloudsync/src/cloudsync-common.h @@ -14,9 +14,23 @@ #include <glusterfs/call-stub.h> #include <glusterfs/xlator.h> #include <glusterfs/syncop.h> +#include <glusterfs/compat-errno.h> #include "cloudsync-mem-types.h" #include "cloudsync-messages.h" +typedef struct cs_loc_xattr { + char *file_path; + uuid_t uuid; + uuid_t gfid; + char *volname; +} cs_loc_xattr_t; + +typedef struct cs_size_xattr { + uint64_t size; + uint64_t blksize; + uint64_t blocks; +} cs_size_xattr_t; + typedef struct cs_local { loc_t loc; fd_t *fd; @@ -34,10 +48,25 @@ typedef struct cs_local { int call_cnt; inode_t *inode; char *remotepath; + + struct { + /* offset, flags and size are the information needed + * by read fop for remote read operation. These will be + * populated in cloudsync read fop, before being passed + * on to the plugin performing remote read. + */ + off_t offset; + uint32_t flags; + size_t size; + cs_loc_xattr_t *lxattr; + } xattrinfo; + } cs_local_t; typedef int (*fop_download_t)(call_frame_t *frame, void *config); +typedef int (*fop_remote_read_t)(call_frame_t *, void *); + typedef void *(*store_init)(xlator_t *this); typedef int (*store_reconfigure)(xlator_t *this, dict_t *options); @@ -48,6 +77,7 @@ struct cs_remote_stores { char *name; /* store name */ void *config; /* store related information */ fop_download_t dlfop; /* store specific download function */ + fop_remote_read_t rdfop; /* store specific read function */ store_init init; /* store init to initialize store config */ store_reconfigure reconfigure; /* reconfigure store config */ store_fini fini; @@ -59,11 +89,15 @@ typedef struct cs_private { struct cs_remote_stores *stores; gf_boolean_t abortdl; pthread_spinlock_t lock; + gf_boolean_t remote_read; } cs_private_t; void cs_local_wipe(xlator_t *this, cs_local_t *local); +void +cs_xattrinfo_wipe(cs_local_t *local); + #define CS_STACK_UNWIND(fop, frame, params...) 
\ do { \ cs_local_t *__local = NULL; \ @@ -90,6 +124,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local); typedef struct store_methods { int (*fop_download)(call_frame_t *frame, void *config); + int (*fop_remote_read)(call_frame_t *, void *); /* return type should be the store config */ void *(*fop_init)(xlator_t *this); int (*fop_reconfigure)(xlator_t *this, dict_t *options); diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py index 3122bd32c01..c27df97ae58 100755 --- a/xlators/features/cloudsync/src/cloudsync-fops-c.py +++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py @@ -35,11 +35,19 @@ cs_@NAME@ (call_frame_t *frame, xlator_t *this, __cs_inode_ctx_get (this, fd->inode, &ctx); if (ctx) - state = __cs_get_file_state (this, fd->inode, ctx); + state = __cs_get_file_state (fd->inode, ctx); else state = GF_CS_LOCAL; - local->xattr_req = xdata ? dict_ref (xdata) : (xdata = dict_new ()); + xdata = xdata ? dict_ref (xdata) : dict_new (); + + if (!xdata) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + op_errno = ENOMEM; + goto err; + } + + local->xattr_req = xdata; ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1); if (ret) { @@ -137,15 +145,15 @@ cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } else { __cs_inode_ctx_update (this, fd->inode, val); gf_msg (this->name, GF_LOG_INFO, 0, 0, - " state = %ld", val); + " state = %" PRIu64, val); if (local->call_cnt == 1 && (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { gf_msg (this->name, GF_LOG_INFO, 0, 0, " will repair and download " - "the file, current state : %ld", - val); + "the file, current state : %" + PRIu64, val); goto repair; } else { gf_msg (this->name, GF_LOG_ERROR, 0, 0, @@ -187,19 +195,29 @@ int32_t cs_@NAME@ (call_frame_t *frame, xlator_t *this, @LONG_ARGS@) { + int op_errno = EINVAL; cs_local_t *local = NULL; int ret = 0; local = cs_local_init (this, frame, loc, NULL, 
GF_FOP_@UPNAME@); if (!local) { gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local is NULL"); + op_errno = ENOMEM; goto err; } if (loc->inode->ia_type == IA_IFDIR) goto wind; - local->xattr_req = xdata ? dict_ref (xdata) : dict_new (); + xdata = xdata ? dict_ref (xdata) : dict_new (); + + if (!xdata) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + op_errno = ENOMEM; + goto err; + } + + local->xattr_req = xdata; ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1); if (ret) { @@ -215,7 +233,7 @@ wind: return 0; err: - CS_STACK_UNWIND (@NAME@, frame, -1, errno, @CBK_ERROR_ARGS@); + CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@); return 0; } @@ -274,7 +292,7 @@ fd_ops = ['readv', 'writev', 'flush', 'fsync', 'fsyncdir', 'ftruncate', # These are the current actual lists used to generate the code # The following list contains fops which are fd based that modifies data -fd_data_modify_op_fop_template = ['readv', 'writev', 'flush', 'fsync', +fd_data_modify_op_fop_template = ['writev', 'flush', 'fsync', 'ftruncate', 'rchecksum', 'fallocate', 'discard', 'zerofill', 'seek'] @@ -284,8 +302,8 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr', 'getattr'] # These fops need a separate implementation -special_fops = ['readdirp', 'statfs', 'setxattr', 'unlink', 'getxattr', - 'truncate', 'fstat'] +special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr', + 'truncate', 'fstat', 'readv', 'readdirp'] def gen_defaults(): for name in ops: diff --git a/xlators/features/cloudsync/src/cloudsync-mem-types.h b/xlators/features/cloudsync/src/cloudsync-mem-types.h index 9e6837a1d3d..220346405d0 100644 --- a/xlators/features/cloudsync/src/cloudsync-mem-types.h +++ b/xlators/features/cloudsync/src/cloudsync-mem-types.h @@ -16,6 +16,7 @@ enum cs_mem_types_ { gf_cs_mt_cs_private_t = gf_common_mt_end + 1, gf_cs_mt_cs_remote_stores_t, gf_cs_mt_cs_inode_ctx_t, + gf_cs_mt_cs_lxattr_t, gf_cs_mt_end }; #endif /* 
__CLOUDSYNC_MEM_TYPES_H__ */ diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am index 4deefb651eb..fb6b0580c6d 100644 --- a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am @@ -2,6 +2,10 @@ if BUILD_AMAZONS3_PLUGIN AMAZONS3_DIR = cloudsyncs3 endif -SUBDIRS = ${AMAZONS3_DIR} +if BUILD_CVLT_PLUGIN + CVLT_DIR = cvlt +endif + +SUBDIRS = ${AMAZONS3_DIR} ${CVLT_DIR} CLEANFILES = diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c index 7680260988b..23c3599825a 100644 --- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c @@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid, int date_len = -1; int res_len = -1; - ctime = time(NULL); + ctime = gf_time(); gtime = gmtime(&ctime); date_len = strftime(httpdate, sizeof(httpdate), diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am new file mode 100644 index 00000000000..b512464f157 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am @@ -0,0 +1,12 @@ +csp_LTLIBRARIES = cloudsynccvlt.la +cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins + 
+cloudsynccvlt_la_SOURCES = libcvlt.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c +cloudsynccvlt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +cloudsynccvlt_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src +noinst_HEADERS = archivestore.h libcvlt.h libcvlt-mem-types.h cvlt-messages.h +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -I$(top_srcdir)/xlators/features/cloudsync/src +CLEANFILES = + +EXTRA_DIST = libcloudsynccvlt.sym diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h new file mode 100644 index 00000000000..7230ef77337 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h @@ -0,0 +1,203 @@ +/* + Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __ARCHIVESTORE_H__ +#define __ARCHIVESTORE_H__ + +#include <stdlib.h> +#include <stddef.h> +#include <stdint.h> +#include <dlfcn.h> +#include <uuid/uuid.h> + +#define CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid" +#define CS_XATTR_PRODUCT_ID "trusted.cloudsync.product-id" +#define CS_XATTR_STORE_ID "trusted.cloudsync.store-id" + +struct _archstore_methods; +typedef struct _archstore_methods archstore_methods_t; + +struct _archstore_desc { + void *priv; /* Private field for store mgmt. 
*/ + /* To be used only by archive store*/ +}; +typedef struct _archstore_desc archstore_desc_t; + +struct _archstore_info { + char *id; /* Identifier for the archivestore */ + uint32_t idlen; /* Length of identifier string */ + char *prod; /* Name of the data mgmt. product */ + uint32_t prodlen; /* Length of the product string */ +}; +typedef struct _archstore_info archstore_info_t; + +struct _archstore_fileinfo { + uuid_t uuid; /* uuid of the file */ + char *path; /* file path */ + uint32_t pathlength; /* length of file path */ +}; +typedef struct _archstore_fileinfo archstore_fileinfo_t; + +struct _app_callback_info { + archstore_info_t *src_archstore; + archstore_fileinfo_t *src_archfile; + archstore_info_t *dest_archstore; + archstore_fileinfo_t *dest_archfile; +}; +typedef struct _app_callback_info app_callback_info_t; + +typedef void (*app_callback_t)(archstore_desc_t *, app_callback_info_t *, + void *, int64_t, int32_t); + +enum _archstore_scan_type { FULL = 1, INCREMENTAL = 2 }; +typedef enum _archstore_scan_type archstore_scan_type_t; + +typedef int32_t archstore_errno_t; + +/* + * Initialize archive store. + * arg1 pointer to structure containing archive store information + * arg2 error number if any generated during the initialization + * arg3 name of the log file + */ +typedef int32_t (*init_archstore_t)(archstore_desc_t *, archstore_errno_t *, + const char *); + +/* + * Clean up archive store. 
+ * arg1 pointer to structure containing archive store information + * arg2 error number if any generated during the cleanup + */ +typedef int32_t (*term_archstore_t)(archstore_desc_t *, archstore_errno_t *); + +/* + * Read the contents of the file from archive store + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing archive store information + * arg3 pointer to structure containing information about file to be read + * arg4 offset in the file from which data should be read + * arg5 buffer where the data should be read + * arg6 number of bytes of data to be read + * arg7 error number if any generated during the read from file + * arg8 callback handler to be invoked after the data is read + * arg9 cookie to be passed when callback is invoked + */ +typedef int32_t (*read_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, off_t, char *, + size_t, archstore_errno_t *, app_callback_t, + void *); + +/* + * Restore the contents of the file from archive store + * This is basically in-place restore + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing archive store information + * arg3 pointer to structure containing information about file to be restored + * arg4 error number if any generated during the file restore + * arg5 callback to be invoked after the file is restored + * arg6 cookie to be passed when callback is invoked + */ +typedef int32_t (*recall_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, + archstore_errno_t *, app_callback_t, + void *); + +/* + * Restore the contents of the file from archive store to a different store + * This is basically out-of-place restore + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing source archive store information + * arg3 pointer to structure containing information about file to be restored + * arg4 
pointer to structure containing destination archive store information + * arg5 pointer to structure containing information about the location to + which the file will be restored + * arg6 error number if any generated during the file restore + * arg7 callback to be invoked after the file is restored + * arg8 cookie to be passed when callback is invoked + */ +typedef int32_t (*restore_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, + archstore_info_t *, + archstore_fileinfo_t *, + archstore_errno_t *, app_callback_t, + void *); + +/* + * Archive the contents of the file to archive store + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing source archive store information + * arg3 pointer to structure containing information about files to be archived + * arg4 pointer to structure containing destination archive store information + * arg5 pointer to structure containing information about files that failed + * to be archived + * arg6 error number if any generated during the file archival + * arg7 callback to be invoked after the file is archived + * arg8 cookie to be passed when callback is invoked + */ +typedef int32_t (*archive_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, + archstore_info_t *, + archstore_fileinfo_t *, + archstore_errno_t *, app_callback_t, + void *); + +/* + * Backup list of files provided in the input file + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing source archive store information + * arg3 pointer to structure containing information about files to be backed up + * arg4 pointer to structure containing destination archive store information + * arg5 pointer to structure containing information about files that failed + * to be backed up + * arg6 error number if any generated during the file archival + * arg7 callback to be invoked after the file is archived + * arg8 
cookie to be passed when callback is invoked + */ +typedef int32_t (*backup_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, + archstore_info_t *, + archstore_fileinfo_t *, + archstore_errno_t *, app_callback_t, + void *); + +/* + * Scan the contents of a store and determine the files which need to be + * backed up. + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing archive store information + * arg3 type of scan whether full or incremental + * arg4 path to file that contains list of files to be backed up + * arg5 error number if any generated during scan operation + */ +typedef int32_t (*scan_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_scan_type_t, char *, + archstore_errno_t *); + +struct _archstore_methods { + init_archstore_t init; + term_archstore_t fini; + backup_archstore_t backup; + archive_archstore_t archive; + scan_archstore_t scan; + restore_archstore_t restore; + recall_archstore_t recall; + read_archstore_t read; +}; + +typedef int (*get_archstore_methods_t)(archstore_methods_t *); + +/* + * Single function that will be invoked by applications for extracting + * the function pointers to all data management functions. + */ +int32_t +get_archstore_methods(archstore_methods_t *); + +#endif /* End of __ARCHIVESTORE_H__ */ diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h new file mode 100644 index 00000000000..57c9aa77da0 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _CVLT_MESSAGES_H_ +#define _CVLT_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(CVLT, CVLT_EXTRACTION_FAILED, CVLT_FREE, + CVLT_RESOURCE_ALLOCATION_FAILED, CVLT_RESTORE_FAILED, + CVLT_READ_FAILED, CVLT_NO_MEMORY, CVLT_DLOPEN_FAILED); + +#endif /* !_CVLT_MESSAGES_H_ */ diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym new file mode 100644 index 00000000000..0bc273670d5 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym @@ -0,0 +1 @@ +store_ops diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h new file mode 100644 index 00000000000..c24fab8bfe7 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com> + * This file is part of GlusterFS. 
+ * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. + */ + +#ifndef __LIBCVLT_MEM_TYPES_H__ +#define __LIBCVLT_MEM_TYPES_H__ + +#include <glusterfs/mem-types.h> +enum libcvlt_mem_types_ { + gf_libcvlt_mt_cvlt_private_t = gf_common_mt_end + 1, + gf_libcvlt_mt_end +}; +#endif /* __LIBCVLT_MEM_TYPES_H__ */ diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c new file mode 100644 index 00000000000..5b7272bb448 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c @@ -0,0 +1,842 @@ +#include <stdlib.h> +#include <glusterfs/xlator.h> +#include <glusterfs/glusterfs.h> +#include "libcvlt.h" +#include "cloudsync-common.h" +#include "cvlt-messages.h" + +#define LIBARCHIVE_SO "libopenarchive.so" +#define ALIGN_SIZE 4096 +#define CVLT_TRAILER "cvltv1" + +store_methods_t store_ops = { + .fop_download = cvlt_download, + .fop_init = cvlt_init, + .fop_reconfigure = cvlt_reconfigure, + .fop_fini = cvlt_fini, + .fop_remote_read = cvlt_read, +}; + +static const int32_t num_req = 32; +static const int32_t num_iatt = 32; +static char *plugin = "cvlt_cloudSync"; + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_libcvlt_mt_end + 1); + + if (ret != 0) { + return ret; + } + + return ret; +} + +static void +cvlt_free_resources(archive_t *arch) +{ + /* + * We will release all the resources that were allocated by the xlator. + * Check whether there are any buffers which have not been released + * back to a mempool. 
+ */ + + if (arch->handle) { + dlclose(arch->handle); + } + + if (arch->iobuf_pool) { + iobuf_pool_destroy(arch->iobuf_pool); + } + + if (arch->req_pool) { + mem_pool_destroy(arch->req_pool); + arch->req_pool = NULL; + } + + return; +} + +static int32_t +cvlt_extract_store_fops(xlator_t *this, archive_t *arch) +{ + int32_t op_ret = -1; + get_archstore_methods_t get_archstore_methods; + + /* + * libopenarchive.so defines methods for performing data management + * operations. We will extract the methods from library and these + * methods will be invoked for moving data between glusterfs volume + * and the data management product. + */ + + VALIDATE_OR_GOTO(arch, err); + + arch->handle = dlopen(LIBARCHIVE_SO, RTLD_NOW); + if (!arch->handle) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_DLOPEN_FAILED, + " failed to open %s ", LIBARCHIVE_SO); + return op_ret; + } + + dlerror(); /* Clear any existing error */ + + get_archstore_methods = dlsym(arch->handle, "get_archstore_methods"); + if (!get_archstore_methods) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " Error extracting get_archstore_methods()"); + dlclose(arch->handle); + arch->handle = NULL; + return op_ret; + } + + op_ret = get_archstore_methods(&(arch->fops)); + if (op_ret) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " Failed to extract methods in get_archstore_methods"); + dlclose(arch->handle); + arch->handle = NULL; + return op_ret; + } + +err: + return op_ret; +} + +static int32_t +cvlt_alloc_resources(xlator_t *this, archive_t *arch, int num_req, int num_iatt) +{ + /* + * Initialize information about all the memory pools that will be + * used by this xlator. 
+ */ + arch->nreqs = 0; + + arch->req_pool = NULL; + + arch->handle = NULL; + arch->xl = this; + + arch->req_pool = mem_pool_new(cvlt_request_t, num_req); + if (!arch->req_pool) { + goto err; + } + + arch->iobuf_pool = iobuf_pool_new(); + if (!arch->iobuf_pool) { + goto err; + } + + if (cvlt_extract_store_fops(this, arch)) { + goto err; + } + + return 0; + +err: + + return -1; +} + +static void +cvlt_req_init(cvlt_request_t *req) +{ + sem_init(&(req->sem), 0, 0); + + return; +} + +static void +cvlt_req_destroy(cvlt_request_t *req) +{ + if (req->iobuf) { + iobuf_unref(req->iobuf); + } + + if (req->iobref) { + iobref_unref(req->iobref); + } + + sem_destroy(&(req->sem)); + + return; +} + +static cvlt_request_t * +cvlt_alloc_req(archive_t *arch) +{ + cvlt_request_t *reqptr = NULL; + + if (!arch) { + goto err; + } + + if (arch->req_pool) { + reqptr = mem_get0(arch->req_pool); + if (reqptr) { + cvlt_req_init(reqptr); + } + } + + if (reqptr) { + LOCK(&(arch->lock)); + arch->nreqs++; + UNLOCK(&(arch->lock)); + } + +err: + return reqptr; +} + +static int32_t +cvlt_free_req(archive_t *arch, cvlt_request_t *reqptr) +{ + if (!reqptr) { + goto err; + } + + if (!arch) { + goto err; + } + + if (arch->req_pool) { + /* + * Free the request resources if they exist. + */ + + cvlt_req_destroy(reqptr); + mem_put(reqptr); + + LOCK(&(arch->lock)); + arch->nreqs--; + UNLOCK(&(arch->lock)); + } + + return 0; + +err: + return -1; +} + +static int32_t +cvlt_init_xlator(xlator_t *this, archive_t *arch, int num_req, int num_iatt) +{ + int32_t ret = -1; + int32_t errnum = -1; + int32_t locked = 0; + + /* + * Perform all the initializations needed for brining up the xlator. 
+ */ + if (!arch) { + goto err; + } + + LOCK_INIT(&(arch->lock)); + LOCK(&(arch->lock)); + + locked = 1; + + ret = cvlt_alloc_resources(this, arch, num_req, num_iatt); + + if (ret) { + goto err; + } + + /* + * Now that the fops have been extracted initialize the store + */ + ret = arch->fops.init(&(arch->descinfo), &errnum, plugin); + if (ret) { + goto err; + } + + UNLOCK(&(arch->lock)); + locked = 0; + ret = 0; + + return ret; + +err: + if (arch) { + cvlt_free_resources(arch); + + if (locked) { + UNLOCK(&(arch->lock)); + } + } + + return ret; +} + +static int32_t +cvlt_term_xlator(archive_t *arch) +{ + int32_t errnum = -1; + + if (!arch) { + goto err; + } + + LOCK(&(arch->lock)); + + /* + * Release the resources that have been allocated inside store + */ + arch->fops.fini(&(arch->descinfo), &errnum); + + cvlt_free_resources(arch); + + UNLOCK(&(arch->lock)); + + GF_FREE(arch); + + return 0; + +err: + return -1; +} + +static int32_t +cvlt_init_store_info(archive_t *priv, archstore_info_t *store_info) +{ + if (!store_info) { + return -1; + } + + store_info->prod = priv->product_id; + store_info->prodlen = strlen(priv->product_id); + + store_info->id = priv->store_id; + store_info->idlen = strlen(priv->store_id); + + return 0; +} + +static int32_t +cvlt_init_file_info(cs_loc_xattr_t *xattr, archstore_fileinfo_t *file_info) +{ + if (!xattr || !file_info) { + return -1; + } + + gf_uuid_copy(file_info->uuid, xattr->uuid); + file_info->path = xattr->file_path; + file_info->pathlength = strlen(xattr->file_path); + + return 0; +} + +static int32_t +cvlt_init_gluster_store_info(cs_loc_xattr_t *xattr, + archstore_info_t *store_info) +{ + static char *product = "glusterfs"; + + if (!xattr || !store_info) { + return -1; + } + + store_info->prod = product; + store_info->prodlen = strlen(product); + + store_info->id = xattr->volname; + store_info->idlen = strlen(xattr->volname); + + return 0; +} + +static int32_t +cvlt_init_gluster_file_info(cs_loc_xattr_t *xattr, + 
archstore_fileinfo_t *file_info) +{ + if (!xattr || !file_info) { + return -1; + } + + gf_uuid_copy(file_info->uuid, xattr->gfid); + file_info->path = xattr->file_path; + file_info->pathlength = strlen(xattr->file_path); + + return 0; +} + +static void +cvlt_copy_stat_info(struct iatt *buf, cs_size_xattr_t *xattrs) +{ + /* + * If the file was archived then the reported size will not be a + * correct one. We need to fix this. + */ + if (buf && xattrs) { + buf->ia_size = xattrs->size; + buf->ia_blksize = xattrs->blksize; + buf->ia_blocks = xattrs->blocks; + } + + return; +} + +static void +cvlt_readv_complete(archstore_desc_t *desc, app_callback_info_t *cbkinfo, + void *cookie, int64_t op_ret, int32_t op_errno) +{ + struct iovec iov; + xlator_t *this = NULL; + struct iatt postbuf = { + 0, + }; + call_frame_t *frame = NULL; + cvlt_request_t *req = (cvlt_request_t *)cookie; + cs_local_t *local = NULL; + cs_private_t *cspriv = NULL; + archive_t *priv = NULL; + + frame = req->frame; + this = frame->this; + local = frame->local; + + cspriv = this->private; + priv = (archive_t *)cspriv->stores->config; + + if (strcmp(priv->trailer, CVLT_TRAILER)) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + gf_msg_debug(plugin, 0, + " Read callback invoked offset:%" PRIu64 "bytes: %" PRIu64 + " op : %d ret : %" PRId64 " errno : %d", + req->offset, req->bytes, req->op_type, op_ret, op_errno); + + if (op_ret < 0) { + goto out; + } + + req->iobref = iobref_new(); + if (!req->iobref) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + iobref_add(req->iobref, req->iobuf); + iov.iov_base = iobuf_ptr(req->iobuf); + iov.iov_len = op_ret; + + cvlt_copy_stat_info(&postbuf, &(req->szxattr)); + + /* + * Hack to notify higher layers of EOF. 
+ */ + if (!postbuf.ia_size || (req->offset + iov.iov_len >= postbuf.ia_size)) { + gf_msg_debug(plugin, 0, " signalling end-of-file for uuid=%s", + uuid_utoa(req->file_info.uuid)); + op_errno = ENOENT; + } + +out: + + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf, + req->iobref, local->xattr_rsp); + + cvlt_free_req(priv, req); + + return; +} + +static void +cvlt_download_complete(archstore_desc_t *store, app_callback_info_t *cbk_info, + void *cookie, int64_t ret, int errcode) +{ + cvlt_request_t *req = (cvlt_request_t *)cookie; + + gf_msg_debug(plugin, 0, + " Download callback invoked ret : %" PRId64 " errno : %d", + ret, errcode); + + req->op_ret = ret; + req->op_errno = errcode; + sem_post(&(req->sem)); + + return; +} + +void * +cvlt_init(xlator_t *this) +{ + int ret = 0; + archive_t *priv = NULL; + + if (!this->children || this->children->next) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, + "should have exactly one child"); + ret = -1; + goto out; + } + + if (!this->parents) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, + "dangling volume. 
check volfile"); + ret = -1; + goto out; + } + + priv = GF_CALLOC(1, sizeof(archive_t), gf_libcvlt_mt_cvlt_private_t); + if (!priv) { + ret = -1; + goto out; + } + + priv->trailer = CVLT_TRAILER; + if (cvlt_init_xlator(this, priv, num_req, num_iatt)) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, "xlator init failed"); + ret = -1; + goto out; + } + + GF_OPTION_INIT("cloudsync-store-id", priv->store_id, str, out); + GF_OPTION_INIT("cloudsync-product-id", priv->product_id, str, out); + + gf_msg(plugin, GF_LOG_INFO, 0, 0, + "store id is : %s " + "product id is : %s.", + priv->store_id, priv->product_id); +out: + if (ret == -1) { + cvlt_term_xlator(priv); + return (NULL); + } + return priv; +} + +int +cvlt_reconfigure(xlator_t *this, dict_t *options) +{ + cs_private_t *cspriv = NULL; + archive_t *priv = NULL; + + cspriv = this->private; + priv = (archive_t *)cspriv->stores->config; + + if (strcmp(priv->trailer, CVLT_TRAILER)) + goto out; + + GF_OPTION_RECONF("cloudsync-store-id", priv->store_id, options, str, out); + + GF_OPTION_RECONF("cloudsync-product-id", priv->product_id, options, str, + out); + gf_msg_debug(plugin, 0, + "store id is : %s " + "product id is : %s.", + priv->store_id, priv->product_id); + return 0; +out: + return -1; +} + +void +cvlt_fini(void *config) +{ + archive_t *priv = NULL; + + priv = (archive_t *)config; + + if (strcmp(priv->trailer, CVLT_TRAILER)) + return; + + cvlt_term_xlator(priv); + gf_msg(plugin, GF_LOG_INFO, 0, CVLT_FREE, " released xlator resources"); + return; +} + +int +cvlt_download(call_frame_t *frame, void *config) +{ + archive_t *parch = NULL; + cs_local_t *local = frame->local; + cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr; + cvlt_request_t *req = NULL; + archstore_info_t dest_storeinfo; + archstore_fileinfo_t dest_fileinfo; + int32_t op_ret, op_errno; + + parch = (archive_t *)config; + + if (strcmp(parch->trailer, CVLT_TRAILER)) { + op_ret = -1; + op_errno = EINVAL; + goto err; + } + + gf_msg_debug(plugin, 0, " download 
invoked for uuid = %s gfid=%s ", + locxattr->uuid, uuid_utoa(locxattr->gfid)); + + if (!(parch->fops.restore)) { + op_errno = ELIBBAD; + goto err; + } + + /* + * Download needs to be processed. Allocate a request. + */ + req = cvlt_alloc_req(parch); + + if (!req) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_RESOURCE_ALLOCATION_FAILED, + " failed to allocated request for gfid=%s", + uuid_utoa(locxattr->gfid)); + op_errno = ENOMEM; + goto err; + } + + /* + * Initialize the request object. + */ + req->op_type = CVLT_RESTORE_OP; + req->frame = frame; + + /* + * The file is currently residing inside a data management store. + * To restore the file contents we need to provide the information + * about data management store. + */ + op_ret = cvlt_init_store_info(parch, &(req->store_info)); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract store info for gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + op_ret = cvlt_init_file_info(locxattr, &(req->file_info)); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract file info for gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + /* + * We need to perform in-place restore of the file from data management + * store to gusterfs volume. + */ + op_ret = cvlt_init_gluster_store_info(locxattr, &dest_storeinfo); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract destination store info for gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + op_ret = cvlt_init_gluster_file_info(locxattr, &dest_fileinfo); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract file info for gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + /* + * Submit the restore request. 
+ */ + op_ret = parch->fops.restore(&(parch->descinfo), &(req->store_info), + &(req->file_info), &dest_storeinfo, + &dest_fileinfo, &op_errno, + cvlt_download_complete, req); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED, + " failed to restore file gfid=%s from data management store", + uuid_utoa(locxattr->gfid)); + goto err; + } + + /* + * Wait for the restore to complete. + */ + sem_wait(&(req->sem)); + + if (req->op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED, + " restored failed for gfid=%s", uuid_utoa(locxattr->gfid)); + goto err; + } + + if (req) { + cvlt_free_req(parch, req); + } + + return 0; + +err: + + if (req) { + cvlt_free_req(parch, req); + } + + return -1; +} + +int +cvlt_read(call_frame_t *frame, void *config) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + archive_t *parch = NULL; + cvlt_request_t *req = NULL; + struct iovec iov = { + 0, + }; + struct iobref *iobref; + size_t size = 0; + off_t off = 0; + + cs_local_t *local = frame->local; + cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr; + + size = local->xattrinfo.size; + off = local->xattrinfo.offset; + + parch = (archive_t *)config; + + if (strcmp(parch->trailer, CVLT_TRAILER)) { + op_ret = -1; + op_errno = EINVAL; + goto err; + } + + gf_msg_debug(plugin, 0, + " read invoked for gfid = %s offset = %" PRIu64 + " file_size = %" PRIu64, + uuid_utoa(locxattr->gfid), off, local->stbuf.ia_size); + + if (off >= local->stbuf.ia_size) { + /* + * Hack to notify higher layers of EOF. 
+ */ + + op_errno = ENOENT; + op_ret = 0; + + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED, + " reporting end-of-file for gfid=%s", uuid_utoa(locxattr->gfid)); + + goto err; + } + + if (!size) { + op_errno = EINVAL; + + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED, + " zero size read attempted on gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + if (!(parch->fops.read)) { + op_errno = ELIBBAD; + goto err; + } + + /* + * The read request need to be processed. Allocate a request. + */ + req = cvlt_alloc_req(parch); + + if (!req) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_NO_MEMORY, + " failed to allocated request for gfid=%s", + uuid_utoa(locxattr->gfid)); + op_errno = ENOMEM; + goto err; + } + + req->iobuf = iobuf_get_page_aligned(parch->iobuf_pool, size, ALIGN_SIZE); + if (!req->iobuf) { + op_errno = ENOMEM; + goto err; + } + + /* + * Initialize the request object. + */ + req->op_type = CVLT_READ_OP; + req->offset = off; + req->bytes = size; + req->frame = frame; + req->szxattr.size = local->stbuf.ia_size; + req->szxattr.blocks = local->stbuf.ia_blocks; + req->szxattr.blksize = local->stbuf.ia_blksize; + + /* + * The file is currently residing inside a data management store. + * To read the file contents we need to provide the information + * about data management store. + */ + op_ret = cvlt_init_store_info(parch, &(req->store_info)); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract store info for gfid=%s" + " offset=%" PRIu64 " size=%" GF_PRI_SIZET + ", " + " buf=%p", + uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); + goto err; + } + + op_ret = cvlt_init_file_info(locxattr, &(req->file_info)); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract file info for gfid=%s" + " offset=%" PRIu64 " size=%" GF_PRI_SIZET + ", " + " buf=%p", + uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); + goto err; + } + + /* + * Submit the read request. 
+ */ + op_ret = parch->fops.read(&(parch->descinfo), &(req->store_info), + &(req->file_info), off, req->iobuf->ptr, size, + &op_errno, cvlt_readv_complete, req); + + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " read failed on gfid=%s" + " offset=%" PRIu64 " size=%" GF_PRI_SIZET + ", " + " buf=%p", + uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); + goto err; + } + + return 0; + +err: + + iobref = iobref_new(); + gf_msg_debug(plugin, 0, " read unwinding stack op_ret = %d, op_errno = %d", + op_ret, op_errno); + + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, + &(local->stbuf), iobref, local->xattr_rsp); + + if (iobref) { + iobref_unref(iobref); + } + + if (req) { + cvlt_free_req(parch, req); + } + + return 0; +} diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h new file mode 100644 index 00000000000..c45ac948f6c --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h @@ -0,0 +1,84 @@ +/* + Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _LIBCVLT_H +#define _LIBCVLT_H + +#include <semaphore.h> +#include <glusterfs/xlator.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/syncop.h> +#include <glusterfs/compat-errno.h> +#include "cloudsync-common.h" +#include "libcvlt-mem-types.h" +#include "archivestore.h" + +enum _cvlt_op { + CVLT_READ_OP = 1, + CVLT_WRITE_OP = 2, + CVLT_RESTORE_OP = 3, + CVLT_ARCHIVE_OP = 4, + CVLT_LOOKUP_OP = 5, + CVLT_XATTR_OP = 6, + CVLT_STAT_OP = 7, + CVLT_FSTAT_op = 8, + CVLT_UNDEF_OP = 127 +}; +typedef enum _cvlt_op cvlt_op_t; + +struct _archive; +struct _cvlt_request { + uint64_t offset; + uint64_t bytes; + struct iobuf *iobuf; + struct iobref *iobref; + call_frame_t *frame; + cvlt_op_t op_type; + int32_t op_ret; + int32_t op_errno; + xlator_t *this; + sem_t sem; + archstore_info_t store_info; + archstore_fileinfo_t file_info; + cs_size_xattr_t szxattr; +}; +typedef struct _cvlt_request cvlt_request_t; + +struct _archive { + gf_lock_t lock; /* lock for controlling access */ + xlator_t *xl; /* xlator */ + void *handle; /* handle returned from dlopen */ + int32_t nreqs; /* num requests active */ + struct mem_pool *req_pool; /* pool for requests */ + struct iobuf_pool *iobuf_pool; /* iobuff pool */ + archstore_desc_t descinfo; /* Archive store descriptor info */ + archstore_methods_t fops; /* function pointers */ + char *product_id; + char *store_id; + char *trailer; +}; +typedef struct _archive archive_t; + +void * +cvlt_init(xlator_t *); + +int +cvlt_reconfigure(xlator_t *, dict_t *); + +void +cvlt_fini(void *); + +int +cvlt_download(call_frame_t *, void *); + +int +cvlt_read(call_frame_t *, void *); + +#endif diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c index fbdcdf72b5b..7f0b9e563b8 100644 --- a/xlators/features/cloudsync/src/cloudsync.c +++ b/xlators/features/cloudsync/src/cloudsync.c @@ -16,9 +16,10 @@ #include <glusterfs/call-stub.h> #include 
"cloudsync-autogen-fops.h" +#include <string.h> #include <dlfcn.h> -void +static void cs_cleanup_private(cs_private_t *priv) { if (priv) { @@ -34,11 +35,15 @@ cs_cleanup_private(cs_private_t *priv) return; } -struct cs_plugin plugins[] = { +static struct cs_plugin plugins[] = { {.name = "cloudsyncs3", .library = "cloudsyncs3.so", .description = "cloudsync s3 store."}, - +#if defined(__linux__) + {.name = "cvlt", + .library = "cloudsynccvlt.so", + .description = "Commvault content store."}, +#endif {.name = NULL}, }; @@ -72,12 +77,14 @@ cs_init(xlator_t *this) this->private = priv; + GF_OPTION_INIT("cloudsync-remote-read", priv->remote_read, bool, out); + /* temp workaround. Should be configurable through glusterd*/ per_vol = _gf_true; if (per_vol) { - if (dict_get_str(this->options, "cloudsync-storetype", &temp_str) == - 0) { + if (dict_get_str_sizen(this->options, "cloudsync-storetype", + &temp_str) == 0) { for (index = 0; plugins[index].name; index++) { if (!strcmp(temp_str, plugins[index].name)) { libname = plugins[index].library; @@ -135,6 +142,18 @@ cs_init(xlator_t *this) (void)dlerror(); + if (priv->remote_read) { + priv->stores->rdfop = store_methods->fop_remote_read; + if (!priv->stores->rdfop) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "failed to get" + " read fop %s", + dlerror()); + ret = -1; + goto out; + } + } + priv->stores->dlfop = store_methods->fop_download; if (!priv->stores->dlfop) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, @@ -181,8 +200,10 @@ cs_init(xlator_t *this) out: if (ret == -1) { - if (this->local_pool) + if (this->local_pool) { mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } cs_cleanup_private(priv); @@ -196,6 +217,22 @@ out: return ret; } +int +cs_forget(xlator_t *this, inode_t *inode) +{ + uint64_t ctx_int = 0; + cs_inode_ctx_t *ctx = NULL; + + inode_ctx_del(inode, this, &ctx_int); + if (!ctx_int) + return 0; + + ctx = (cs_inode_ctx_t *)(uintptr_t)ctx_int; + + GF_FREE(ctx); + return 0; +} + void cs_fini(xlator_t 
*this) { @@ -217,6 +254,9 @@ cs_reconfigure(xlator_t *this, dict_t *options) goto out; } + GF_OPTION_RECONF("cloudsync-remote-read", priv->remote_read, options, bool, + out); + /* needed only for per volume configuration*/ ret = priv->stores->reconfigure(this, options); @@ -242,32 +282,6 @@ out: } int32_t -cs_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - gf_dirent_t *tmp = NULL; - char *sxattr = NULL; - uint64_t ia_size = 0; - int ret = 0; - - list_for_each_entry(tmp, &entries->list, list) - { - ret = dict_get_str(tmp->dict, GF_CS_OBJECT_SIZE, &sxattr); - if (ret) { - gf_msg_trace(this->name, 0, "size xattr found"); - continue; - } - - ia_size = atoll(sxattr); - tmp->d_stat.ia_size = ia_size; - } - - STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); - return 0; -} - -int32_t cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, dict_t *xdata) { @@ -277,16 +291,23 @@ cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, if (!xdata) { xdata = dict_new(); if (!xdata) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "failed to create " + "dict"); goto err; } } - ret = dict_set_int32(xdata, GF_CS_OBJECT_SIZE, 1); + ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1); if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "dict_set failed key:" + " %s", + GF_CS_OBJECT_STATUS); goto err; } - STACK_WIND(frame, cs_readdirp_cbk, FIRST_CHILD(this), + STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); return 0; err: @@ -305,7 +326,6 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; - /* Do we need lock here? 
*/ local->call_cnt++; if (op_ret == -1) { @@ -320,13 +340,13 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; } else { __cs_inode_ctx_update(this, local->loc.inode, val); - gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %ld", val); + gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val); if (local->call_cnt == 1 && (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { gf_msg(this->name, GF_LOG_WARNING, 0, 0, "will repair and download " - "the file, current state : %ld", + "the file, current state : %" PRIu64, val); goto repair; } else { @@ -368,7 +388,6 @@ int32_t cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { - int op_errno = -1; cs_local_t *local = NULL; int ret = 0; cs_inode_ctx_t *ctx = NULL; @@ -381,14 +400,13 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, local = cs_local_init(this, frame, loc, NULL, GF_FOP_TRUNCATE); if (!local) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed"); - op_errno = ENOMEM; goto err; } __cs_inode_ctx_get(this, loc->inode, &ctx); if (ctx) - state = __cs_get_file_state(this, loc->inode, ctx); + state = __cs_get_file_state(loc->inode, ctx); else state = GF_CS_LOCAL; @@ -407,7 +425,6 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, xdata); if (!local->stub) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); - op_errno = ENOMEM; goto err; } @@ -419,14 +436,13 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, local->call_cnt++; ret = locate_and_execute(frame); if (ret) { - op_errno = ENOMEM; goto err; } } return 0; err: - CS_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL); + CS_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } @@ -498,7 +514,7 @@ cs_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, local->xattr_req = xdata ? 
dict_ref(xdata) : (xdata = dict_new()); - tmp = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE); + tmp = dict_get_sizen(dict, GF_CS_OBJECT_UPLOAD_COMPLETE); if (tmp) { /* Value of key should be the atime */ local->stub = fop_setxattr_stub(frame, cs_resume_setxattr, loc, dict, @@ -665,7 +681,7 @@ cs_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, if (op_ret == 0) { ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); if (!ret) { - gf_msg_debug(this->name, 0, "state %ld", val); + gf_msg_debug(this->name, 0, "state %" PRIu64, val); ret = __cs_inode_ctx_update(this, fd->inode, val); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); @@ -831,7 +847,7 @@ out: return 0; } -void * +int cs_download_task(void *arg) { call_frame_t *frame = NULL; @@ -842,7 +858,6 @@ cs_download_task(void *arg) fd_t *fd = NULL; cs_local_t *local = NULL; dict_t *dict = NULL; - int *retval = NULL; frame = (call_frame_t *)arg; @@ -850,13 +865,6 @@ cs_download_task(void *arg) priv = this->private; - retval = GF_CALLOC(1, sizeof(int), gf_common_mt_int); - if (!retval) { - gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); - ret = -1; - goto out; - } - if (!priv->stores) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "No remote store " @@ -972,20 +980,13 @@ out: local->dlfd = NULL; } - if (retval) { - *retval = ret; - pthread_exit(retval); - } else { - pthread_exit(&ret); - } + return ret; } int cs_download(call_frame_t *frame) { - int *retval = NULL; int ret = 0; - pthread_t dthread; cs_local_t *local = NULL; xlator_t *this = NULL; @@ -1000,16 +1001,404 @@ cs_download(call_frame_t *frame) goto out; } - ret = gf_thread_create(&dthread, NULL, &cs_download_task, (void *)frame, - "downloadthread"); + ret = cs_download_task((void *)frame); +out: + return ret; +} + +int +cs_set_xattr_req(call_frame_t *frame) +{ + cs_local_t *local = NULL; + GF_UNUSED int ret = 0; + + local = frame->local; + + /* When remote reads are performed (i.e. 
reads on remote store), + * there needs to be a way to associate a file on gluster volume + * with its correspnding file on the remote store. In order to do + * that, a unique key can be maintained as an xattr + * (GF_CS_XATTR_ARCHIVE_UUID)on the stub file on gluster bricks. + * This xattr should be provided to the plugin to + * perform the read fop on the correct file. This assumes that the file + * hierarchy and name need not be the same on remote store as that of + * the gluster volume. + */ + ret = dict_set_sizen_str_sizen(local->xattr_req, GF_CS_XATTR_ARCHIVE_UUID, + "1"); + + return 0; +} + +int +cs_update_xattrs(call_frame_t *frame, dict_t *xdata) +{ + cs_local_t *local = NULL; + xlator_t *this = NULL; + int size = -1; + GF_UNUSED int ret = 0; + + local = frame->local; + this = frame->this; + + local->xattrinfo.lxattr = GF_CALLOC(1, sizeof(cs_loc_xattr_t), + gf_cs_mt_cs_lxattr_t); + if (!local->xattrinfo.lxattr) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + gf_uuid_copy(local->xattrinfo.lxattr->gfid, local->loc.gfid); + + if (local->remotepath) { + local->xattrinfo.lxattr->file_path = gf_strdup(local->remotepath); + if (!local->xattrinfo.lxattr->file_path) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + } - pthread_join(dthread, (void **)&retval); + ret = dict_get_gfuuid(xdata, GF_CS_XATTR_ARCHIVE_UUID, + &(local->xattrinfo.lxattr->uuid)); - ret = *retval; + if (ret) { + gf_uuid_clear(local->xattrinfo.lxattr->uuid); + } + size = strlen(this->name) - strlen("-cloudsync") + 1; + local->xattrinfo.lxattr->volname = GF_CALLOC(1, size, gf_common_mt_char); + if (!local->xattrinfo.lxattr->volname) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + strncpy(local->xattrinfo.lxattr->volname, this->name, size - 1); + local->xattrinfo.lxattr->volname[size - 1] = '\0'; + + return 0; +err: + cs_xattrinfo_wipe(local); + return -1; +} + +int +cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, 
uint32_t flags) +{ + xlator_t *this = NULL; + cs_private_t *priv = NULL; + int ret = -1; + fd_t *fd = NULL; + cs_local_t *local = NULL; + + local = frame->local; + this = frame->this; + priv = this->private; + + if (!local->remotepath) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "remote path not" + " available. Check posix logs to resolve"); + goto out; + } + + if (!priv->stores) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "No remote store " + "plugins found"); + ret = -1; + goto out; + } + + if (local->fd) { + fd = fd_anonymous(local->fd->inode); + } else { + fd = fd_anonymous(local->loc.inode); + } + + local->xattrinfo.size = size; + local->xattrinfo.offset = offset; + local->xattrinfo.flags = flags; + + if (!fd) { + gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed"); + ret = -1; + goto out; + } + + local->dlfd = fd; + local->dloffset = offset; + + /*this calling method is for per volume setting */ + ret = priv->stores->rdfop(frame, priv->stores->config); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "read failed" + ", remotepath: %s", + local->remotepath); + ret = -1; + goto out; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "read success, path" + " : %s", + local->remotepath); + } out: - if (retval) - GF_FREE(retval); + if (fd) { + fd_unref(fd); + local->dlfd = NULL; + } + return ret; +} + +int32_t +cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) +{ + cs_local_t *local = NULL; + int ret = 0; + uint64_t val = 0; + fd_t *fd = NULL; + + local = frame->local; + fd = local->fd; + + local->call_cnt++; + + if (op_ret == -1) { + ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); + if (ret == 0) { + if (val == GF_CS_ERROR) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "could not get file state, unwinding"); + op_ret = -1; + op_errno = EIO; + goto unwind; + } else { + 
__cs_inode_ctx_update(this, fd->inode, val); + gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val); + + if (local->call_cnt == 1 && + (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + " will read from remote : %" PRIu64, val); + goto repair; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "second readv, Unwinding"); + goto unwind; + } + } + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state " + "could not be figured, unwinding"); + goto unwind; + } + } else { + /* successful readv => file is local */ + __cs_inode_ctx_update(this, fd->inode, GF_CS_LOCAL); + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "state : GF_CS_LOCAL" + ", readv successful"); + + goto unwind; + } + +repair: + ret = locate_and_execute(frame); + if (ret) { + goto unwind; + } + + return 0; + +unwind: + CS_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf, + iobref, xdata); + + return 0; +} + +int32_t +cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + int ret = 0; + + ret = cs_resume_postprocess(this, frame, fd->inode); + if (ret) { + goto unwind; + } + + cs_inodelk_unlock(frame); + + STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); + + return 0; + +unwind: + cs_inodelk_unlock(frame); + + cs_common_cbk(frame); + + return 0; +} + +int32_t +cs_resume_remote_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, uint32_t flags, dict_t *xdata) +{ + int ret = 0; + cs_local_t *local = NULL; + gf_cs_obj_state state = -1; + cs_inode_ctx_t *ctx = NULL; + + cs_inodelk_unlock(frame); + + local = frame->local; + if (!local) { + ret = -1; + goto unwind; + } + + __cs_inode_ctx_get(this, fd->inode, &ctx); + + state = __cs_get_file_state(fd->inode, ctx); + if (state == GF_CS_ERROR) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "status is GF_CS_ERROR." 
+ " Aborting readv"); + local->op_ret = -1; + local->op_errno = EREMOTE; + ret = -1; + goto unwind; + } + + /* Serve readv from remote store only if it is remote. */ + gf_msg_debug(this->name, 0, "status of file %s is %d", + local->remotepath ? local->remotepath : "", state); + + /* We will reach this condition if local inode ctx had REMOTE + * state when the control was in cs_readv but after stat + * we got an updated state saying that the file is LOCAL. + */ + if (state == GF_CS_LOCAL) { + STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + } else if (state == GF_CS_REMOTE) { + ret = cs_resume_remote_readv_postprocess(this, frame, fd->inode, offset, + size, flags); + /* Failed to submit the remote readv fop to plugin */ + if (ret) { + local->op_ret = -1; + local->op_errno = EREMOTE; + goto unwind; + } + /* When the file is in any other intermediate state, + * we should not perform remote reads. + */ + } else { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + + return 0; + +unwind: + cs_common_cbk(frame); + + return 0; +} + +int32_t +cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + int op_errno = ENOMEM; + cs_local_t *local = NULL; + int ret = 0; + cs_inode_ctx_t *ctx = NULL; + gf_cs_obj_state state = -1; + cs_private_t *priv = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + priv = this->private; + + local = cs_local_init(this, frame, NULL, fd, GF_FOP_READ); + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed"); + goto err; + } + + __cs_inode_ctx_get(this, fd->inode, &ctx); + + if (ctx) + state = __cs_get_file_state(fd->inode, ctx); + else + state = GF_CS_LOCAL; + + local->xattr_req = xdata ? 
dict_ref(xdata) : (xdata = dict_new()); + + ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "dict_set failed key:" + " %s", + GF_CS_OBJECT_STATUS); + goto err; + } + + if (priv->remote_read) { + local->stub = fop_readv_stub(frame, cs_resume_remote_readv, fd, size, + offset, flags, xdata); + } else { + local->stub = fop_readv_stub(frame, cs_resume_readv, fd, size, offset, + flags, xdata); + } + if (!local->stub) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + goto err; + } + + if (state == GF_CS_LOCAL) { + STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + } else { + local->call_cnt++; + ret = locate_and_execute(frame); + if (ret) { + goto err; + } + } + + return 0; + +err: + CS_STACK_UNWIND(readv, frame, -1, op_errno, NULL, -1, NULL, NULL, NULL); + + return 0; +} + +int +cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame, + inode_t *inode, off_t offset, size_t size, + uint32_t flags) +{ + int ret = 0; + + ret = cs_serve_readv(frame, offset, size, flags); return ret; } @@ -1059,7 +1448,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, goto err; } else { ret = __cs_inode_ctx_update(this, inode, val); - gf_msg_debug(this->name, 0, "status : %lu", val); + gf_msg_debug(this->name, 0, "status : %" PRIu64, val); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); local->op_ret = -1; @@ -1074,7 +1463,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, goto err; } - ret = dict_get_str(xdata, GF_CS_OBJECT_REMOTE, &filepath); + ret = dict_get_str_sizen(xdata, GF_CS_OBJECT_REMOTE, &filepath); if (filepath) { gf_msg_debug(this->name, 0, "filepath returned %s", filepath); local->remotepath = gf_strdup(filepath); @@ -1087,6 +1476,10 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int 
op_ret, gf_msg_debug(this->name, 0, "NULL filepath"); } + ret = cs_update_xattrs(frame, xdata); + if (ret) + goto err; + local->op_ret = 0; local->xattr_rsp = dict_ref(xdata); memcpy(&local->stbuf, stbuf, sizeof(struct iatt)); @@ -1121,6 +1514,8 @@ cs_do_stat_check(call_frame_t *main_frame) goto err; } + cs_set_xattr_req(main_frame); + if (local->fd) { STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, local->fd, local->xattr_req); @@ -1177,6 +1572,10 @@ cs_common_cbk(call_frame_t *frame) NULL, NULL, NULL); break; + case GF_FOP_TRUNCATE: + CS_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, + NULL, NULL, NULL); + break; default: break; } @@ -1354,7 +1753,7 @@ cs_resume_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, __cs_inode_ctx_get(this, loc->inode, &ctx); - state = __cs_get_file_state(this, loc->inode, ctx); + state = __cs_get_file_state(loc->inode, ctx); if (state == GF_CS_ERROR) { /* file is already remote */ @@ -1396,7 +1795,7 @@ unwind: } gf_cs_obj_state -__cs_get_file_state(xlator_t *this, inode_t *inode, cs_inode_ctx_t *ctx) +__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx) { gf_cs_obj_state state = -1; @@ -1427,7 +1826,7 @@ __cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx) if (ret) *ctx = NULL; else - *ctx = (cs_inode_ctx_t *)ctxint; + *ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; return; } @@ -1452,7 +1851,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val) ctx->state = val; - ctxint = (uint64_t)ctx; + ctxint = (uint64_t)(uintptr_t)ctx; ret = __inode_ctx_set(inode, this, &ctxint); if (ret) { @@ -1460,7 +1859,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val) goto out; } } else { - ctx = (cs_inode_ctx_t *)ctxint; + ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; ctx->state = val; } @@ -1483,7 +1882,7 @@ cs_inode_ctx_reset(xlator_t *this, inode_t *inode) return 0; } - ctx = (cs_inode_ctx_t *)ctxint; + ctx = (cs_inode_ctx_t 
*)(uintptr_t)ctxint; GF_FREE(ctx); return 0; @@ -1505,7 +1904,7 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode) __cs_inode_ctx_get(this, inode, &ctx); - state = __cs_get_file_state(this, inode, ctx); + state = __cs_get_file_state(inode, ctx); if (state == GF_CS_ERROR) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "status is GF_CS_ERROR." @@ -1532,6 +1931,7 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode) out: return ret; } + int32_t cs_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict) { @@ -1627,7 +2027,9 @@ struct xlator_fops cs_fops = { .zerofill = cs_zerofill, }; -struct xlator_cbks cs_cbks = {}; +struct xlator_cbks cs_cbks = { + .forget = cs_forget, +}; struct xlator_dumpops cs_dumpops = { .fdctx_to_dict = cs_fdctx_to_dict, @@ -1647,6 +2049,15 @@ struct volume_options cs_options[] = { {.key = {"cloudsync-storetype"}, .type = GF_OPTION_TYPE_STR, .description = "Defines which remote store is enabled"}, + {.key = {"cloudsync-remote-read"}, + .type = GF_OPTION_TYPE_BOOL, + .description = "Defines a remote read fop when on"}, + {.key = {"cloudsync-store-id"}, + .type = GF_OPTION_TYPE_STR, + .description = "Defines a volume wide store id"}, + {.key = {"cloudsync-product-id"}, + .type = GF_OPTION_TYPE_STR, + .description = "Defines a volume wide product id"}, {.key = {NULL}}, }; diff --git a/xlators/features/cloudsync/src/cloudsync.h b/xlators/features/cloudsync/src/cloudsync.h index dbdb207ddea..d24141978d6 100644 --- a/xlators/features/cloudsync/src/cloudsync.h +++ b/xlators/features/cloudsync/src/cloudsync.h @@ -19,6 +19,7 @@ #include "cloudsync-common.h" #include "cloudsync-autogen-fops.h" +#define ALIGN_SIZE 4096 #define CS_LOCK_DOMAIN "cs.protect.file.stat" typedef struct cs_dlstore { off_t off; @@ -29,6 +30,7 @@ typedef struct cs_dlstore { } cs_dlstore; typedef struct cs_inode_ctx { + cs_loc_xattr_t locxattr; gf_cs_obj_state state; } cs_inode_ctx_t; @@ -85,7 +87,7 @@ void __cs_inode_ctx_get(xlator_t 
*this, inode_t *inode, cs_inode_ctx_t **ctx); gf_cs_obj_state -__cs_get_file_state(xlator_t *this, inode_t *inode, cs_inode_ctx_t *ctx); +__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx); int cs_inodelk_unlock(call_frame_t *main_frame); @@ -100,4 +102,22 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xattr_req); + +int32_t +cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata); +int32_t +cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata); +int32_t +cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata); + +int +cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame, + inode_t *inode, off_t offset, size_t size, + uint32_t flags); +int +cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags); #endif /* __CLOUDSYNC_H__ */ diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c index ad7776741d9..3fea5672a21 100644 --- a/xlators/features/gfid-access/src/gfid-access.c +++ b/xlators/features/gfid-access/src/gfid-access.c @@ -327,10 +327,8 @@ out: static gf_boolean_t __is_gfid_access_dir(uuid_t gfid) { - uuid_t aux_gfid; - - memset(aux_gfid, 0, 16); - aux_gfid[15] = GF_AUX_GFID; + static uuid_t aux_gfid = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, GF_AUX_GFID}; if (gf_uuid_compare(gfid, aux_gfid) == 0) return _gf_true; diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index d9768b32d7f..4abb2c73ce5 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -1685,21 +1685,25 @@ index_get_gfid_type(void *opaque) 
loc_wipe(&loc); - entry->d_type = IA_INVAL; + entry->d_type = gf_d_type_from_ia_type(IA_INVAL); + entry->d_stat.ia_type = IA_INVAL; if (gf_uuid_parse(entry->d_name, loc.gfid)) continue; loc.inode = inode_find(args->parent->table, loc.gfid); if (loc.inode) { - entry->d_type = loc.inode->ia_type; + entry->d_stat.ia_type = loc.inode->ia_type; + entry->d_type = gf_d_type_from_ia_type(loc.inode->ia_type); continue; } loc.inode = inode_new(args->parent->table); if (!loc.inode) continue; ret = syncop_lookup(FIRST_CHILD(this), &loc, &iatt, 0, 0, 0); - if (ret == 0) - entry->d_type = iatt.ia_type; + if (ret == 0) { + entry->d_type = gf_d_type_from_ia_type(iatt.ia_type); + entry->d_stat = iatt; + } } loc_wipe(&loc); @@ -2100,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) worker_enqueue(this, stub); return 0; normal: - ret = dict_get_str(xattr_req, "link-count", &flag); + ret = dict_get_str_sizen(xattr_req, "link-count", &flag); if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) { STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, xattr_req); @@ -2474,6 +2478,7 @@ out: GF_FREE(priv); this->private = NULL; mem_pool_destroy(this->local_pool); + this->local_pool = NULL; } if (attr_inited) @@ -2587,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...) if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) { stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; /* Wait for draining stub from queue before notify PARENT_DOWN */ diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c index 8699b99d291..56dee244281 100644 --- a/xlators/features/leases/src/leases-internal.c +++ b/xlators/features/leases/src/leases-internal.c @@ -73,7 +73,7 @@ out: * timeout value(in seconds) set as an option to this xlator. 
* -1 error case */ -int32_t +static int32_t get_recall_lease_timeout(xlator_t *this) { leases_private_t *priv = NULL; @@ -356,9 +356,8 @@ out: static lease_inode_t * new_lease_inode(inode_t *inode) { - lease_inode_t *l_inode = NULL; - - l_inode = GF_CALLOC(1, sizeof(*l_inode), gf_leases_mt_lease_inode_t); + lease_inode_t *l_inode = GF_MALLOC(sizeof(*l_inode), + gf_leases_mt_lease_inode_t); if (!l_inode) goto out; @@ -379,9 +378,8 @@ __destroy_lease_inode(lease_inode_t *l_inode) static lease_client_t * new_lease_client(const char *client_uid) { - lease_client_t *clnt = NULL; - - clnt = GF_CALLOC(1, sizeof(*clnt), gf_leases_mt_lease_client_t); + lease_client_t *clnt = GF_MALLOC(sizeof(*clnt), + gf_leases_mt_lease_client_t); if (!clnt) goto out; @@ -448,29 +446,29 @@ out: static int add_inode_to_client_list(xlator_t *this, inode_t *inode, const char *client_uid) { - int ret = 0; - leases_private_t *priv = NULL; + leases_private_t *priv = this->private; lease_client_t *clnt = NULL; - lease_inode_t *lease_inode = NULL; - priv = this->private; + lease_inode_t *lease_inode = new_lease_inode(inode); + if (!lease_inode) + return -ENOMEM; + pthread_mutex_lock(&priv->mutex); { clnt = __get_or_new_lease_client(this, priv, client_uid); - GF_CHECK_ALLOC(clnt, ret, out); - - lease_inode = new_lease_inode(inode); - GF_CHECK_ALLOC(lease_inode, ret, out); - + if (!clnt) { + pthread_mutex_unlock(&priv->mutex); + __destroy_lease_inode(lease_inode); + return -ENOMEM; + } list_add_tail(&clnt->inode_list, &lease_inode->list); - gf_msg_debug(this->name, 0, - "Added a new inode:%p to the client(%s) " - "cleanup list, gfid(%s)", - inode, client_uid, uuid_utoa(inode->gfid)); } -out: pthread_mutex_unlock(&priv->mutex); - return ret; + gf_msg_debug(this->name, 0, + "Added a new inode:%p to the client(%s) " + "cleanup list, gfid(%s)", + inode, client_uid, uuid_utoa(inode->gfid)); + return 0; } /* Add lease entry to the corresponding client entry. 
@@ -587,15 +585,17 @@ remove_from_clnt_list(xlator_t *this, const char *client_uid, inode_t *inode) { clnt = __get_lease_client(this, priv, client_uid); if (!clnt) { + pthread_mutex_unlock(&priv->mutex); gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_CLNT_NOTFOUND, "There is no client entry found in the cleanup list"); - pthread_mutex_unlock(&priv->mutex); goto out; } ret = __remove_inode_from_clnt_list(this, clnt, inode); if (ret) { + pthread_mutex_unlock(&priv->mutex); gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INODE_NOTFOUND, "There is no inode entry found in the cleanup list"); + goto out; } } pthread_mutex_unlock(&priv->mutex); @@ -854,20 +854,20 @@ recall_lease_timer_handler(struct gf_tw_timer_list *timer, void *data, priv = timer_data->this->private; inode = timer_data->inode; + lease_inode = new_lease_inode(inode); + if (!lease_inode) { + errno = ENOMEM; + goto out; + } pthread_mutex_lock(&priv->mutex); { - lease_inode = new_lease_inode(inode); - if (!lease_inode) { - errno = ENOMEM; - goto out; - } list_add_tail(&lease_inode->list, &priv->recall_list); pthread_cond_broadcast(&priv->cond); } + pthread_mutex_unlock(&priv->mutex); out: /* unref the inode_ref taken by timer_data in __recall_lease */ inode_unref(timer_data->inode); - pthread_mutex_unlock(&priv->mutex); GF_FREE(timer); } @@ -887,6 +887,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx) struct gf_tw_timer_list *timer = NULL; leases_private_t *priv = NULL; lease_timer_data_t *timer_data = NULL; + time_t recall_time; if (lease_ctx->recall_in_progress) { gf_msg_debug(this->name, 0, @@ -896,6 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx) } priv = this->private; + recall_time = gf_time(); list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, lease_id_list) { @@ -919,9 +921,9 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx) } lease_ctx->recall_in_progress = _gf_true; - lease_entry->recall_time = time(NULL); + 
lease_entry->recall_time = recall_time; } - timer = GF_CALLOC(1, sizeof(*timer), gf_common_mt_tw_timer_list); + timer = GF_MALLOC(sizeof(*timer), gf_common_mt_tw_timer_list); if (!timer) { goto out; } @@ -1146,12 +1148,13 @@ check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id, pthread_mutex_lock(&lease_ctx->lock); { if (lease_ctx->lease_type == NONE) { + pthread_mutex_unlock(&lease_ctx->lock); gf_msg_debug(frame->this->name, 0, "No leases found continuing with the" " fop:%s", gf_fop_list[frame->root->op]); ret = WIND_FOP; - goto unlock; + goto out; } conflicts = __check_lease_conflict(frame, lease_ctx, lease_id, is_write_fop); @@ -1178,7 +1181,6 @@ check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id, } } } -unlock: pthread_mutex_unlock(&lease_ctx->lock); out: return ret; @@ -1355,6 +1357,7 @@ expired_recall_cleanup(void *data) lease_inode_t *tmp = NULL; leases_private_t *priv = NULL; xlator_t *this = NULL; + time_t time_now; GF_VALIDATE_OR_GOTO("leases", data, out); @@ -1364,6 +1367,7 @@ expired_recall_cleanup(void *data) gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread"); while (1) { + time_now = gf_time(); pthread_mutex_lock(&priv->mutex); { if (priv->fini) { @@ -1372,7 +1376,7 @@ expired_recall_cleanup(void *data) } INIT_LIST_HEAD(&recall_cleanup_list); if (list_empty(&priv->recall_list)) { - sleep_till.tv_sec = time(NULL) + 600; + sleep_till.tv_sec = time_now + 600; pthread_cond_timedwait(&priv->cond, &priv->mutex, &sleep_till); } if (!list_empty(&priv->recall_list)) { diff --git a/xlators/features/leases/src/leases.c b/xlators/features/leases/src/leases.c index 56299632e1d..04bee50ba3f 100644 --- a/xlators/features/leases/src/leases.c +++ b/xlators/features/leases/src/leases.c @@ -35,6 +35,7 @@ leases_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, char *lease_id = NULL; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); fd_ctx = GF_CALLOC(1, 
sizeof(*fd_ctx), gf_leases_mt_fd_ctx_t); if (!fd_ctx) { @@ -109,6 +110,7 @@ leases_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, fd->flags); @@ -157,6 +159,7 @@ leases_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, fd->flags); @@ -202,6 +205,7 @@ leases_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS_LK(cmd, flock->l_type, fd->flags); @@ -240,6 +244,7 @@ leases_lease(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t op_ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); ret = process_lease_req(frame, this, loc->inode, lease); if (ret < 0) { @@ -282,6 +287,7 @@ leases_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, 0); @@ -328,6 +334,7 @@ leases_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, 0); @@ -376,6 +383,7 @@ leases_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); /* should the lease be also checked for newloc */ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); @@ 
-424,6 +432,7 @@ leases_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, 0); @@ -470,6 +479,7 @@ leases_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, 0); @@ -516,6 +526,7 @@ leases_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, flags); @@ -563,6 +574,7 @@ leases_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, fd->flags); @@ -607,6 +619,7 @@ leases_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, 0); /* TODO:fd->flags?*/ @@ -652,6 +665,7 @@ leases_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, fd->flags); @@ -697,6 +711,7 @@ leases_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, fd->flags); @@ -744,6 
+759,7 @@ leases_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, fd->flags); @@ -789,6 +805,7 @@ leases_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, int ret = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, fd->flags); @@ -834,6 +851,7 @@ leases_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) uint64_t ctx = 0; EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); GET_FLAGS(frame->root->op, fd->flags); diff --git a/xlators/features/leases/src/leases.h b/xlators/features/leases/src/leases.h index 9749fd903e1..a6e8a6824cc 100644 --- a/xlators/features/leases/src/leases.h +++ b/xlators/features/leases/src/leases.h @@ -45,6 +45,14 @@ goto label; \ } while (0) +#define EXIT_IF_INTERNAL_FOP(frame, xdata, label) \ + do { \ + if (frame->root->pid < 0) \ + goto label; \ + if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \ + goto label; \ + } while (0) + #define GET_LEASE_ID(xdata, lease_id, client_uid) \ do { \ int ret_val = -1; \ @@ -144,17 +152,19 @@ } while (0) struct _leases_private { - gf_boolean_t leases_enabled; - int32_t recall_lease_timeout; struct list_head client_list; struct list_head recall_list; struct tvec_base *timer_wheel; /* timer wheel where the recall request is qued and waits for unlock/expiry */ - gf_boolean_t fini; pthread_t recall_thr; - gf_boolean_t inited_recall_thr; pthread_mutex_t mutex; pthread_cond_t cond; + int32_t recall_lease_timeout; + gf_boolean_t inited_recall_thr; + gf_boolean_t fini; + gf_boolean_t leases_enabled; + + char _pad[1]; /* manual padding */ }; typedef struct _leases_private 
leases_private_t; @@ -181,18 +191,20 @@ typedef struct _lease_fd_ctx lease_fd_ctx_t; struct _lease_inode_ctx { struct list_head lease_id_list; /* clients that have taken leases */ int lease_type_cnt[GF_LEASE_MAX_TYPE + 1]; + uint64_t lease_cnt; /* Total number of leases on this inode */ + uint64_t openfd_cnt; /* number of fds open */ + struct list_head blocked_list; /* List of fops blocked until the + lease recall is complete */ + inode_t *inode; /* this represents the inode on which the + lock was taken, required mainly during + disconnect cleanup */ + struct gf_tw_timer_list *timer; + pthread_mutex_t lock; int lease_type; /* Types of leases acquired */ - uint64_t lease_cnt; /* Total number of leases on this inode */ - uint64_t openfd_cnt; /* number of fds open */ gf_boolean_t recall_in_progress; /* if lease recall is sent on this inode */ gf_boolean_t blocked_fops_resuming; /* if blocked fops are being resumed */ - struct list_head blocked_list; /* List of fops blocked until the - lease recall is complete */ - inode_t *inode; /* this represents the inode on which the - lock was taken, required mainly during - disconnect cleanup */ - struct gf_tw_timer_list *timer; - pthread_mutex_t lock; + + char _pad[2]; /* manual padding */ }; typedef struct _lease_inode_ctx lease_inode_ctx_t; @@ -202,11 +214,12 @@ struct _lease_id_entry { char *client_uid; /* uid of the client that has taken the lease */ int lease_type_cnt[GF_LEASE_MAX_TYPE + 1]; /* count of each lease type */ - int lease_type; /* Union of all the leases taken - under the given lease id */ uint64_t lease_cnt; /* Number of leases taken under the given lease id */ time_t recall_time; /* time @ which recall was sent */ + int lease_type; /* Union of all the leases taken + under the given lease id */ + char _pad[4]; /* manual padding */ }; typedef struct _lease_id_entry lease_id_entry_t; @@ -226,9 +239,6 @@ typedef struct __lease_timer_data lease_timer_data_t; gf_boolean_t is_leases_enabled(xlator_t *this); 
-int32_t -get_recall_lease_timeout(xlator_t *this); - lease_inode_ctx_t * lease_ctx_get(inode_t *inode, xlator_t *this); diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c index 116aed68690..ab1eac68a53 100644 --- a/xlators/features/locks/src/clear.c +++ b/xlators/features/locks/src/clear.c @@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, if (plock->blocked) { bcount++; pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW, - &plock->user_flock, -1, EAGAIN, NULL); + &plock->user_flock, -1, EINTR, NULL); - STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN, + STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR, &plock->user_flock, NULL); } else { diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 6e7fb4b2f63..a2c6be93e03 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -14,9 +14,7 @@ #include <glusterfs/glusterfs.h> #include <glusterfs/compat.h> -#include <glusterfs/xlator.h> #include <glusterfs/logging.h> -#include <glusterfs/common-utils.h> #include <glusterfs/syncop.h> #include "locks.h" @@ -462,11 +460,16 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) INIT_LIST_HEAD(&pl_inode->blocked_calls); INIT_LIST_HEAD(&pl_inode->metalk_list); INIT_LIST_HEAD(&pl_inode->queued_locks); + INIT_LIST_HEAD(&pl_inode->waiting); gf_uuid_copy(pl_inode->gfid, inode->gfid); pl_inode->check_mlock_info = _gf_true; pl_inode->mlock_enforced = _gf_false; + /* -2 means never looked up. -1 means something went wrong and link + * tracking is disabled. 
*/ + pl_inode->links = -2; + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); if (ret) { pthread_mutex_destroy(&pl_inode->mutex); @@ -478,7 +481,7 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) unlock: UNLOCK(&inode->lock); - if (pl_is_mandatory_locking_enabled(pl_inode) && + if ((pl_inode != NULL) && pl_is_mandatory_locking_enabled(pl_inode) && pl_inode->check_mlock_info && local) { /* Note: The lock enforcement information per file can be stored in the attribute flag of stat(x) in posix. With that there won't be a need @@ -493,7 +496,8 @@ unlock: /* Create a new posix_lock_t */ posix_lock_t * new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, - gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking) + gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking, + int32_t *op_errno) { posix_lock_t *lock = NULL; @@ -501,8 +505,14 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, GF_VALIDATE_OR_GOTO("posix-locks", client, out); GF_VALIDATE_OR_GOTO("posix-locks", fd, out); + if (!pl_is_lk_owner_valid(owner, client)) { + *op_errno = EINVAL; + goto out; + } + lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t); if (!lock) { + *op_errno = ENOMEM; goto out; } @@ -520,6 +530,7 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, if (lock->client_uid == NULL) { GF_FREE(lock); lock = NULL; + *op_errno = ENOMEM; goto out; } @@ -594,13 +605,11 @@ static void __insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock) { if (lock->blocked) - gettimeofday(&lock->blkd_time, NULL); + lock->blkd_time = gf_time(); else - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add_tail(&lock->list, &pl_inode->ext_list); - - return; } /* Return true if the locks overlap, false otherwise */ @@ -956,7 +965,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) struct list_head granted_list; posix_lock_t *tmp = NULL; 
posix_lock_t *lock = NULL; - + pl_local_t *local = NULL; INIT_LIST_HEAD(&granted_list); pthread_mutex_lock(&pl_inode->mutex); @@ -971,9 +980,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, 0, 0, NULL); - - STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); - + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); __destroy_lock(lock); } @@ -988,10 +997,12 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, 0, }; posix_lock_t *unlock_lock = NULL; + int32_t op_errno = 0; struct list_head granted_list; posix_lock_t *tmp = NULL; posix_lock_t *lock = NULL; + pl_local_t *local = NULL; int ret = -1; @@ -1005,7 +1016,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, unlock_lock = new_posix_lock(&flock, old_lock->client, old_lock->client_pid, &old_lock->owner, old_lock->fd, - old_lock->lk_flags, 0); + old_lock->lk_flags, 0, &op_errno); GF_VALIDATE_OR_GOTO(this->name, unlock_lock, out); ret = 0; @@ -1019,9 +1030,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, 0, 0, NULL); - - STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); - + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); __destroy_lock(lock); } @@ -1080,6 +1091,10 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", lock->client_pid, lkowner_utoa(&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); + + pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, NULL); + lock->blocked = 1; __insert_lock(pl_inode, lock); ret = -1; @@ -1182,13 +1197,6 @@ pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock) list_del_init(&rw->list); list_add(&rw->list, &unwind_rw_list); } - - while (pl_inode->fop_wind_count != 0) { - gf_msg(THIS->name, GF_LOG_TRACE, 0, 0, - "waiting for fops to be drained"); - pthread_cond_wait(&pl_inode->check_fop_wind_count, - &pl_inode->mutex); - } } pthread_mutex_unlock(&pl_inode->mutex); @@ -1206,7 +1214,6 @@ pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock) { pl_clean_local(rw->stub->frame->local); call_unwind_error(rw->stub, -1, EBUSY); - GF_FREE(lock); } return ret; @@ -1266,10 +1273,319 @@ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) return -1; } - local->inode = (fd ? inode_ref(fd->inode) : inode_ref(loc->inode)); + local->inode = (loc ? inode_ref(loc->inode) : inode_ref(fd->inode)); frame->local = local; } return 0; -}
\ No newline at end of file +} + +gf_boolean_t +pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client) +{ + if (client && (client->opversion < GD_OP_VERSION_7_0)) { + return _gf_true; + } + + if (is_lk_owner_null(owner)) { + return _gf_false; + } + return _gf_true; +} + +static int32_t +pl_inode_from_loc(loc_t *loc, inode_t **pinode) +{ + inode_t *inode = NULL; + int32_t error = 0; + + if (loc->inode != NULL) { + inode = inode_ref(loc->inode); + goto done; + } + + if (loc->parent == NULL) { + error = EINVAL; + goto done; + } + + if (!gf_uuid_is_null(loc->gfid)) { + inode = inode_find(loc->parent->table, loc->gfid); + if (inode != NULL) { + goto done; + } + } + + if (loc->name == NULL) { + error = EINVAL; + goto done; + } + + inode = inode_grep(loc->parent->table, loc->parent, loc->name); + if (inode == NULL) { + /* We haven't found any inode. This means that the file doesn't exist + * or that even if it exists, we don't have any knowledge about it, so + * we don't have locks on it either, which is fine for our purposes. */ + goto done; + } + +done: + *pinode = inode; + + return error; +} + +static gf_boolean_t +pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode, + struct timespec *now, struct list_head *contend) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *lock; + gf_boolean_t has_owners = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(lock, &dom->inodelk_list, list) + { + /* If the lock belongs to the same client, we assume it's related + * to the same operation, so we allow the removal to continue. */ + if (lock->client == client) { + continue; + } + /* If the lock belongs to an internal process, we don't block the + * removal. 
*/ + if (lock->client_pid < 0) { + continue; + } + if (contend == NULL) { + return _gf_true; + } + has_owners = _gf_true; + inodelk_contention_notify_check(xl, lock, now, contend); + } + } + + return has_owners; +} + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend) +{ + struct timespec now; + inode_t *inode; + pl_inode_t *pl_inode; + int32_t error; + + pl_inode = NULL; + + error = pl_inode_from_loc(loc, &inode); + if ((error != 0) || (inode == NULL)) { + goto done; + } + + pl_inode = pl_inode_get(xl, inode, NULL); + if (pl_inode == NULL) { + inode_unref(inode); + error = ENOMEM; + goto done; + } + + /* pl_inode_from_loc() already increments ref count for inode, so + * we only assign here our reference. */ + pl_inode->inode = inode; + + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (pl_inode->removed) { + error = ESTALE; + goto unlock; + } + + if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { + error = -1; + /* We skip the unlock here because the caller must create a stub when + * we return -1 and do a call to pl_inode_remove_complete(), which + * assumes the lock is still acquired and will release it once + * everything else is prepared. 
*/ + goto done; + } + + pl_inode->is_locked = _gf_true; + pl_inode->remove_running++; + +unlock: + pthread_mutex_unlock(&pl_inode->mutex); + +done: + *ppl_inode = pl_inode; + + return error; +} + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend) +{ + pl_inode_lock_t *lock; + int32_t error = -1; + + if (stub != NULL) { + list_add_tail(&stub->list, &pl_inode->waiting); + pl_inode->is_locked = _gf_true; + } else { + error = ENOMEM; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_inode_lock_t, list); + list_del_init(&lock->list); + __pl_inodelk_unref(lock); + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + if (error < 0) { + inodelk_contention_notify(xl, contend); + } + + inode_unref(pl_inode->inode); + + return error; +} + +void +pl_inode_remove_wake(struct list_head *list) +{ + call_stub_t *stub; + + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); + + call_resume(stub); + } +} + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) +{ + struct list_head contend, granted; + struct timespec now; + pl_dom_list_t *dom; + + if (pl_inode == NULL) { + return; + } + + INIT_LIST_HEAD(&contend); + INIT_LIST_HEAD(&granted); + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (error == 0) { + if (pl_inode->links >= 0) { + pl_inode->links--; + } + if (pl_inode->links == 0) { + pl_inode->removed = _gf_true; + } + } + + pl_inode->remove_running--; + + if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { + pl_inode->is_locked = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, + &contend); + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(xl, pl_inode, &granted); + + inodelk_contention_notify(xl, &contend); + + inode_unref(pl_inode->inode); +} 
+ +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list) +{ + call_stub_t *stub, *tmp; + + if (!pl_inode->is_locked) { + return; + } + + list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list) + { + if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL, + NULL)) { + list_move_tail(&stub->list, list); + } + } +} + +/* This function determines if an inodelk attempt can be done now or it needs + * to wait. + * + * Possible return values: + * < 0: An error occurred. Currently only -ESTALE can be returned if the + * inode has been deleted previously by unlink/rmdir/rename + * = 0: The lock can be attempted. + * > 0: The lock needs to wait because a conflicting remove operation is + * ongoing. + */ +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *ilock; + + /* If the inode has been deleted, we won't allow any lock. */ + if (pl_inode->removed) { + return -ESTALE; + } + + /* We only synchronize with locks made for regular operations coming from + * the user. Locks done for internal purposes are hard to control and could + * lead to long delays or deadlocks quite easily. */ + if (lock->client_pid < 0) { + return 0; + } + if (!pl_inode->is_locked) { + return 0; + } + if (pl_inode->remove_running > 0) { + return 1; + } + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(ilock, &dom->inodelk_list, list) + { + /* If a lock from the same client is already granted, we allow this + * one to continue. This is necessary to prevent deadlocks when + * multiple locks are taken for the same operation. + * + * On the other side it's unlikely that the same client sends + * completely unrelated locks for the same inode. 
+ */ + if (ilock->client == lock->client) { + return 0; + } + } + } + + return 1; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 3a7496758ab..281223bf3b8 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -10,7 +10,6 @@ #ifndef __COMMON_H__ #define __COMMON_H__ -#include <glusterfs/lkowner.h> /*dump locks format strings */ #define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu" #define ENTRY_FMT "type=%s on basename=%s" @@ -45,13 +44,18 @@ fd_unref(__local->fd); \ if (__local->inode) \ inode_unref(__local->inode); \ + if (__local->xdata) { \ + dict_unref(__local->xdata); \ + __local->xdata = NULL; \ + } \ mem_put(__local); \ } \ } while (0) posix_lock_t * new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, - gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int can_block); + gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking, + int32_t *op_errno); pl_inode_t * pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local); @@ -101,6 +105,15 @@ void __pl_inodelk_unref(pl_inode_lock_t *lock); void +__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend); + +void +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted); + +void grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, struct timespec *now, struct list_head *contend); @@ -200,6 +213,16 @@ pl_metalock_is_active(pl_inode_t *pl_inode); void __pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock); +void +inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock, + struct timespec *now, + struct list_head *contend); + +void +entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock, + struct timespec *now, + struct list_head *contend); + gf_boolean_t pl_does_monkey_want_stuck_lock(); 
@@ -212,4 +235,28 @@ pl_clean_local(pl_local_t *local); int pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd); +gf_boolean_t +pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client); + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend); + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend); + +void +pl_inode_remove_wake(struct list_head *list); + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error); + +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list); + +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock); + #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index ced5eca2749..fd772c850dd 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -39,13 +39,20 @@ __pl_entrylk_ref(pl_entry_lock_t *lock) static pl_entry_lock_t * new_entrylk_lock(pl_inode_t *pinode, const char *basename, entrylk_type type, - const char *domain, call_frame_t *frame, char *conn_id) + const char *domain, call_frame_t *frame, char *conn_id, + int32_t *op_errno) { pl_entry_lock_t *newlock = NULL; + if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) { + *op_errno = EINVAL; + goto out; + } + newlock = GF_CALLOC(1, sizeof(pl_entry_lock_t), gf_locks_mt_pl_entry_lock_t); if (!newlock) { + *op_errno = ENOMEM; goto out; } @@ -114,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, pl_entry_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; @@ -122,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, * chance? Or just the locks we are attempting to acquire? 
*/ if (names_conflict(candidate_lock->basename, requested_lock->basename)) { - gettimeofday(&curr, NULL); - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -197,9 +202,9 @@ out: return revoke_lock; } -static gf_boolean_t -__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, - struct timespec *now) +void +entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock, + struct timespec *now, struct list_head *contend) { posix_locks_private_t *priv; int64_t elapsed; @@ -209,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, /* If this lock is in a list, it means that we are about to send a * notification for it, so no need to do anything else. */ if (!list_empty(&lock->contend)) { - return _gf_false; + return; } elapsed = now->tv_sec; @@ -218,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, elapsed--; } if (elapsed < priv->notify_contention_delay) { - return _gf_false; + return; } /* All contention notifications will be sent outside of the locked @@ -231,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, lock->contention_time = *now; - return _gf_true; + list_add_tail(&lock->contend, contend); } void @@ -325,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock, break; } } - if (__entrylk_needs_contention_notify(this, tmp, now)) { - list_add_tail(&tmp->contend, contend); - } + entrylk_contention_notify_check(this, tmp, now, contend); } } @@ -539,19 +542,17 @@ static int __lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, pl_entry_lock_t *lock, int nonblock) { - struct timeval now; - if (nonblock) goto out; - gettimeofday(&now, NULL); - - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, 
&dom->blocked_entrylks); gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", pinode, lock->basename); + entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename, + ENTRYLK_LOCK, lock->type); out: return -EAGAIN; } @@ -605,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, } __pl_entrylk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->domain_list, &dom->entrylk_list); ret = 0; @@ -688,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend); if (bl_ret == 0) { - list_add(&bl->blocked_locks, granted); + list_add_tail(&bl->blocked_locks, granted); } } - return; } /* Grants locks if possible which are blocked on a lock */ @@ -793,10 +793,9 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, entrylk_trace_in(this, frame, volume, fd, loc, basename, cmd, type); reqlock = new_entrylk_lock(pinode, basename, type, dom->domain, frame, - conn_id); + conn_id, &op_errno); if (!reqlock) { op_ret = -1; - op_errno = ENOMEM; goto unwind; } @@ -932,8 +931,6 @@ out: op_ret, op_errno); unwind: STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL); - } else { - entrylk_trace_block(this, frame, volume, fd, loc, basename, cmd, type); } if (pcontend != NULL) { diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index df00ede242d..d4e51d6e0a1 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -9,16 +9,14 @@ */ #include <glusterfs/glusterfs.h> #include <glusterfs/compat.h> -#include <glusterfs/xlator.h> +#include <glusterfs/dict.h> #include <glusterfs/logging.h> -#include <glusterfs/common-utils.h> #include <glusterfs/list.h> #include <glusterfs/upcall-utils.h> #include "locks.h" #include "clear.h" #include "common.h" -#include "pl-messages.h" void 
__delete_inode_lock(pl_inode_lock_t *lock) @@ -142,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock, pl_inode_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; /* Question: Should we just prune them all given the * chance? Or just the locks we are attempting to acquire? */ if (inodelk_conflict(candidate_lock, requested_lock)) { - gettimeofday(&curr, NULL); - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -231,9 +227,9 @@ out: return revoke_lock; } -static gf_boolean_t -__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, - struct timespec *now) +void +inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock, + struct timespec *now, struct list_head *contend) { posix_locks_private_t *priv; int64_t elapsed; @@ -243,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, /* If this lock is in a list, it means that we are about to send a * notification for it, so no need to do anything else. 
*/ if (!list_empty(&lock->contend)) { - return _gf_false; + return; } elapsed = now->tv_sec; @@ -252,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, elapsed--; } if (elapsed < priv->notify_contention_delay) { - return _gf_false; + return; } /* All contention notifications will be sent outside of the locked @@ -265,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, lock->contention_time = *now; - return _gf_true; + list_add_tail(&lock->contend, contend); } void @@ -353,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, break; } } - if (__inodelk_needs_contention_notify(this, l, now)) { - list_add_tail(&l->contend, contend); - } + inodelk_contention_notify_check(this, l, now, contend); } } @@ -401,15 +395,11 @@ static int __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, int can_block) { - struct timeval now; - if (can_block == 0) { goto out; } - gettimeofday(&now, NULL); - - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks); gf_msg_trace(this->name, 0, @@ -420,6 +410,8 @@ __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, lkowner_utoa(&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); + pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + lock->volume); out: return -EAGAIN; } @@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, struct list_head *contend) { pl_inode_lock_t *conf = NULL; - int ret = -EINVAL; + int ret; - conf = __inodelk_grantable(this, dom, lock, now, contend); - if (conf) { - ret = __lock_blocked_add(this, dom, lock, can_block); - goto out; + ret = pl_inode_remove_inodelk(pl_inode, lock); + if (ret < 0) { + return ret; + } + if (ret == 0) { + conf = __inodelk_grantable(this, dom, lock, now, contend); + } + if ((ret > 0) || (conf != NULL)) { 
+ return __lock_blocked_add(this, dom, lock, can_block); } /* To prevent blocked locks starvation, check if there are any blocked @@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, "starvation"); } - ret = __lock_blocked_add(this, dom, lock, can_block); - goto out; + return __lock_blocked_add(this, dom, lock, can_block); } __pl_inodelk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->list, &dom->inodelk_list); - ret = 0; - -out: - return ret; + return 0; } /* Return true if the two inodelks have exactly same lock boundaries */ @@ -502,33 +495,36 @@ static pl_inode_lock_t * __inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) { pl_inode_lock_t *conf = NULL; + inode_t *inode = NULL; + + inode = lock->pl_inode->inode; conf = find_matching_inodelk(lock, dom); if (!conf) { gf_log(this->name, GF_LOG_ERROR, " Matching lock not found for unlock %llu-%llu, by %s " - "on %p", + "on %p for gfid:%s", (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner), - lock->client); + lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN"); goto out; } __delete_inode_lock(conf); gf_log(this->name, GF_LOG_DEBUG, - " Matching lock found for unlock %llu-%llu, by %s on %p", + " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s", (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, - lkowner_utoa(&lock->owner), lock->client); + lkowner_utoa(&lock->owner), lock->client, + inode ? 
uuid_utoa(inode->gfid) : "UNKNOWN"); out: return conf; } -static void +void __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted, pl_dom_list_t *dom, struct timespec *now, struct list_head *contend) { - int bl_ret = 0; pl_inode_lock_t *bl = NULL; pl_inode_lock_t *tmp = NULL; @@ -541,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, { list_del_init(&bl->blocked_locks); - bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); + bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); - if (bl_ret == 0) { - list_add(&bl->blocked_locks, granted); + if (bl->status != -EAGAIN) { + list_add_tail(&bl->blocked_locks, granted); } } - return; } -/* Grant all inodelks blocked on a lock */ void -grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom, struct timespec *now, - struct list_head *contend) +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) { - struct list_head granted; pl_inode_lock_t *lock; pl_inode_lock_t *tmp; + int32_t op_ret; + int32_t op_errno; - INIT_LIST_HEAD(&granted); - - pthread_mutex_lock(&pl_inode->mutex); + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { - __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, - contend); - } - pthread_mutex_unlock(&pl_inode->mutex); - - list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) - { - gf_log(this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, - lkowner_utoa(&lock->owner), lock->user_flock.l_start, - lock->user_flock.l_len); - + if (lock->status == 0) { + op_ret = 0; + op_errno = 0; + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => Granted", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + } else { + op_ret = -1; + op_errno = -lock->status; + } pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, - 0, 0, lock->volume); + op_ret, op_errno, lock->volume); - STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL); + STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL); lock->frame = NULL; } pthread_mutex_lock(&pl_inode->mutex); { - list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { list_del_init(&lock->blocked_locks); __pl_inodelk_unref(lock); @@ -595,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, pthread_mutex_unlock(&pl_inode->mutex); } +/* Grant all inodelks blocked on a lock */ +void +grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) +{ + struct list_head granted; + + INIT_LIST_HEAD(&granted); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, + contend); + } + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(this, pl_inode, &granted); +} + static void pl_inodelk_log_cleanup(pl_inode_lock_t *lock) { @@ -656,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) * and blocked lists, then this means that a parallel * unlock on another inodelk (L2 say) may have 'granted' * L1 and added it to 'granted' list in - * __grant_blocked_node_locks() (although using the + * __grant_blocked_inode_locks() (although using the * 'blocked_locks' member). 
In that case, the cleanup * codepath must try and grant other overlapping * blocked inodelks from other clients, now that L1 is @@ -741,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, gf_boolean_t need_inode_unref = _gf_false; struct list_head *pcontend = NULL; struct list_head contend; + struct list_head wake; struct timespec now = {}; short fl_type; @@ -792,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, timespec_now(&now); } + INIT_LIST_HEAD(&wake); + if (ctx) pthread_mutex_lock(&ctx->lock); pthread_mutex_lock(&pl_inode->mutex); @@ -814,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, lkowner_utoa(&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); - if (can_block) + if (can_block) { unref = _gf_false; - /* For all but the case where a non-blocking - * lock attempt fails, the extra ref taken at - * the start of this function must be negated. - */ - else - need_inode_unref = _gf_true; + } } - - if (ctx && (!ret || can_block)) + /* For all but the case where a non-blocking lock attempt fails + * with -EAGAIN, the extra ref taken at the start of this function + * must be negated. 
*/ + need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block); + if (ctx && !need_inode_unref) { list_add_tail(&lock->client_list, &ctx->inodelk_lockers); + } } else { /* Irrespective of whether unlock succeeds or not, * the extra inode ref that was done at the start of @@ -843,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, list_del_init(&retlock->client_list); __pl_inodelk_unref(retlock); + pl_inode_remove_unlocked(this, pl_inode, &wake); + ret = 0; } out: @@ -853,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, if (ctx) pthread_mutex_unlock(&ctx->lock); + pl_inode_remove_wake(&wake); + /* The following (extra) unref corresponds to the ref that * was done at the time the lock was granted. */ @@ -873,17 +891,23 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, } /* Create a new inode_lock_t */ -pl_inode_lock_t * +static pl_inode_lock_t * new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, call_frame_t *frame, xlator_t *this, const char *volume, - char *conn_id) + char *conn_id, int32_t *op_errno) { pl_inode_lock_t *lock = NULL; + if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) { + *op_errno = EINVAL; + goto out; + } + lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t); if (!lock) { - return NULL; + *op_errno = ENOMEM; + goto out; } lock->fl_start = flock->l_start; @@ -911,6 +935,7 @@ new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, INIT_LIST_HEAD(&lock->contend); __pl_inodelk_ref(lock); +out: return lock; } @@ -955,6 +980,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, int ret = -1; GF_UNUSED int dict_ret = -1; int can_block = 0; + short lock_type = 0; pl_inode_t *pinode = NULL; pl_inode_lock_t *reqlock = NULL; pl_dom_list_t *dom = NULL; @@ -1005,11 +1031,10 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, } reqlock = 
new_inode_lock(flock, frame->root->client, frame->root->pid, - frame, this, dom->domain, conn_id); + frame, this, dom->domain, conn_id, &op_errno); if (!reqlock) { op_ret = -1; - op_errno = ENOMEM; goto unwind; } @@ -1020,16 +1045,20 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, /* fall through */ case F_SETLK: + lock_type = flock->l_type; memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock)); ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom, inode); if (ret < 0) { - if ((can_block) && (F_UNLCK != flock->l_type)) { - pl_trace_block(this, frame, fd, loc, cmd, flock, volume); - goto out; + if (ret == -EAGAIN) { + if (can_block && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); + } else { + gf_log(this->name, GF_LOG_TRACE, "returning %d", ret); } - gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); op_errno = -ret; goto unwind; } diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index b817960a4c4..c868eb494a2 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -43,9 +43,8 @@ struct __posix_lock { fd_t *fd; call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ /* These two together serve to uniquely identify each process across nodes */ @@ -85,9 +84,9 @@ struct __pl_inode_lock { call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at 
which lock contention was detected and notified*/ struct timespec contention_time; @@ -102,6 +101,9 @@ struct __pl_inode_lock { struct list_head client_list; /* list of all locks from a client */ short fl_type; + + int32_t status; /* Error code when we try to grant a lock in blocked + state */ }; typedef struct __pl_inode_lock pl_inode_lock_t; @@ -136,9 +138,9 @@ struct __entry_lock { const char *basename; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; @@ -164,13 +166,14 @@ struct __pl_inode { struct list_head rw_list; /* list of waiting r/w requests */ struct list_head reservelk_list; /* list of reservelks */ struct list_head blocked_reservelks; /* list of blocked reservelks */ - struct list_head - blocked_calls; /* List of blocked lock calls while a reserve is held*/ - struct list_head metalk_list; /* Meta lock list */ - /* This is to store the incoming lock - requests while meta lock is enabled */ - struct list_head queued_locks; - int mandatory; /* if mandatory locking is enabled */ + struct list_head blocked_calls; /* List of blocked lock calls while a + reserve is held*/ + struct list_head metalk_list; /* Meta lock list */ + struct list_head queued_locks; /* This is to store the incoming lock + requests while meta lock is enabled */ + struct list_head waiting; /* List of pending fops waiting to unlink/rmdir + the inode. 
*/ + int mandatory; /* if mandatory locking is enabled */ inode_t *refkeeper; /* hold refs on an inode while locks are held to prevent pruning */ @@ -197,6 +200,13 @@ struct __pl_inode { */ int fop_wind_count; pthread_cond_t check_fop_wind_count; + + gf_boolean_t track_fop_wind_count; + + int32_t links; /* Number of hard links the inode has. */ + uint32_t remove_running; /* Number of remove operations running. */ + gf_boolean_t is_locked; /* Regular locks will be blocked. */ + gf_boolean_t removed; /* The inode has been deleted. */ }; typedef struct __pl_inode pl_inode_t; @@ -239,6 +249,7 @@ typedef struct { gf_boolean_t inodelk_count_req; gf_boolean_t posixlk_count_req; gf_boolean_t parent_entrylk_req; + gf_boolean_t multiple_dom_lk_requests; int update_mlock_enforced_flag; } pl_local_t; @@ -260,6 +271,13 @@ typedef struct _locks_ctx { struct list_head metalk_list; } pl_ctx_t; +typedef struct _multi_dom_lk_data { + xlator_t *this; + inode_t *inode; + dict_t *xdata_rsp; + gf_boolean_t keep_max; +} multi_dom_lk_data; + typedef enum { DECREMENT, INCREMENT } pl_count_op_t; pl_ctx_t * diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 0feb11e3b78..cf0ae4c57dd 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -12,11 +12,8 @@ #include <limits.h> #include <pthread.h> -#include <glusterfs/glusterfs.h> #include <glusterfs/compat.h> -#include <glusterfs/xlator.h> #include <glusterfs/logging.h> -#include <glusterfs/common-utils.h> #include "locks.h" #include "common.h" @@ -24,7 +21,6 @@ #include "clear.h" #include <glusterfs/defaults.h> #include <glusterfs/syncop.h> -#include "pl-messages.h" #ifndef LLONG_MAX #define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */ @@ -121,10 +117,15 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); #define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \ do { \ - if (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK) || \ + if ((dict && 
(dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \ (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \ inode_t *__inode = (loc ? loc->inode : fd->inode); \ pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \ + if (__pl_inode == NULL) { \ + op_ret = -1; \ + op_errno = ENOMEM; \ + goto unwind; \ + } \ if (!pl_is_mandatory_locking_enabled(__pl_inode) || \ !priv->mlock_enforced) { \ op_ret = -1; \ @@ -147,16 +148,46 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); } \ } while (0) +#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \ + _args...) \ + ({ \ + struct list_head contend; \ + pl_inode_t *__pl_inode; \ + call_stub_t *__stub; \ + int32_t __error; \ + INIT_LIST_HEAD(&contend); \ + __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \ + &__pl_inode, &contend); \ + if (__error < 0) { \ + __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \ + __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \ + &contend); \ + } else if (__error == 0) { \ + PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \ + _loc2); \ + STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \ + FIRST_CHILD(_xl)->fops->_fop, ##_args); \ + } \ + __error; \ + }) + gf_boolean_t pl_has_xdata_requests(dict_t *xdata) { - static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT, - GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT, - GLUSTERFS_PARENT_ENTRYLK, NULL}; - static int reqs_size[] = { - SLEN(GLUSTERFS_ENTRYLK_COUNT), SLEN(GLUSTERFS_INODELK_COUNT), - SLEN(GLUSTERFS_INODELK_DOM_COUNT), SLEN(GLUSTERFS_POSIXLK_COUNT), - SLEN(GLUSTERFS_PARENT_ENTRYLK), 0}; + static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, + GLUSTERFS_INODELK_COUNT, + GLUSTERFS_INODELK_DOM_COUNT, + GLUSTERFS_POSIXLK_COUNT, + GLUSTERFS_PARENT_ENTRYLK, + GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, + NULL}; + static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT), + SLEN(GLUSTERFS_INODELK_COUNT), + 
SLEN(GLUSTERFS_INODELK_DOM_COUNT), + SLEN(GLUSTERFS_POSIXLK_COUNT), + SLEN(GLUSTERFS_PARENT_ENTRYLK), + SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS), + 0}; int i = 0; if (!xdata) @@ -169,12 +200,22 @@ pl_has_xdata_requests(dict_t *xdata) return _gf_false; } +static int +dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data) +{ + dict_del(dict, key); + return 0; +} + void pl_get_xdata_requests(pl_local_t *local, dict_t *xdata) { if (!local || !xdata) return; + GF_ASSERT(local->xdata == NULL); + local->xdata = dict_copy_with_ref(xdata, NULL); + if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) { local->entrylk_count_req = 1; dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT); @@ -183,6 +224,12 @@ pl_get_xdata_requests(pl_local_t *local, dict_t *xdata) local->inodelk_count_req = 1; dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT); } + if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) { + local->multiple_dom_lk_requests = 1; + dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS); + dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", + dict_delete_domain_key, NULL); + } local->inodelk_dom_count_req = dict_get_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT); @@ -210,7 +257,7 @@ pl_needs_xdata_response(pl_local_t *local) if (local->parent_entrylk_req || local->entrylk_count_req || local->inodelk_dom_count_req || local->inodelk_count_req || - local->posixlk_count_req) + local->posixlk_count_req || local->multiple_dom_lk_requests) return _gf_true; return _gf_false; @@ -245,20 +292,19 @@ pl_track_io_fop_count(pl_local_t *local, xlator_t *this, pl_count_op_t op) if (!pl_inode) return -1; - if (pl_inode->mlock_enforced) { + if (pl_inode->mlock_enforced && pl_inode->track_fop_wind_count) { pthread_mutex_lock(&pl_inode->mutex); { if (op == DECREMENT) { pl_inode->fop_wind_count--; - if (pl_inode->fop_wind_count == 0) { + /* fop_wind_count can go negative when lock enforcement is + * enabled on unwind path of an IO. 
Hence the "<" comparison. + */ + if (pl_inode->fop_wind_count <= 0) { pthread_cond_broadcast(&pl_inode->check_fop_wind_count); - } - /* - Possible race where lock was enforced in the unwind path - if (pl_inode->fop_wind_count == -1) { + pl_inode->track_fop_wind_count = _gf_false; pl_inode->fop_wind_count = 0; } - */ } else { pl_inode->fop_wind_count++; } @@ -411,6 +457,80 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, } void +pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict, + char *domname, gf_boolean_t keep_max, char *key) +{ + int32_t count = 0; + int32_t maxcount = -1; + int ret = -1; + + if (keep_max) { + ret = dict_get_int32(dict, key, &maxcount); + if (ret < 0) + gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", + GLUSTERFS_INODELK_COUNT); + } + count = get_inodelk_count(this, inode, domname); + if (maxcount >= count) + return; + + ret = dict_set_int32(dict, key, count); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "Failed to set count for " + "key %s", + key); + } + + return; +} + +static int +pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, + void *data) +{ + multi_dom_lk_data *d = data; + char *tmp_key = NULL; + char *save_ptr = NULL; + + tmp_key = gf_strdup(key); + if (!tmp_key) + return -1; + + strtok_r(tmp_key, ":", &save_ptr); + if (!*save_ptr) { + if (tmp_key) + GF_FREE(tmp_key); + gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL, + "Could not tokenize domain string from key %s", key); + return -1; + } + + pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr, + d->keep_max, key); + if (tmp_key) + GF_FREE(tmp_key); + + return 0; +} + +void +pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local, + inode_t *inode, dict_t *dict, + gf_boolean_t keep_max) +{ + multi_dom_lk_data data; + + data.this = this; + data.inode = inode; + data.xdata_rsp = dict; + data.keep_max = keep_max; + + dict_foreach_fnmatch(local->xdata,
GLUSTERFS_INODELK_DOM_PREFIX "*", + pl_inodelk_xattr_fill_multiple, &data); +} + +void pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, inode_t *inode, char *name, dict_t *xdata, gf_boolean_t max_lock) @@ -418,7 +538,7 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, if (!xdata || !local) return; - if (local->parent_entrylk_req && parent && name && strlen(name)) + if (local->parent_entrylk_req && parent && name && name[0] != '\0') pl_parent_entrylk_xattr_fill(this, parent, name, xdata, max_lock); if (!inode) @@ -437,6 +557,9 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, if (local->posixlk_count_req) pl_posixlk_xattr_fill(this, inode, xdata, max_lock); + + if (local->multiple_dom_lk_requests) + pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock); } /* Checks whether the region where fop is acting upon conflicts @@ -504,7 +627,9 @@ pl_check_n_create_fdctx(xlator_t *this, fd_t *fd) if (ret != 0) { GF_FREE(fdctx); fdctx = NULL; + UNLOCK(&fd->lock); gf_log(this->name, GF_LOG_DEBUG, "failed to set fd ctx"); + goto out; } } unlock: @@ -594,7 +719,8 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_DISCARD, &can_block); if (allowed == 1) { - if (pl_inode->mlock_enforced) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { pl_inode->fop_wind_count++; } goto unlock; @@ -719,7 +845,8 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_ZEROFILL, &can_block); if (allowed == 1) { - if (pl_inode->mlock_enforced) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { pl_inode->fop_wind_count++; } goto unlock; @@ -771,9 +898,6 @@ pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, { pl_local_t *local = frame->local; - if (local->xdata) - dict_unref(local->xdata); - 
pl_track_io_fop_count(local, this, DECREMENT); if (local->op == GF_FOP_TRUNCATE) @@ -867,7 +991,8 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, &can_block); if (allowed == 1) { - if (pl_inode->mlock_enforced) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { pl_inode->fop_wind_count++; } goto unlock; @@ -930,9 +1055,6 @@ unwind: "ret: %d, error: %s", op_ret, strerror(op_errno)); - if (local->xdata) - dict_unref(local->xdata); - switch (local->op) { case GF_FOP_TRUNCATE: PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf, @@ -1219,9 +1341,9 @@ pl_getxattr_clrlk(xlator_t *this, const char *name, inode_t *inode, out: GF_FREE(brickname); GF_FREE(args.opts); + GF_FREE(key); if (op_ret) { GF_FREE(lk_summary); - GF_FREE(key); } return op_ret; @@ -1433,8 +1555,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, goto out; } - len = dict_serialized_length(tmp); - if (len < 0) { + op_ret = dict_allocate_and_serialize(tmp, (char **)&buf, + (unsigned int *)&len); + if (op_ret != 0) { *op_errno = -op_ret; op_ret = -1; gf_log(this->name, GF_LOG_WARNING, @@ -1444,24 +1567,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, goto out; } - buf = GF_CALLOC(1, len, gf_common_mt_char); - if (buf == NULL) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - - op_ret = dict_serialize(tmp, buf); - if (op_ret < 0) { - *op_errno = -op_ret; - op_ret = -1; - gf_log(this->name, GF_LOG_WARNING, - "dict_serialize failed (%s) while handling lockinfo " - "for fd (ptr: %p inode-gfid:%s)", - strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid)); - goto out; - } - op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len); if (op_ret < 0) { *op_errno = -op_ret; @@ -1943,8 +2048,10 @@ do_blocked_rw(pl_inode_t *pl_inode) if (__rw_allowable(pl_inode, &rw->region, rw->stub->fop)) { list_del_init(&rw->list); list_add_tail(&rw->list, &wind_list); - if (pl_inode->mlock_enforced) + if 
(pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { pl_inode->fop_wind_count++; + } } } } @@ -2108,7 +2215,8 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_READ, &can_block); if (allowed == 1) { - if (pl_inode->mlock_enforced) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { pl_inode->fop_wind_count++; } goto unlock; @@ -2225,7 +2333,8 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_WRITE, &can_block); if (allowed == 1) { - if (pl_inode->mlock_enforced) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { pl_inode->fop_wind_count++; } goto unlock; @@ -2295,9 +2404,10 @@ __fd_has_locks(pl_inode_t *pl_inode, fd_t *fd) static posix_lock_t * lock_dup(posix_lock_t *lock) { + int32_t op_errno = 0; return new_posix_lock(&lock->user_flock, lock->client, lock->client_pid, &lock->owner, (fd_t *)lock->fd_num, lock->lk_flags, - lock->blocking); + lock->blocking, &op_errno); } static int @@ -2466,6 +2576,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, uint32_t lk_flags = 0; posix_locks_private_t *priv = this->private; pl_local_t *local = NULL; + short lock_type = 0; int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags); if (ret == 0) { @@ -2514,11 +2625,11 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, } reqlock = new_posix_lock(flock, frame->root->client, frame->root->pid, - &frame->root->lk_owner, fd, lk_flags, can_block); + &frame->root->lk_owner, fd, lk_flags, can_block, + &op_errno); if (!reqlock) { op_ret = -1; - op_errno = ENOMEM; goto unwind; } @@ -2610,6 +2721,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, case F_SETLK: reqlock->frame = frame; reqlock->this = this; + lock_type = flock->l_type; pthread_mutex_lock(&pl_inode->mutex); { @@ -2647,8 +2759,7 @@ pl_lk(call_frame_t 
*frame, xlator_t *this, fd_t *fd, int32_t cmd, ret = pl_setlk(this, pl_inode, reqlock, can_block); if (ret == -1) { - if ((can_block) && (F_UNLCK != flock->l_type)) { - pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL); + if ((can_block) && (F_UNLCK != lock_type)) { goto out; } gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); @@ -2877,11 +2988,85 @@ out: return ret; } +static int32_t +pl_request_link_count(dict_t **pxdata) +{ + dict_t *xdata; + + xdata = *pxdata; + if (xdata == NULL) { + xdata = dict_new(); + if (xdata == NULL) { + return ENOMEM; + } + } else { + dict_ref(xdata); + } + + if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) { + dict_unref(xdata); + return ENOMEM; + } + + *pxdata = xdata; + + return 0; +} + +static int32_t +pl_check_link_count(dict_t *xdata) +{ + int32_t count; + + /* In case we are unable to read the link count from xdata, we take a + * conservative approach and return -2, which will prevent the inode from + * being considered deleted. In fact it will cause link tracking for this + * inode to be disabled completely to avoid races. */ + + if (xdata == NULL) { + return -2; + } + + if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) { + return -2; + } + + return count; +} + int32_t pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { + pl_inode_t *pl_inode; + + if (op_ret >= 0) { + pl_inode = pl_inode_get(this, inode, NULL); + if (pl_inode == NULL) { + PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL, + NULL); + return 0; + } + + pthread_mutex_lock(&pl_inode->mutex); + + /* We only update the link count if we previously didn't know it. + * Doing it always can lead to races since lookup is not executed + * atomically most of the times. 
*/ + if (pl_inode->links == -2) { + pl_inode->links = pl_check_link_count(xdata); + if (buf->ia_type == IA_IFDIR) { + /* Directories have at least 2 links. To avoid special handling + * for directories, we simply decrement the value here to make + * them equivalent to regular files. */ + pl_inode->links--; + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata, postparent); return 0; @@ -2890,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); + int32_t error; + + error = pl_request_link_count(&xdata); + if (error == 0) { + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref(xdata); + } else { + STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL); + } return 0; } @@ -3353,6 +3546,14 @@ pl_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, pthread_mutex_lock(&pl_inode->mutex); { + while (pl_inode->fop_wind_count > 0) { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "waiting for existing fops (count %d) to drain for " + "gfid %s", + pl_inode->fop_wind_count, uuid_utoa(pl_inode->gfid)); + pthread_cond_wait(&pl_inode->check_fop_wind_count, + &pl_inode->mutex); + } pl_inode->mlock_enforced = _gf_true; pl_inode->check_mlock_info = _gf_false; } @@ -3409,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner, time_t *blkd_time, gf_boolean_t active) { char *type_str = NULL; - char granted[256] = { + char granted[GF_TIMESTR_SIZE] = { 0, }; - char blocked[256] = { + char blocked[GF_TIMESTR_SIZE] = { 0, }; @@ 
-3463,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode) { pl_dom_list_t *dom = NULL; pl_entry_lock_t *lock = NULL; - char blocked[256] = { + char blocked[GF_TIMESTR_SIZE] = { 0, }; - char granted[256] = { + char granted[GF_TIMESTR_SIZE] = { 0, }; int count = 0; @@ -3486,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->entrylk_list, domain_list) { - gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec, + gf_time_fmt(granted, sizeof(granted), lock->granted_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count); - if (lock->blkd_time.tv_sec == 0) { + if (lock->blkd_time == 0) { snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", @@ -3497,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode) lkowner_utoa(&lock->owner), lock->client, lock->connection_id, granted); } else { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? 
"ENTRYLK_RDLCK" @@ -3514,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count); @@ -3566,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode) SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, - &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, - _gf_true); + lock->client, lock->connection_id, &lock->granted_time, + &lock->blkd_time, _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3580,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode) count); SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, 0, - &lock->blkd_time.tv_sec, _gf_false); + lock->client, lock->connection_id, 0, &lock->blkd_time, + _gf_false); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3614,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode) gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count, lock->blocked ? "BLOCKED" : "ACTIVE"); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->client_uid, &lock->granted_time.tv_sec, - &lock->blkd_time.tv_sec, - (lock->blocked) ? _gf_false : _gf_true); + lock->client, lock->client_uid, &lock->granted_time, + &lock->blkd_time, (lock->blocked) ? 
_gf_false : _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3700,6 +3899,10 @@ unlock: gf_proc_dump_write("posixlk-count", "%d", count); __dump_posixlks(pl_inode); } + + gf_proc_dump_write("links", "%d", pl_inode->links); + gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running); + gf_proc_dump_write("removed", "%u", pl_inode->removed); } pthread_mutex_unlock(&pl_inode->mutex); @@ -4011,6 +4214,10 @@ fini(xlator_t *this) if (!priv) return; this->private = NULL; + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } GF_FREE(priv->brickname); GF_FREE(priv); @@ -4041,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, struct iatt *postoldparent, struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); + PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent, postoldparent, prenewparent, postnewparent, xdata); + return 0; } @@ -4050,10 +4260,15 @@ int32_t pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); + int32_t error; + + error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename, + pl_rename_cbk, oldloc, newloc, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL, + NULL, NULL); + } - STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); return 0; } @@ -4177,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); + PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent, postparent, xdata); + return 0; } @@ -4186,9 +4404,14 @@ int32_t pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + int32_t error; + + error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink, + pl_unlink_cbk, loc, xflag, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL); + } + return 0; } @@ -4255,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); + PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent, postparent, xdata); + return 0; } @@ -4264,9 +4490,14 @@ int pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); + int32_t error; + + error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir, + pl_rmdir_cbk, loc, xflags, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL); + } + return 0; } @@ -4296,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_t *pl_inode = (pl_inode_t *)cookie; + + if (op_ret >= 0) { + pthread_mutex_lock(&pl_inode->mutex); + + /* TODO: can happen pl_inode->links == 0 ? 
*/ + if (pl_inode->links >= 0) { + pl_inode->links++; + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf, preparent, postparent, xdata); return 0; @@ -4305,9 +4549,18 @@ int pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { + pl_inode_t *pl_inode; + + pl_inode = pl_inode_get(this, oldloc->inode, NULL); + if (pl_inode == NULL) { + STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); + return 0; + } + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); - STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); return 0; } @@ -4407,6 +4660,7 @@ pl_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, { pl_inode->mlock_enforced = _gf_false; pl_inode->check_mlock_info = _gf_false; + pl_inode->track_fop_wind_count = _gf_true; } pthread_mutex_unlock(&pl_inode->mutex); } @@ -4794,7 +5048,7 @@ struct volume_options options[] = { "be used in conjunction w/ revocation-clear-all."}, {.key = {"notify-contention"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "no", + .default_value = "yes", .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, .op_version = {GD_OP_VERSION_4_0_0}, .tags = {"locks", "contention"}, diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c index 51076d7cad1..604691fd887 100644 --- a/xlators/features/locks/src/reservelk.c +++ b/xlators/features/locks/src/reservelk.c @@ -312,8 +312,6 @@ grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode) ret = pl_setlk(this, pl_inode, lock, can_block); if (ret == -1) { if (can_block) { - pl_trace_block(this, lock->frame, fd, NULL, cmd, - &lock->user_flock, NULL); continue; } else { gf_log(this->name, GF_LOG_DEBUG, "returning 
EAGAIN"); diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c index 4989efb13d6..9c9047005d6 100644 --- a/xlators/features/marker/src/marker-common.c +++ b/xlators/features/marker/src/marker-common.c @@ -55,10 +55,3 @@ unlock: return ret; } - -int -marker_filter_quota_xattr(dict_t *dict, char *key, data_t *value, void *data) -{ - dict_del(dict, key); - return 0; -} diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h index 1b15561ec0b..7f8cffe7d35 100644 --- a/xlators/features/marker/src/marker-common.h +++ b/xlators/features/marker/src/marker-common.h @@ -16,6 +16,4 @@ int32_t marker_force_inode_ctx_get(inode_t *, xlator_t *, marker_inode_ctx_t **); -int -marker_filter_quota_xattr(dict_t *, char *, data_t *, void *); #endif diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c index 6edb285b180..ecd85d67b2b 100644 --- a/xlators/features/marker/src/marker-quota-helper.c +++ b/xlators/features/marker/src/marker-quota-helper.c @@ -378,96 +378,3 @@ mq_inode_ctx_new(inode_t *inode, xlator_t *this) { return __mq_inode_ctx_new(inode, this); } - -quota_local_t * -mq_local_new() -{ - quota_local_t *local = NULL; - - local = mem_get0(THIS->local_pool); - if (!local) - goto out; - - local->ref = 1; - LOCK_INIT(&local->lock); - - local->ctx = NULL; - local->contri = NULL; - -out: - return local; -} - -quota_local_t * -mq_local_ref(quota_local_t *local) -{ - LOCK(&local->lock); - { - local->ref++; - } - UNLOCK(&local->lock); - - return local; -} - -int32_t -mq_local_unref(xlator_t *this, quota_local_t *local) -{ - int32_t ref = 0; - if (local == NULL) - goto out; - - QUOTA_SAFE_DECREMENT(&local->lock, local->ref, ref); - - if (ref != 0) - goto out; - - if (local->fd != NULL) - fd_unref(local->fd); - - if (local->contri) - GF_REF_PUT(local->contri); - - if (local->xdata) - dict_unref(local->xdata); - - 
loc_wipe(&local->loc); - - loc_wipe(&local->parent_loc); - - LOCK_DESTROY(&local->lock); - - mem_put(local); -out: - return 0; -} - -inode_contribution_t * -mq_get_contribution_from_loc(xlator_t *this, loc_t *loc) -{ - int32_t ret = 0; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; - - ret = mq_inode_ctx_get(loc->inode, this, &ctx); - if (ret < 0) { - gf_log_callingfn(this->name, GF_LOG_WARNING, - "cannot get marker-quota context from inode " - "(gfid:%s, path:%s)", - uuid_utoa(loc->inode->gfid), loc->path); - goto err; - } - - contribution = mq_get_contribution_node(loc->parent, ctx); - if (contribution == NULL) { - gf_log_callingfn(this->name, GF_LOG_WARNING, - "inode (gfid:%s, path:%s) has " - "no contribution towards parent (gfid:%s)", - uuid_utoa(loc->inode->gfid), loc->path, - uuid_utoa(loc->parent->gfid)); - goto err; - } - -err: - return contribution; -} diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h index 99723def1b9..d4091dd2180 100644 --- a/xlators/features/marker/src/marker-quota-helper.h +++ b/xlators/features/marker/src/marker-quota-helper.h @@ -57,22 +57,10 @@ mq_delete_contribution_node(dict_t *, char *, inode_contribution_t *); int32_t mq_inode_loc_fill(const char *, inode_t *, loc_t *); -quota_local_t * -mq_local_new(); - -quota_local_t * -mq_local_ref(quota_local_t *); - -int32_t -mq_local_unref(xlator_t *, quota_local_t *); - inode_contribution_t * mq_contri_init(inode_t *inode); inode_contribution_t * mq_get_contribution_node(inode_t *, quota_inode_ctx_t *); -inode_contribution_t * -mq_get_contribution_from_loc(xlator_t *this, loc_t *loc); - #endif diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c index 530a6ff75fe..3de2ea1c92c 100644 --- a/xlators/features/marker/src/marker-quota.c +++ b/xlators/features/marker/src/marker-quota.c @@ -134,27 +134,14 @@ out: return -1; } -int32_t +static void 
mq_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t status) { GF_VALIDATE_OR_GOTO("marker", ctx, out); mq_set_ctx_status(ctx, &ctx->dirty_status, status); - return 0; -out: - return -1; -} - -int32_t -mq_test_and_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t *status) -{ - GF_VALIDATE_OR_GOTO("marker", ctx, out); - GF_VALIDATE_OR_GOTO("marker", status, out); - - mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, status); - return 0; out: - return -1; + return; } int @@ -866,19 +853,6 @@ out: } int32_t -mq_get_size(xlator_t *this, loc_t *loc, quota_meta_t *size) -{ - return _mq_get_metadata(this, loc, NULL, size, 0); -} - -int32_t -mq_get_contri(xlator_t *this, loc_t *loc, quota_meta_t *contri, - uuid_t contri_gfid) -{ - return _mq_get_metadata(this, loc, contri, NULL, contri_gfid); -} - -int32_t mq_get_delta(xlator_t *this, loc_t *loc, quota_meta_t *delta, quota_inode_ctx_t *ctx, inode_contribution_t *contribution) { @@ -1337,19 +1311,6 @@ out: return ret; } -int -mq_create_xattrs_blocking_txn(xlator_t *this, loc_t *loc, struct iatt *buf) -{ - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO("marker", loc, out); - GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - - ret = _mq_create_xattrs_txn(this, loc, buf, _gf_false); -out: - return ret; -} - int32_t mq_reduce_parent_size_task(void *opaque) { @@ -1752,21 +1713,17 @@ mq_initiate_quota_task(void *opaque) } out: - if (dirty) { - if (ret < 0) { - /* On failure clear dirty status flag. - * In the next lookup inspect_directory_xattr - * can set the status flag and fix the - * dirty directory. - * Do the same if the dir was dirty before - * txn - */ - ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); - if (ret == 0) - mq_set_ctx_dirty_status(parent_ctx, _gf_false); - } else { - ret = mq_mark_dirty(this, &parent_loc, 0); - } + if ((dirty) && (ret < 0)) { + /* On failure clear dirty status flag. 
+ * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory. + * Do the same if the dir was dirty before + * txn + */ + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); } if (locked) @@ -1977,7 +1934,7 @@ mq_update_dirty_inode_task(void *opaque) /* Inculde for self */ contri_sum.dir_count++; - ret = mq_get_size(this, loc, &size); + ret = _mq_get_metadata(this, loc, NULL, &size, 0); if (ret < 0) goto out; @@ -2046,8 +2003,8 @@ mq_update_dirty_inode_txn(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx) GF_VALIDATE_OR_GOTO("marker", loc, out); GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - ret = mq_test_and_set_ctx_dirty_status(ctx, &status); - if (ret < 0 || status == _gf_true) + mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, &status); + if (status == _gf_true) goto out; ret = mq_synctask(this, mq_update_dirty_inode_task, _gf_true, loc); @@ -2102,6 +2059,9 @@ mq_inspect_directory_xattr(xlator_t *this, quota_inode_ctx_t *ctx, if (ret < 0) goto create_xattr; + if (!contribution) + goto create_xattr; + if (!loc_is_root(loc)) { GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen); if (keylen < 0) { diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h index e6271d77084..4bbf6878b22 100644 --- a/xlators/features/marker/src/marker-quota.h +++ b/xlators/features/marker/src/marker-quota.h @@ -23,15 +23,6 @@ #define QUOTA_KEY_MAX 512 #define READDIR_BUF 4096 -#define QUOTA_STACK_DESTROY(_frame, _this) \ - do { \ - quota_local_t *_local = NULL; \ - _local = _frame->local; \ - _frame->local = NULL; \ - STACK_DESTROY(_frame->root); \ - mq_local_unref(_this, _local); \ - } while (0) - #define QUOTA_ALLOC(var, type, ret) \ do { \ ret = 0; \ diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index b6ce42a025e..1375ccc498c 100644 --- 
a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -242,24 +242,19 @@ out: return ret; } -int32_t +void marker_error_handler(xlator_t *this, marker_local_t *local, int32_t op_errno) { - marker_conf_t *priv = NULL; - const char *path = NULL; - - priv = (marker_conf_t *)this->private; - path = local ? (local->loc.path ? local->loc.path - : uuid_utoa(local->loc.gfid)) - : "<nul>"; + marker_conf_t *priv = (marker_conf_t *)this->private; + const char *path = local ? ((local->loc.path) ? local->loc.path + : uuid_utoa(local->loc.gfid)) + : "<nul>"; gf_log(this->name, GF_LOG_CRITICAL, "Indexing gone corrupt at %s (reason: %s)." " Geo-replication slave content needs to be revalidated", path, strerror(op_errno)); sys_unlink(priv->timestamp_file); - - return 0; } int32_t @@ -567,24 +562,21 @@ marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { int32_t ret = 0; - int32_t done = 0; + int32_t done = 1; marker_local_t *local = NULL; local = (marker_local_t *)frame->local; if (op_ret == -1 && op_errno == ENOSPC) { marker_error_handler(this, local, op_errno); - done = 1; goto out; } if (local) { if (local->loc.path && strcmp(local->loc.path, "/") == 0) { - done = 1; goto out; } if (__is_root_gfid(local->loc.gfid)) { - done = 1; goto out; } } @@ -595,14 +587,11 @@ marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, gf_log(this->name, GF_LOG_DEBUG, "Error occurred " "while traversing to the parent, stopping marker"); - - done = 1; - goto out; } marker_start_setxattr(frame, this); - + done = 0; out: if (done) { marker_setxattr_done(frame); diff --git a/xlators/features/metadisp/Makefile.am b/xlators/features/metadisp/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/metadisp/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/metadisp/src/Makefile.am 
b/xlators/features/metadisp/src/Makefile.am new file mode 100644 index 00000000000..1520ad8c424 --- /dev/null +++ b/xlators/features/metadisp/src/Makefile.am @@ -0,0 +1,38 @@ +noinst_PYTHON = gen-fops.py + +EXTRA_DIST = fops-tmpl.c + +xlator_LTLIBRARIES = metadisp.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +nodist_metadisp_la_SOURCES = fops.c + +BUILT_SOURCES = fops.c + +metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +metadisp_la_SOURCES = metadisp.c \ + metadisp-unlink.c \ + metadisp-stat.c \ + metadisp-lookup.c \ + metadisp-readdir.c \ + metadisp-create.c \ + metadisp-open.c \ + metadisp-fsync.c \ + metadisp-setattr.c \ + backend.c + +metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = metadisp.h metadisp-fops.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py + PYTHONPATH=$(top_srcdir)/libglusterfs/src \ + $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@ + +CLEANFILES = $(nodist_metadisp_la_SOURCES) diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c new file mode 100644 index 00000000000..ee2c25bfaa7 --- /dev/null +++ b/xlators/features/metadisp/src/backend.c @@ -0,0 +1,45 @@ +#define GFID_STR_LEN 37 + +#include "metadisp.h" + +/* + * backend.c + * + * functions responsible for converting user-facing paths to backend-style + * "/$GFID" paths. 
+ */
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc)
+{
+    static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    char gfid_buf[GFID_STR_LEN + 1] = {
+        0,
+    };
+    char *path = NULL;
+
+    GF_VALIDATE_OR_GOTO("metadisp", src_loc, out);
+    GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out);
+
+    /* allocate first: on failure dst_loc is left untouched (was a NULL deref) */
+    path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char),
+                     gf_common_mt_char); /* freed via loc_wipe */
+    if (!path)
+        goto out;
+
+    uuid_utoa_r(gfid, gfid_buf);
+    path[0] = '/';
+    strncpy(path + 1, gfid_buf, GFID_STR_LEN);
+    path[GFID_STR_LEN] = 0;
+
+    loc_copy(dst_loc, src_loc);
+    memcpy(dst_loc->pargfid, root, sizeof(root));
+    GF_FREE((char *)dst_loc->path); /* nuke whatever path loc_copy gave us */
+    dst_loc->path = path;
+    /* the only '/' in path is the leading one, so the name starts after it */
+    dst_loc->name = src_loc->name ? path + 1 : NULL;
+
+    return 0;
+out:
+    return -1;
+}
diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c
new file mode 100644
index 00000000000..4385b7dd5b7
--- /dev/null
+++ b/xlators/features/metadisp/src/fops-tmpl.c
@@ -0,0 +1,10 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+#pragma generate
diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py
new file mode 100644
index 00000000000..8b5e120fdec
--- /dev/null
+++ b/xlators/features/metadisp/src/gen-fops.py
@@ -0,0 +1,176 @@
+#!/usr/bin/python
+
+import sys
+from generator import fop_subs, generate
+
+# Each template below is the C source of one fop entry point; generate()
+# fills in the @NAME@ / @LONG_ARGS@ / @SHORT_ARGS@ placeholders per fop.
+
+# fop is forwarded untouched to the metadata child
+FN_METADATA_CHILD_GENERIC = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+                 @LONG_ARGS@)
+{
+    METADISP_TRACE("@NAME@ metadata");
+    STACK_WIND (frame, default_@NAME@_cbk,
+                METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@,
+                @SHORT_ARGS@);
+    return 0;
+}
+"""
+
+# fop is forwarded untouched to the data child (not referenced by gen_fops)
+FN_GENERIC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+                 @LONG_ARGS@)
+{
+    METADISP_TRACE("@NAME@ generic");
+    STACK_WIND (frame, default_@NAME@_cbk,
+                DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@,
+                @SHORT_ARGS@);
+    return 0;
+}
+"""
+
+# fd-based fop forwarded to the data child
+FN_DATAFD_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+                 @LONG_ARGS@)
+{
+    METADISP_TRACE("@NAME@ datafd");
+    xlator_t *child = NULL;
+    child = DATA_CHILD(this);
+    STACK_WIND (frame, default_@NAME@_cbk,
+                child, child->fops->@NAME@,
+                @SHORT_ARGS@);
+    return 0;
+}
+"""
+
+# loc-based fop forwarded to the data child: loc is pointed at the "/<gfid>"
+# backend loc before winding and that loc is wiped afterwards. On failure the
+# fop is unwound with its own signature (@CBK_ERROR_ARGS@), not lookup's.
+FN_DATALOC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+                 @LONG_ARGS@)
+{
+    METADISP_TRACE("@NAME@ dataloc");
+    loc_t backend_loc = {
+        0,
+    };
+    if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+        goto unwind;
+    }
+    xlator_t *child = NULL;
+    child = DATA_CHILD(this);
+    loc = &backend_loc;
+    STACK_WIND (frame, default_@NAME@_cbk,
+                child, child->fops->@NAME@,
+                @SHORT_ARGS@);
+    loc_wipe(&backend_loc);
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(@NAME@, frame, -1, EINVAL, @CBK_ERROR_ARGS@);
+    return 0;
+}
+"""
+
+FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@,"
+
+# fops written by hand in their own metadisp-*.c file: no function is
+# generated for them, but they still get an entry in the fops table
+skipped = [
+    "readdir",
+    "readdirp",
+    "lookup",
+    "fsync",
+    "stat",
+    "open",
+    "create",
+    "unlink",
+    "setattr",
+    # TODO: implement "inodelk",
+]
+
+
+def gen_fops():
+    """Print every generated fop, then the xlator_fops table."""
+    done = skipped
+
+    #
+    # these are fops that wind to the DATA_CHILD
+    #
+    # NOTE: re-written in order from google doc:
+    #    https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+    for name in [
+        "writev",
+        "readv",
+        "ftruncate",
+        "zerofill",
+        "discard",
+        "seek",
+        "fstat",
+    ]:
+        done = done + [name]
+        print(generate(FN_DATAFD_TEMPLATE, name, fop_subs))
+
+    for name in ["truncate"]:
+        done = done + [name]
+        print(generate(FN_DATALOC_TEMPLATE, name, fop_subs))
+
+    # these are fops that operate solely on dentries, folders,
+    # or extended attributes. Therefore, they must always
+    # wind to METADATA_CHILD and should never perform
+    # any path rewriting
+    #
+    # NOTE: re-written in order from google doc:
+    #    https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+    for name in [
+        "mkdir",
+        "symlink",
+        "link",
+        "rename",
+        "mknod",
+        "opendir",
+        # "readdir, # special-cased
+        # "readdirp, # special-cased
+        "fsyncdir",
+        # "setattr", # special-cased
+        "readlink",
+        "fentrylk",
+        "access",
+        # TODO: these wind to both,
+        # data for backend-attributes and metadata for the rest
+        "xattrop",
+        "setxattr",
+        "getxattr",
+        "removexattr",
+        "fgetxattr",
+        "fsetxattr",
+        "fremovexattr",
+    ]:
+
+        done = done + [name]
+        print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs))
+
+    print("struct xlator_fops fops = {")
+    for name in done:
+        print(generate(FOPS_LINE_TEMPLATE, name, fop_subs))
+
+    print("};")
+
+
+# copy the template through, expanding the "#pragma generate" marker
+with open(sys.argv[1], "r") as template:
+    for l in template:
+        if l.find("#pragma generate") != -1:
+            print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+            gen_fops()
+            print("/* END GENERATED CODE */")
+        else:
+            print(l.rstrip("\n"))
diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c
new file mode 100644
index 00000000000..f8c9798dd59
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-create.c
@@ -0,0 +1,120 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Create, like stat, is a two-step process. We send a create
+ * to the METADATA_CHILD, then send another create to the DATA_CHILD.
+ *
+ * We do the metadata child first to ensure that the ACLs are enforced.
+ */
+
+/* Second step done: pass the data child's result back to the caller. */
+int32_t
+metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, fd_t *fd,
+                           inode_t *inode, struct iatt *buf,
+                           struct iatt *preparent, struct iatt *postparent,
+                           dict_t *xdata)
+{
+    STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+                        preparent, postparent, xdata);
+    return 0;
+}
+
+/* Stub body: loc is the backend ("/<gfid>") loc stored in the stub. */
+int32_t
+metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+                       dict_t *xdata)
+{
+    // create the backend data inode
+    STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+               xdata);
+    return 0;
+}
+
+/*
+ * First step done: on success resume the stub (cookie) to create the data
+ * file, otherwise unwind. The stub is released on every path that does not
+ * resume it.
+ */
+int32_t
+metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                    struct iatt *buf, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+    METADISP_TRACE("%d %d", op_ret, op_errno);
+    call_stub_t *stub = cookie;
+    if (op_ret != 0) {
+        if (stub) {
+            call_stub_destroy(stub);
+        }
+        STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+                            preparent, postparent, xdata);
+        return 0;
+    }
+
+    if (stub == NULL) {
+        goto unwind;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+                        NULL);
+    return 0;
+}
+
+/*
+ * create fop: wind to the metadata child and park the data-child create in a
+ * stub whose loc is built from the "gfid-req" gfid in xdata. Every failure
+ * unwinds the frame with an error.
+ */
+int32_t
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+                mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+    METADISP_TRACE(".");
+
+    loc_t backend_loc = {
+        0,
+    };
+    call_stub_t *stub = NULL;
+    uuid_t *gfid_req = NULL;
+    int32_t op_errno = EINVAL;
+
+    /* a missing gfid-req must still unwind the frame, not just return -1 */
+    RESOLVE_GFID_REQ(xdata, gfid_req, unwind);
+
+    if (build_backend_loc(*gfid_req, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags,
+                           mode, umask, fd, xdata);
+    /* the stub carries its own loc (stub->args.loc): drop our references */
+    loc_wipe(&backend_loc);
+    if (!stub) {
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+
+    STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->create, loc, flags, mode,
+                      umask, fd, xdata);
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+                        NULL, NULL);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h
new file mode 100644
index 00000000000..56dd427cf34
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fops.h
@@ -0,0 +1,51 @@
+#ifndef GF_METADISP_FOPS_H_
+#define GF_METADISP_FOPS_H_
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/glusterfs.h>
+
+#include <sys/types.h>
+
+/* fops in here are defined in their own file. Every other fop is just defined
+ * inline of fops.c */
+
+int
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t off, dict_t *xdata);
+
+int
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                  off_t off, dict_t *dict);
+
+int
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+                mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+              fd_t *fd, dict_t *xdata);
+
+int
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+                 loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+               dict_t *xdata);
+
+int
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+                dict_t *xdata);
+
+int
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+#endif
diff --git a/xlators/features/metadisp/src/metadisp-fsync.c
b/xlators/features/metadisp/src/metadisp-fsync.c
new file mode 100644
index 00000000000..2e46fa84eac
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fsync.c
@@ -0,0 +1,74 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/* Stub body: the metadata fsync succeeded, now fsync the data child. */
+int32_t
+metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                      int32_t flags, dict_t *xdata)
+{
+    STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+    return 0;
+}
+
+/*
+ * Callback of the metadata fsync. cookie is the stub built by
+ * metadisp_fsync(): it is resumed on success and destroyed otherwise.
+ */
+int32_t
+metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+    call_stub_t *stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    /* defensive: without a stub there is nothing to resume */
+    if (!stub) {
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+    return 0;
+}
+
+/*
+ * fsync fop: flush the metadata child first, then (through the stub) the
+ * data child.
+ */
+int32_t
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+               dict_t *xdata)
+{
+    call_stub_t *stub = NULL;
+
+    stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata);
+    if (!stub) {
+        /* was: wound anyway and dereferenced the NULL stub in the cbk */
+        STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+    }
+
+    STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c
new file mode 100644
index 00000000000..27d90c9f746
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-lookup.c
@@ -0,0 +1,100 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Lookup, like stat, is a two-step process for grabbing the metadata details
+ * as well as the data details.
+ */
+
+/* Callback of the data-child lookup: a missing backend file is ENODATA. */
+int32_t
+metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, inode_t *inode,
+                            struct iatt *buf, dict_t *xdata,
+                            struct iatt *postparent)
+{
+    METADISP_TRACE("backend_lookup_cbk");
+    if (op_errno == ENOENT) {
+        op_errno = ENODATA;
+        op_ret = -1;
+    }
+    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+                        postparent);
+    return 0;
+}
+
+/* Stub body: look the file up on the data child under its "/<gfid>" path. */
+int32_t
+metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                               dict_t *xdata)
+{
+    METADISP_TRACE("backend_lookup_resume");
+    loc_t backend_loc = {
+        0,
+    };
+    if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->lookup, &backend_loc, xdata);
+    loc_wipe(&backend_loc); /* path and references used to leak */
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+    return 0;
+}
+
+/*
+ * Callback of the metadata lookup. Regular files go on to the backend lookup
+ * held in the stub (cookie); anything else is unwound right away.
+ */
+int32_t
+metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+    METADISP_TRACE("%d %d", op_ret, op_errno);
+    call_stub_t *stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    if (!IA_ISREG(buf->ia_type)) {
+        goto unwind;
+    } else if (!stub) {
+        /* report the failure: op_ret used to stay 0 on this path */
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    METADISP_TRACE("resuming stub");
+
+    /* NOTE(review): the stub's loc still carries the gfid the caller passed
+     * in; confirm whether buf->ia_gfid must be copied into it first. */
+    call_resume(stub);
+    return 0;
+unwind:
+    METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+                        postparent);
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    return 0;
+}
+
+/* lookup fop: metadata child first; the backend lookup waits in a stub. */
+int32_t
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    METADISP_TRACE("lookup");
+    call_stub_t *stub = NULL;
+    stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata);
+    STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->lookup, loc, xdata);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c
new file mode 100644
index 00000000000..64814afe636
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-open.c
@@ -0,0 +1,83 @@
+#include <glusterfs/call-stub.h>
+#include "metadisp.h"
+
+/*
+ * Shared open callback. The metadata open passes the stub as cookie and, on
+ * success, resumes it to open the data child; the data open passes a NULL
+ * cookie, so its result is unwound to the caller as-is.
+ */
+int32_t
+metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+    METADISP_TRACE("got open results %d %d", op_ret, op_errno);
+
+    call_stub_t *stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    if (!stub) {
+        goto unwind;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+    return 0;
+}
+
+/* Stub body: loc is the backend loc; the NULL cookie marks the final step. */
+int32_t
+metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     int32_t flags, fd_t *fd, dict_t *xdata)
+{
+    STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this),
+                      DATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+    return 0;
+}
+
+/* open fop: open on the metadata child, then on the data child. */
+int32_t
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+              fd_t *fd, dict_t *xdata)
+{
+    call_stub_t *stub = NULL;
+    int32_t op_errno = EINVAL;
+    loc_t backend_loc = {
+        0,
+    };
+
+    if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd,
+                         xdata);
+    /* the stub carries its own loc (stub->args.loc): drop our references */
+    loc_wipe(&backend_loc);
+    if (!stub) {
+        /* a NULL cookie would make the cbk skip the data open silently */
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+
+    STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+    return 0;
+unwind:
+    STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, NULL);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c
new file mode 100644
index
00000000000..5f840b1e88f
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-readdir.c
@@ -0,0 +1,71 @@
+#include "metadisp.h"
+
+/**
+ * With a change to the posix xlator, readdir and readdirp are shockingly
+ * simple.
+ *
+ * The issue with separating the backend data of the files
+ * with the metadata is that readdirs must now read from multiple sources
+ * to coalesce the directory entries.
+ *
+ * The way we do this is to tell the METADATA_CHILD that when it's
+ * running readdirp, each file entry should have a stat wound to
+ * 'stat-source-of-truth'.
+ *
+ * see metadisp_stat for how it handles winds _from_posix.
+ */
+
+/*
+ * Shared body of readdir and readdirp: tag xdata with
+ * "stat-source-of-truth" = this and wind a readdirp to the metadata child.
+ * A dict allocated here (caller passed none) is released after the wind;
+ * it used to leak.
+ */
+static int32_t
+metadisp_wind_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                       size_t size, off_t off, dict_t *xdata)
+{
+    dict_t *own_xdata = NULL;
+    GF_UNUSED int32_t ret;
+
+    if (!xdata) {
+        own_xdata = dict_new();
+        xdata = own_xdata;
+    }
+
+    // I'm my own source of truth!
+    ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+    STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+               METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+    if (own_xdata) {
+        dict_unref(own_xdata);
+    }
+    return 0;
+}
+
+int32_t
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t off, dict_t *xdata)
+{
+    METADISP_TRACE(".");
+    /*
+     * Always use readdirp, even if the original was readdir. Why? Because NFS.
+     * There are multiple translations between Gluster, UNIX, and NFS stat
+     * structures in that path. One of them uses the type etc. from the stat
+     * structure, which is only filled in by readdirp. If we use readdir, the
+     * entries do actually go all the way back to the client and are visible in
+     * getdents, but then the readdir throws them away because of the
+     * uninitialized type.
+     */
+    return metadisp_wind_readdirp(frame, this, fd, size, off, xdata);
+}
+
+int32_t
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                  off_t off, dict_t *xdata)
+{
+    METADISP_TRACE(".");
+    return metadisp_wind_readdirp(frame, this, fd, size, off, xdata);
+}
diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c
new file mode 100644
index 00000000000..6991cf644f3
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-setattr.c
@@ -0,0 +1,99 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/* Callback of the data-child setattr: a missing backend file is ENODATA. */
+int32_t
+metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno,
+                             struct iatt *statpre, struct iatt *statpost,
+                             dict_t *xdata)
+
+{
+    METADISP_TRACE("backend_setattr_cbk");
+    if (op_errno == ENOENT) {
+        op_errno = ENODATA;
+        op_ret = -1;
+    }
+    STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+                        xdata);
+    return 0;
+}
+
+/* Stub body: repeat the setattr on the data child's "/<gfid>" path. */
+int32_t
+metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                                struct iatt *stbuf, int32_t valid,
+                                dict_t *xdata)
+
+{
+    METADISP_TRACE("backend_setattr_resume");
+    loc_t backend_loc = {
+        0,
+    };
+    if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid,
+               xdata);
+    loc_wipe(&backend_loc); /* path and references used to leak */
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL);
+    return 0;
+}
+
+/*
+ * Callback of the metadata setattr. Regular files go on to the backend
+ * setattr held in the stub (cookie); anything else is unwound right away.
+ */
+int32_t
+metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                     struct iatt *statpost, dict_t *xdata)
+{
+    METADISP_TRACE("%d %d", op_ret, op_errno);
+    call_stub_t *stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    if (!IA_ISREG(statpost->ia_type)) {
+        goto unwind;
+    } else if (!stub) {
+        /* report the failure: op_ret used to stay 0 on this path */
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    METADISP_TRACE("resuming stub");
+    call_resume(stub);
+    return 0;
+unwind:
+    METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+    STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+                        xdata);
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    return 0;
+}
+
+/* setattr fop: metadata child first; the backend setattr waits in a stub. */
+int32_t
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+    METADISP_TRACE("setattr");
+    call_stub_t *stub = NULL;
+    stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf,
+                            valid, xdata);
+    STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid,
+                      xdata);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c
new file mode 100644
index 00000000000..b06d0dbcddd
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-stat.c
@@ -0,0 +1,147 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The stat flow in METADISP is complicated because we must
+ * ensure a few things:
+ *    1. stat, on the path within the metadata layer,
+ *       MUST get the backend FD of the data layer.
+ *       --- we wind to the metadata layer, then the data layer.
+ *
+ *    2. the metadata layer MUST be able to ask the data
+ *       layer for stat information.
+ *       --- this is 'syncop-internal-from-posix'
+ *
+ *    3. when the metadata exists BUT the data is missing,
+ *       we MUST mark the backend file as bad and heal it.
+ */
+
+/* Callback of the data-child stat: a missing backend file is ENODATA. */
+int32_t
+metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                          dict_t *xdata)
+{
+    METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno);
+    if (op_errno == ENOENT) {
+        /* this frame carries a stat, so unwind as stat (was: open) */
+        STACK_UNWIND_STRICT(stat, frame, -1, ENODATA, NULL, NULL);
+        return 0;
+    }
+    STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+    return 0;
+}
+
+/* Stub body: loc is the backend loc; stat it on the data child. */
+int32_t
+metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     dict_t *xdata)
+{
+    METADISP_TRACE("winding stat to path %s", loc->path);
+    if (gf_uuid_is_null(loc->gfid)) {
+        METADISP_TRACE("bad object, sending EUCLEAN");
+        STACK_UNWIND_STRICT(stat, frame, -1, EUCLEAN, NULL, NULL);
+        return 0;
+    }
+
+    STACK_WIND(frame, metadisp_stat_backend_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->stat, loc, xdata);
+    return 0;
+}
+
+/*
+ * Callback of the metadata stat. Regular files go on to the data-child stat
+ * held in the stub (cookie); anything else is unwound right away.
+ */
+int32_t
+metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                  dict_t *xdata)
+{
+    call_stub_t *stub = cookie;
+
+    METADISP_TRACE("got stat results %d %d", op_ret, op_errno);
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    // only use the stub for the files
+    if (!IA_ISREG(buf->ia_type)) {
+        goto unwind;
+    }
+
+    /* defensive: stub->poison used to be read without a NULL check */
+    if (!stub) {
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+    return 0;
+}
+
+/*
+ * stat fop. Root goes straight to the metadata child; a request tagged
+ * "syncop-internal-from-posix" goes straight to the data child; everything
+ * else is sent to the metadata child first.
+ */
+int32_t
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    call_stub_t *stub = NULL;
+    int32_t ret = 0;
+    int32_t op_errno = EINVAL;
+    loc_t backend_loc = {
+        0,
+    };
+    METADISP_FILTER_ROOT(stat, loc, xdata);
+
+    if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) {
+        // if we've just been sent a stat from posix, then we know
+        // that we must send down a stat for a file to the second child.
+        //
+        // that means we can skip the stat for the first child and just
+        // send to the data disk.
+        METADISP_TRACE("got syncop-internal-from-posix");
+        STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this),
+                   DATA_CHILD(this)->fops->stat, &backend_loc, xdata);
+        loc_wipe(&backend_loc);
+        return 0;
+    }
+
+    // we do not know if the request is for a file, folder, etc. wind
+    // to first child to find out.
+    stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata);
+    /* the stub carries its own loc (stub->args.loc): drop our references */
+    loc_wipe(&backend_loc);
+    if (!stub) {
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+    METADISP_TRACE("winding stat to first child %s", loc->path);
+    STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->stat, loc, xdata);
+    return 0;
+unwind:
+    STACK_UNWIND_STRICT(stat, frame, -1, op_errno, NULL, NULL);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c
new file mode 100644
index 00000000000..1f6a8eb35ce
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-unlink.c
@@ -0,0 +1,199 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The unlink flow in metadisp is complicated because we must
+ * ensure that UNLINK causes both the metadata objects
+ * to get removed and the data objects to get removed.
+ */
+
+/* Stub body: loc is the backend loc; remove the data file. */
+int32_t
+metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       int xflag, dict_t *xdata)
+{
+    METADISP_TRACE("winding backend unlink to path %s", loc->path);
+    STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+    return 0;
+}
+
+/*
+ * Callback of the metadata unlink. The data file is only removed (stub
+ * resumed) when the reported link count shows this was the last link.
+ */
+int32_t
+metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+    METADISP_TRACE(". %d %d", op_ret, op_errno);
+
+    int ret = 0;
+    call_stub_t *stub = cookie;
+    uint32_t nlink = 0; /* dict_get_uint32() is handed a pointer to this */
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    /* defensive: stub->poison used to be read without a NULL check */
+    if (!stub) {
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink);
+    if (ret != 0) {
+        op_errno = EINVAL;
+        op_ret = -1;
+        goto unwind;
+    }
+    METADISP_TRACE("frontend hardlink count %d %u", ret, nlink);
+    if (nlink > 1) {
+        goto unwind;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+                        xdata);
+    return 0;
+}
+
+/*
+ * Callback of the lookup issued when unlink arrived without a gfid: fill the
+ * gfids into the stub's loc and re-enter metadisp_unlink through the stub.
+ */
+int32_t
+metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, inode_t *inode,
+                           struct iatt *buf, dict_t *xdata,
+                           struct iatt *postparent)
+{
+    call_stub_t *stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    /* defensive: stub->args used to be written without a NULL check */
+    if (!stub) {
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    // fail fast on empty gfid so we don't loop forever
+    if (gf_uuid_is_null(buf->ia_gfid)) {
+        op_ret = -1;
+        op_errno = ENODATA;
+        goto unwind;
+    }
+
+    // fill gfid since the stub is incomplete
+    memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+    memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t));
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+    return 0;
+}
+
+/*
+ * unlink fop: resolve the gfid first if the loc has none, then unlink on the
+ * metadata child with a link-count request so that the cbk can decide whether
+ * the data file goes too.
+ */
+int32_t
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+                dict_t *xdata)
+{
+    call_stub_t *stub = NULL;
+    dict_t *own_xdata = NULL;
+    int32_t op_errno = EINVAL;
+    loc_t backend_loc = {
+        0,
+    };
+
+    if (gf_uuid_is_null(loc->gfid)) {
+        METADISP_TRACE("winding lookup for unlink to path %s", loc->path);
+
+        // loop back to ourselves after a lookup
+        stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata);
+        if (!stub) {
+            op_errno = ENOMEM;
+            goto unwind;
+        }
+        STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub,
+                          METADATA_CHILD(this),
+                          METADATA_CHILD(this)->fops->lookup, loc, xdata);
+        return 0;
+    }
+
+    if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    //
+    // ensure we get the link count on the unlink response, so we can
+    // account for hardlinks before winding to the backend.
+    // NOTE:
+    //   multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation
+    //   is needed to ensure that multiple requests will work in the same
+    //   xlator stack.
+    //
+    if (!xdata) {
+        own_xdata = dict_new();
+        xdata = own_xdata;
+    }
+    dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+
+    METADISP_TRACE("winding frontend unlink to path %s", loc->path);
+    stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag,
+                           xdata);
+    /* the stub carries its own loc (stub->args.loc): drop our references */
+    loc_wipe(&backend_loc);
+    if (!stub) {
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+
+    STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+    /* release the dict created above; it used to leak */
+    if (own_xdata) {
+        dict_unref(own_xdata);
+    }
+    return 0;
+unwind:
+    if (own_xdata) {
+        dict_unref(own_xdata);
+    }
+    STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c
new file mode 100644
index 00000000000..3c8f150cebc
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.c
@@ -0,0 +1,46 @@
+#include <glusterfs/call-stub.h>
+
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+int32_t
+init(xlator_t *this)
+{
+    if (!this->children || !this->children->next) {
+        gf_log(this->name, GF_LOG_ERROR,
+               "not configured with children. exiting");
+        return -1;
+    }
+
+    if (!this->parents) {
+        gf_log(this->name, GF_LOG_WARNING, "dangling volume.
check volfile ");
+    }
+
+    return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+    return;
+}
+
+/* defined in fops.c */
+extern struct xlator_fops fops;
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+    {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+    .init = init,
+    .fini = fini,
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = options,
+    .op_version = {1},
+    .identifier = "metadisp",
+    .category = GF_EXPERIMENTAL,
+};
diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h
new file mode 100644
index 00000000000..c8fd7a13c04
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.h
@@ -0,0 +1,52 @@
+/*
+  Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+#ifndef GF_METADISP_H_
+#define GF_METADISP_H_
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+/* the first subvolume takes the metadata fops, the second the data fops */
+#define METADATA_CHILD(_this) FIRST_CHILD(_this)
+#define DATA_CHILD(_this) SECOND_CHILD(_this)
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc);
+
+#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args)
+
+/*
+ * The FILTER_ROOT macros expect `frame` and `this` (and, for the first one,
+ * `loc`) in the calling scope and RETURN from the calling function after
+ * winding the fop to the metadata child.
+ */
+#define METADISP_FILTER_ROOT(_op, _args...)                                    \
+    if (loc->path && strcmp(loc->path, "/") == 0) {                            \
+        STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this),           \
+                   METADATA_CHILD(this)->fops->_op, _args);                    \
+        return 0;                                                              \
+    }
+
+#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...)                     \
+    if (__is_root_gfid(_gfid)) {                                               \
+        STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this),           \
+                   METADATA_CHILD(this)->fops->_op, _args);                    \
+        return 0;                                                              \
+    }
+
+/* fetch the "gfid-req" pointer from _dict into _dest, or jump to _lbl */
+#define RESOLVE_GFID_REQ(_dict, _dest, _lbl)                                   \
+    VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0,    \
+                     _lbl)
+
+#endif /* GF_METADISP_H_ */
diff --git a/xlators/features/namespace/src/namespace.c b/xlators/features/namespace/src/namespace.c
index 59045e8647b..86c5ebee900 100644
--- a/xlators/features/namespace/src/namespace.c
+++ b/xlators/features/namespace/src/namespace.c
@@ -16,7 +16,6 @@
 #include <sys/types.h>
 
 #include <glusterfs/defaults.h>
-#include <glusterfs/glusterfs.h>
 #include <glusterfs/hashfn.h>
 #include <glusterfs/logging.h>
 #include "namespace.h"
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
index bfd1116a568..0e5eb60a16f 100644
--- a/xlators/features/quiesce/src/quiesce.c
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -89,11 +89,12 @@ gf_quiesce_populate_failover_hosts(xlator_t *this, quiesce_priv_t *priv,
     if (!dup_val)
         goto out;
 
+    addr_tok = strtok_r(dup_val, ",", &save_ptr);
     LOCK(&priv->lock);
     {
         if (!list_empty(&priv->failover_list))
             __gf_quiesce_cleanup_failover_hosts(this, priv);
-        addr_tok = strtok_r(dup_val, ",", &save_ptr);
+
         while (addr_tok) {
             if (!valid_internet_address(addr_tok, _gf_true, _gf_false)) {
                 gf_msg(this->name, GF_LOG_INFO, 0, QUIESCE_MSG_INVAL_HOST,
@@ -1192,6 +1193,33 @@ quiesce_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
 }
 
 int32_t
+quiesce_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                     const char *name, dict_t *xdata)
+{
+    quiesce_priv_t *priv = NULL;
+    call_stub_t *stub = NULL;
+
+    priv = this->private;
+
+    if (priv->pass_through) {
+        STACK_WIND(frame, default_fremovexattr_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+        return 0;
+    }
+
+    stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name,
+                                 xdata);
+    if (!stub) {
+        STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, NULL);
+        return 0;
+    }
+
+    gf_quiesce_enqueue(this, stub);
+
+    return 0;
+}
+
+int32_t
 quiesce_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
                  dict_t *xdata)
 {
@@ -2364,19 +2392,10 @@ quiesce_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
 {
     quiesce_priv_t *priv = NULL;
     call_stub_t *stub = NULL;
-    quiesce_local_t *local = NULL;
 
     priv = this->private;
 
     if (priv && priv->pass_through) {
-        local = mem_get0(priv->local_pool);
-        local->fd = fd_ref(fd);
-        local->offset = offset;
-        local->len = len;
-        local->flag = mode;
-
-        frame->local = local;
-
         STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this),
                    FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
                    xdata);
@@ -2536,6 +2555,7 @@ fini(xlator_t *this)
     this->private = NULL;
 
     mem_pool_destroy(priv->local_pool);
+    priv->local_pool = NULL;
     LOCK_DESTROY(&priv->lock);
     GF_FREE(priv);
 out:
@@ -2592,7 +2612,9 @@ struct xlator_fops fops = {
     .truncate = quiesce_truncate,
     .ftruncate = quiesce_ftruncate,
     .setxattr = quiesce_setxattr,
+    .fsetxattr = quiesce_fsetxattr,
     .removexattr = quiesce_removexattr,
+    .fremovexattr = quiesce_fremovexattr,
     .symlink = quiesce_symlink,
     .unlink = quiesce_unlink,
     .link = quiesce_link,
@@ -2625,6 +2647,7 @@ struct xlator_fops fops = {
     .access = quiesce_access,
     .readlink = quiesce_readlink,
     .getxattr = quiesce_getxattr,
+    .fgetxattr = quiesce_fgetxattr,
     .open = quiesce_open,
     .readv = quiesce_readv,
     .flush = quiesce_flush,
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
index 1a4c2e30dd6..480d64ade27 100644
--- a/xlators/features/quota/src/quota-enforcer-client.c
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -32,12 +32,6 @@
 #include <malloc.h>
 #endif
 
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
 #include "quota.h"
 #include "quota-messages.h"
 
@@ -362,16 +356,28 @@
quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata, { xlator_t *this = NULL; int ret = 0; + quota_priv_t *priv = NULL; this = mydata; - + priv = this->private; switch (event) { case RPC_CLNT_CONNECT: { + pthread_mutex_lock(&priv->conn_mutex); + { + priv->conn_status = _gf_true; + } + pthread_mutex_unlock(&priv->conn_mutex); gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT"); break; } case RPC_CLNT_DISCONNECT: { + pthread_mutex_lock(&priv->conn_mutex); + { + priv->conn_status = _gf_false; + pthread_cond_signal(&priv->conn_cond); + } + pthread_mutex_unlock(&priv->conn_mutex); gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT"); break; } diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 8812a301924..18df9ae6d19 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -7,13 +7,9 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#include <fnmatch.h> #include "quota.h" -#include <glusterfs/common-utils.h> -#include <glusterfs/defaults.h> #include <glusterfs/statedump.h> -#include <glusterfs/quota-common-utils.h> #include "quota-messages.h" #include <glusterfs/events.h> @@ -564,15 +560,14 @@ quota_handle_validate_error(call_frame_t *frame, int32_t op_ret, if (local == NULL) goto out; - LOCK(&local->lock); - { - if (op_ret < 0) { + if (op_ret < 0) { + LOCK(&local->lock); + { local->op_ret = op_ret; local->op_errno = op_errno; } + UNLOCK(&local->lock); } - UNLOCK(&local->lock); - /* we abort checking limits on this path to root */ quota_link_count_decrement(frame); out: @@ -591,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, quota_meta_t size = { 0, }; - struct timeval tv = { - 0, - }; local = frame->local; @@ -631,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * loop of validation and checking * limit when timeout is zero. 
*/ - gettimeofday(&tv, NULL); LOCK(&ctx->lock); { ctx->size = size.size; + ctx->validate_time = gf_time(); ctx->file_count = size.file_count; ctx->dir_count = size.dir_count; - memcpy(&ctx->tv, &tv, sizeof(struct timeval)); } UNLOCK(&ctx->lock); @@ -649,27 +640,10 @@ unwind: return 0; } -static uint64_t -quota_time_elapsed(struct timeval *now, struct timeval *then) -{ - return (now->tv_sec - then->tv_sec); -} - -int32_t -quota_timeout(struct timeval *tv, int32_t timeout) +static inline gf_boolean_t +quota_timeout(time_t t, uint32_t timeout) { - struct timeval now = { - 0, - }; - int32_t timed_out = 0; - - gettimeofday(&now, NULL); - - if (quota_time_elapsed(&now, tv) >= timeout) { - timed_out = 1; - } - - return timed_out; + return (gf_time() - t) >= timeout; } /* Return: 1 if new entry added @@ -1133,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, timeout = priv->hard_timeout; } - if (!just_validated && quota_timeout(&ctx->tv, timeout)) { + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { need_validate = 1; } else if ((object_aggr_count) > ctx->object_hard_lim) { hard_limit_exceeded = 1; @@ -1200,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, timeout = priv->hard_timeout; } - if (!just_validated && quota_timeout(&ctx->tv, timeout)) { + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { need_validate = 1; } else if (wouldbe_size >= ctx->hard_lim) { hard_limit_exceeded = 1; @@ -1769,19 +1743,13 @@ quota_writev_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, if ((op_errno == EDQUOT) && (local->space_available > 0)) { new_count = iov_subset(vector, count, 0, local->space_available, - NULL); - - new_vector = GF_CALLOC(new_count, sizeof(struct iovec), - gf_common_mt_iovec); - if (new_vector == NULL) { + &new_vector, 0); + if (new_count < 0) { local->op_ret = -1; local->op_errno = ENOMEM; goto unwind; } - new_count = iov_subset(vector, count, 0, 
local->space_available, - new_vector); - vector = new_vector; count = new_count; } else if (op_errno == ENOENT || op_errno == ESTALE) { @@ -1921,10 +1889,12 @@ quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, par_inode = do_quota_check_limit(frame, fd->inode, this, dentry, _gf_false); if (par_inode == NULL) { - /* remove stale entry from inode ctx */ - quota_dentry_del(ctx, dentry->name, dentry->par); - parents--; - fail_count++; + if (ctx) { + /* remove stale entry from inode ctx */ + quota_dentry_del(ctx, dentry->name, dentry->par); + parents--; + fail_count++; + } } else { inode_unref(par_inode); } @@ -3292,12 +3262,11 @@ quota_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - LOCK(&ctx->lock); - { - if (buf) - ctx->buf = *buf; + if (buf) { + LOCK(&ctx->lock); + ctx->buf = *buf; + UNLOCK(&ctx->lock); } - UNLOCK(&ctx->lock); out: QUOTA_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); @@ -3371,12 +3340,11 @@ quota_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - LOCK(&ctx->lock); - { - if (buf) - ctx->buf = *buf; + if (buf) { + LOCK(&ctx->lock); + ctx->buf = *buf; + UNLOCK(&ctx->lock); } - UNLOCK(&ctx->lock); out: QUOTA_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); @@ -3666,12 +3634,11 @@ quota_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - LOCK(&ctx->lock); - { - if (statpost) - ctx->buf = *statpost; + if (statpost) { + LOCK(&ctx->lock); + ctx->buf = *statpost; + UNLOCK(&ctx->lock); } - UNLOCK(&ctx->lock); out: QUOTA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, statpost, @@ -4326,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, quota_meta_t size = { 0, }; - struct timeval tv = { - 0, - }; local = frame->local; @@ -4360,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, op_errno = EINVAL; } - gettimeofday(&tv, NULL); LOCK(&ctx->lock); { ctx->size = size.size; + 
ctx->validate_time = gf_time(); ctx->file_count = size.file_count; ctx->dir_count = size.dir_count; - memcpy(&ctx->tv, &tv, sizeof(struct timeval)); } UNLOCK(&ctx->lock); @@ -4885,7 +4848,7 @@ off: void quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, - char **path, struct timeval *cur_time) + char **path, time_t *cur_time) { xlator_t *this = THIS; @@ -4904,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, if (!(*path)) *path = uuid_utoa(inode->gfid); - gettimeofday(cur_time, NULL); + *cur_time = gf_time(); } /* Logs if @@ -4915,9 +4878,7 @@ void quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, int64_t delta) { - struct timeval cur_time = { - 0, - }; + time_t cur_time = 0; char *usage_str = NULL; char *path = NULL; int64_t cur_size = 0; @@ -4943,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, "path=%s", usage_str, priv->volume_uuid, path); - ctx->prev_log = cur_time; + ctx->prev_log_time = cur_time; } /* Usage is above soft limit */ else if (cur_size > ctx->soft_lim && - quota_timeout(&ctx->prev_log, priv->log_timeout)) { + quota_timeout(ctx->prev_log_time, priv->log_timeout)) { quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time); gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT, @@ -4959,9 +4920,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, "path=%s", usage_str, priv->volume_uuid, path); - ctx->prev_log = cur_time; + ctx->prev_log_time = cur_time; } + if (path) + GF_FREE(path); + if (usage_str) GF_FREE(usage_str); } @@ -5017,6 +4981,43 @@ quota_forget(xlator_t *this, inode_t *inode) return 0; } +int +notify(xlator_t *this, int event, void *data, ...) 
+{ + quota_priv_t *priv = NULL; + int ret = 0; + rpc_clnt_t *rpc = NULL; + gf_boolean_t conn_status = _gf_true; + xlator_t *victim = data; + + priv = this->private; + if (!priv || !priv->is_quota_on) + goto out; + + if (event == GF_EVENT_PARENT_DOWN) { + rpc = priv->rpc_clnt; + if (rpc) { + rpc_clnt_disable(rpc); + pthread_mutex_lock(&priv->conn_mutex); + { + conn_status = priv->conn_status; + while (conn_status) { + (void)pthread_cond_wait(&priv->conn_cond, + &priv->conn_mutex); + conn_status = priv->conn_status; + } + } + pthread_mutex_unlock(&priv->conn_mutex); + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); + } + } + +out: + ret = default_notify(this, event, data); + return ret; +} + int32_t init(xlator_t *this) { @@ -5059,6 +5060,10 @@ init(xlator_t *this) goto err; } + pthread_mutex_init(&priv->conn_mutex, NULL); + pthread_cond_init(&priv->conn_cond, NULL); + priv->conn_status = _gf_false; + if (priv->is_quota_on) { rpc = quota_enforcer_init(this, this->options); if (rpc == NULL) { @@ -5152,9 +5157,9 @@ quota_priv_dump(xlator_t *this) if (ret) goto out; else { - gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout); - gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout); - gf_proc_dump_write("alert-time", "%d", priv->log_timeout); + gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout); + gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout); + gf_proc_dump_write("alert-time", "%u", priv->log_timeout); gf_proc_dump_write("quota-on", "%d", priv->is_quota_on); gf_proc_dump_write("statfs", "%d", priv->consider_statfs); gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid); @@ -5172,20 +5177,22 @@ fini(xlator_t *this) { quota_priv_t *priv = NULL; rpc_clnt_t *rpc = NULL; - int i = 0, cnt = 0; priv = this->private; if (!priv) return; rpc = priv->rpc_clnt; priv->rpc_clnt = NULL; - this->private = NULL; if (rpc) { - cnt = GF_ATOMIC_GET(rpc->refcount); - for (i = 0; i < cnt; i++) - 
rpc_clnt_unref(rpc); + rpc_clnt_connection_cleanup(&rpc->conn); + rpc_clnt_unref(rpc); } + + this->private = NULL; LOCK_DESTROY(&priv->lock); + pthread_mutex_destroy(&priv->conn_mutex); + pthread_cond_destroy(&priv->conn_cond); + GF_FREE(priv); if (this->local_pool) { mem_pool_destroy(this->local_pool); @@ -5317,6 +5324,7 @@ struct volume_options options[] = { xlator_api_t xlator_api = { .init = init, .fini = fini, + .notify = notify, .reconfigure = reconfigure, .mem_acct_init = mem_acct_init, .op_version = {1}, /* Present from the initial version */ diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h index a5a99cac090..0395d78c9ef 100644 --- a/xlators/features/quota/src/quota.h +++ b/xlators/features/quota/src/quota.h @@ -10,10 +10,7 @@ #ifndef _QUOTA_H #define _QUOTA_H -#include <glusterfs/xlator.h> #include <glusterfs/call-stub.h> -#include <glusterfs/defaults.h> -#include <glusterfs/common-utils.h> #include "quota-mem-types.h" #include <glusterfs/glusterfs.h> #include <glusterfs/compat.h> @@ -156,8 +153,8 @@ struct quota_inode_ctx { int64_t object_soft_lim; struct iatt buf; struct list_head parents; - struct timeval tv; - struct timeval prev_log; + time_t validate_time; + time_t prev_log_time; gf_boolean_t ancestry_built; gf_lock_t lock; }; @@ -202,6 +199,7 @@ struct quota_local { typedef struct quota_local quota_local_t; struct quota_priv { + /* FIXME: consider time_t for timeouts. 
*/ uint32_t soft_timeout; uint32_t hard_timeout; uint32_t log_timeout; @@ -217,6 +215,9 @@ struct quota_priv { char *volume_uuid; uint64_t validation_count; int32_t quotad_conn_status; + pthread_mutex_t conn_mutex; + pthread_cond_t conn_cond; + gf_boolean_t conn_status; }; typedef struct quota_priv quota_priv_t; diff --git a/xlators/features/quota/src/quotad-aggregator.c b/xlators/features/quota/src/quotad-aggregator.c index 379bc05af27..75d47867b5b 100644 --- a/xlators/features/quota/src/quotad-aggregator.c +++ b/xlators/features/quota/src/quotad-aggregator.c @@ -13,7 +13,14 @@ #include "quotad-helpers.h" #include "quotad-aggregator.h" -struct rpcsvc_program quotad_aggregator_prog; +static char *qd_ext_xattrs[] = { + QUOTA_SIZE_KEY, + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + NULL, +}; + +static struct rpcsvc_program quotad_aggregator_prog; struct iobuf * quotad_serialize_reply(rpcsvc_request_t *req, void *arg, struct iovec *outmsg, @@ -141,7 +148,7 @@ quotad_aggregator_getlimit_cbk(xlator_t *this, call_frame_t *frame, if (xdata) { state = frame->root->state; - ret = dict_get_int32n(state->xdata, "type", SLEN("type"), &type); + ret = dict_get_int32n(state->req_xdata, "type", SLEN("type"), &type); if (ret < 0) goto out; @@ -169,8 +176,9 @@ out: } reply: - quotad_aggregator_submit_reply(frame, frame->local, (void *)&cli_rsp, NULL, - 0, NULL, (xdrproc_t)xdr_gf_cli_rsp); + quotad_aggregator_submit_reply(frame, (frame) ? 
frame->local : NULL, + (void *)&cli_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp); dict_unref(xdata); GF_FREE(cli_rsp.dict.dict_val); @@ -191,6 +199,7 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req) int ret = -1, op_errno = 0; char *gfid_str = NULL; uuid_t gfid = {0}; + char *volume_uuid = NULL; GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err); @@ -224,6 +233,11 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req) goto err; } + ret = dict_get_strn(dict, "volume-uuid", SLEN("volume-uuid"), &volume_uuid); + if (ret) { + goto err; + } + gf_uuid_parse((const char *)gfid_str, gfid); frame = quotad_aggregator_get_frame_from_req(req); @@ -232,7 +246,9 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req) goto errx; } state = frame->root->state; - state->xdata = dict; + state->req_xdata = dict; + state->xdata = dict_new(); + dict = NULL; ret = dict_set_int32_sizen(state->xdata, QUOTA_LIMIT_KEY, 42); if (ret) @@ -254,7 +270,7 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req) goto err; ret = qd_nameless_lookup(this, frame, (char *)gfid, state->xdata, - quotad_aggregator_getlimit_cbk); + volume_uuid, quotad_aggregator_getlimit_cbk); if (ret) { cli_rsp.op_errno = ret; goto errx; @@ -293,12 +309,14 @@ quotad_aggregator_lookup(rpcsvc_request_t *req) 0, }, }; - int ret = -1, op_errno = 0; + int i = 0, ret = -1, op_errno = 0; gfs3_lookup_rsp rsp = { 0, }; quotad_aggregator_state_t *state = NULL; xlator_t *this = NULL; + dict_t *dict = NULL; + char *volume_uuid = NULL; GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err); @@ -321,16 +339,34 @@ quotad_aggregator_lookup(rpcsvc_request_t *req) state = frame->root->state; - GF_PROTOCOL_DICT_UNSERIALIZE(this, state->xdata, (args.xdata.xdata_val), + GF_PROTOCOL_DICT_UNSERIALIZE(this, dict, (args.xdata.xdata_val), (args.xdata.xdata_len), ret, op_errno, err); - ret = qd_nameless_lookup(this, frame, args.gfid, state->xdata, + ret = dict_get_str(dict, "volume-uuid", &volume_uuid); + if (ret) { + goto err; + } + + state->xdata = 
dict_new(); + + for (i = 0; qd_ext_xattrs[i]; i++) { + if (dict_get(dict, qd_ext_xattrs[i])) { + ret = dict_set_uint32(state->xdata, qd_ext_xattrs[i], 1); + if (ret < 0) + goto err; + } + } + + ret = qd_nameless_lookup(this, frame, args.gfid, state->xdata, volume_uuid, quotad_aggregator_lookup_cbk); if (ret) { rsp.op_errno = ret; goto err; } + if (dict) + dict_unref(dict); + return ret; err: @@ -338,6 +374,9 @@ err: rsp.op_errno = op_errno; quotad_aggregator_lookup_cbk(this, frame, &rsp); + if (dict) + dict_unref(dict); + return ret; } @@ -439,15 +478,15 @@ out: return ret; } -rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = { - [GF_AGGREGATOR_NULL] = {"NULL", GF_AGGREGATOR_NULL, NULL, NULL, 0, DRC_NA}, - [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", GF_AGGREGATOR_NULL, - quotad_aggregator_lookup, NULL, 0, DRC_NA}, - [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", GF_AGGREGATOR_GETLIMIT, - quotad_aggregator_getlimit, NULL, 0, DRC_NA}, +static rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = { + [GF_AGGREGATOR_NULL] = {"NULL", NULL, NULL, GF_AGGREGATOR_NULL, DRC_NA, 0}, + [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", quotad_aggregator_lookup, NULL, + GF_AGGREGATOR_NULL, DRC_NA, 0}, + [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", quotad_aggregator_getlimit, NULL, + GF_AGGREGATOR_GETLIMIT, DRC_NA, 0}, }; -struct rpcsvc_program quotad_aggregator_prog = { +static struct rpcsvc_program quotad_aggregator_prog = { .progname = "GlusterFS 3.3", .prognum = GLUSTER_AGGREGATOR_PROGRAM, .progver = GLUSTER_AGGREGATOR_VERSION, diff --git a/xlators/features/quota/src/quotad-aggregator.h b/xlators/features/quota/src/quotad-aggregator.h index 318ad7f4995..706592c7d50 100644 --- a/xlators/features/quota/src/quotad-aggregator.h +++ b/xlators/features/quota/src/quotad-aggregator.h @@ -23,13 +23,15 @@ typedef struct { inode_table_t *itable; loc_t loc; dict_t *xdata; + dict_t *req_xdata; } quotad_aggregator_state_t; typedef int (*quotad_aggregator_lookup_cbk_t)(xlator_t *this, 
call_frame_t *frame, void *rsp); int qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid, - dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk); + dict_t *xdata, char *volume_uuid, + quotad_aggregator_lookup_cbk_t lookup_cbk); int quotad_aggregator_init(xlator_t *this); diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c index be8f9080f14..51ff1d7e98d 100644 --- a/xlators/features/quota/src/quotad-helpers.c +++ b/xlators/features/quota/src/quotad-helpers.c @@ -47,6 +47,9 @@ quotad_aggregator_free_state(quotad_aggregator_state_t *state) if (state->xdata) dict_unref(state->xdata); + if (state->req_xdata) + dict_unref(state->req_xdata); + GF_FREE(state); } @@ -73,7 +76,6 @@ quotad_aggregator_alloc_frame(rpcsvc_request_t *req) goto out; frame->root->state = state; - frame->root->unique = 0; frame->this = this; out: @@ -93,8 +95,6 @@ quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req) frame->root->op = req->procnum; - frame->root->unique = req->xid; - frame->root->uid = req->uid; frame->root->gid = req->gid; frame->root->pid = req->pid; diff --git a/xlators/features/quota/src/quotad.c b/xlators/features/quota/src/quotad.c index 11ef2b1189c..643f25c9c2a 100644 --- a/xlators/features/quota/src/quotad.c +++ b/xlators/features/quota/src/quotad.c @@ -9,7 +9,6 @@ */ #include "quota.h" #include "quotad-aggregator.h" -#include <glusterfs/common-utils.h> int qd_notify(xlator_t *this, int32_t event, void *data, ...) 
@@ -105,7 +104,8 @@ out: int qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid, - dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk) + dict_t *xdata, char *volume_uuid, + quotad_aggregator_lookup_cbk_t lookup_cbk) { gfs3_lookup_rsp rsp = { 0, @@ -116,7 +116,6 @@ qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid, }; quotad_aggregator_state_t *state = NULL; xlator_t *subvol = NULL; - char *volume_uuid = NULL; state = frame->root->state; @@ -130,13 +129,6 @@ qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid, memcpy(loc.gfid, gfid, 16); - ret = dict_get_strn(xdata, "volume-uuid", SLEN("volume-uuid"), - &volume_uuid); - if (ret < 0) { - op_errno = EINVAL; - goto out; - } - ret = dict_set_int8(xdata, QUOTA_READ_ONLY_KEY, 1); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c index ac966633c34..48654998e63 100644 --- a/xlators/features/read-only/src/read-only.c +++ b/xlators/features/read-only/src/read-only.c @@ -7,7 +7,6 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include <glusterfs/defaults.h> #include "read-only-common.h" #include "read-only-mem-types.h" #include "read-only.h" diff --git a/xlators/features/read-only/src/read-only.h b/xlators/features/read-only/src/read-only.h index b959d2a44fb..aced5d3c577 100644 --- a/xlators/features/read-only/src/read-only.h +++ b/xlators/features/read-only/src/read-only.h @@ -11,24 +11,25 @@ #ifndef __READONLY_H__ #define __READONLY_H__ -#include "read-only-mem-types.h" -#include <glusterfs/xlator.h> +#include <stdint.h> // for uint64_t, uint8_t +#include <sys/time.h> // for time_t +#include "glusterfs/glusterfs.h" // for gf_boolean_t typedef struct { uint8_t worm : 1; uint8_t retain : 1; uint8_t legal_hold : 1; uint8_t ret_mode : 1; - uint64_t ret_period; - uint64_t auto_commit_period; + int64_t ret_period; + int64_t auto_commit_period; } worm_reten_state_t; typedef struct { gf_boolean_t readonly_or_worm_enabled; gf_boolean_t worm_file; gf_boolean_t worm_files_deletable; - uint64_t reten_period; - uint64_t com_period; + int64_t reten_period; + int64_t com_period; int reten_mode; time_t start_time; } read_only_priv_t; diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c index 25fbd4aa748..df45f2a940b 100644 --- a/xlators/features/read-only/src/worm-helper.c +++ b/xlators/features/read-only/src/worm-helper.c @@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr) GF_VALIDATE_OR_GOTO("worm", this, out); GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); - start_time = time(NULL); + start_time = gf_time(); dict = dict_new(); if (!dict) { gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); @@ -94,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, if (ret) goto out; stbuf->ia_mtime = stpre.ia_mtime; - stbuf->ia_atime = time(NULL) + retention_state->ret_period; + stbuf->ia_atime = gf_time() + retention_state->ret_period; if (fop_with_fd) ret = 
syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME, @@ -286,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, { int op_errno = EROFS; int ret = -1; + time_t now = 0; uint64_t com_period = 0; uint64_t start_time = 0; dict_t *dict = NULL; @@ -337,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, goto out; } - if (ret == -1 && (time(NULL) - start_time) >= com_period) { - if ((time(NULL) - stbuf.ia_mtime) >= com_period) { + now = gf_time(); + + if (ret == -1 && (now - start_time) >= com_period) { + if ((now - stbuf.ia_mtime) >= com_period) { ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state, &stbuf); if (ret) { @@ -352,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, op_errno = 0; goto out; } - } else if (ret == -1 && (time(NULL) - start_time) < com_period) { + } else if (ret == -1 && (now - start_time) < com_period) { op_errno = 0; goto out; - } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) { + } else if (reten_state.retain && ((now >= stbuf.ia_atime))) { gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf); } if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable && diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c index 24196f83931..1cc5526d5cd 100644 --- a/xlators/features/read-only/src/worm.c +++ b/xlators/features/read-only/src/worm.c @@ -292,6 +292,12 @@ worm_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, goto out; } } + reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime - + stpre.ia_atime; + ret = gf_worm_set_xattr(this, &reten_state, _gf_false, loc); + if (ret) { + goto out; + } stbuf->ia_mtime = stpre.ia_mtime; } } @@ -372,6 +378,13 @@ worm_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, goto out; } } + reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime - + stpre.ia_atime; + ret = 
gf_worm_set_xattr(this, &reten_state, _gf_true, fd); + if (ret) { + goto out; + } + stbuf->ia_mtime = stpre.ia_mtime; } } @@ -427,29 +440,22 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, { int ret = 0; read_only_priv_t *priv = NULL; - dict_t *dict = NULL; + // In case of an error exit because fd can be NULL and this would + // cause an segfault when performing fsetxattr . We explicitly + // unwind to avoid future problems + if (op_ret < 0) { + goto out; + } priv = this->private; GF_ASSERT(priv); if (priv->worm_file) { - dict = dict_new(); - if (!dict) { - gf_log(this->name, GF_LOG_ERROR, - "Error creating the " - "dict"); - goto out; - } - ret = dict_set_int8(dict, "trusted.worm_file", 1); + ret = fd_ctx_set(fd, this, 1); if (ret) { gf_log(this->name, GF_LOG_ERROR, - "Error in setting " - "the dict"); - goto out; - } - ret = syncop_fsetxattr(this, fd, dict, 0, NULL, NULL); - if (ret) { - gf_log(this->name, GF_LOG_ERROR, "Error setting xattr"); - goto out; + "Failed to set the fd ctx " + "for gfid:%s . 
Worm feature may not work for the gfid", + uuid_utoa(inode->gfid)); } ret = worm_init_state(this, _gf_true, fd); if (ret) { @@ -460,8 +466,6 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, out: STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, preparent, postparent, xdata); - if (dict) - dict_unref(dict); return ret; } @@ -519,8 +523,8 @@ init(xlator_t *this) GF_OPTION_INIT("worm", priv->readonly_or_worm_enabled, bool, out); GF_OPTION_INIT("worm-file-level", priv->worm_file, bool, out); - GF_OPTION_INIT("default-retention-period", priv->reten_period, uint64, out); - GF_OPTION_INIT("auto-commit-period", priv->com_period, uint64, out); + GF_OPTION_INIT("default-retention-period", priv->reten_period, int64, out); + GF_OPTION_INIT("auto-commit-period", priv->com_period, int64, out); GF_OPTION_INIT("retention-mode", reten_mode, str, out); set_reten_mode(priv, reten_mode); GF_OPTION_INIT("worm-files-deletable", priv->worm_files_deletable, bool, @@ -545,10 +549,10 @@ reconfigure(xlator_t *this, dict_t *options) out); GF_OPTION_RECONF("worm-file-level", priv->worm_file, options, bool, out); GF_OPTION_RECONF("default-retention-period", priv->reten_period, options, - uint64, out); + int64, out); GF_OPTION_RECONF("retention-mode", reten_mode, options, str, out); set_reten_mode(priv, reten_mode); - GF_OPTION_RECONF("auto-commit-period", priv->com_period, options, uint64, + GF_OPTION_RECONF("auto-commit-period", priv->com_period, options, int64, out); GF_OPTION_RECONF("worm-files-deletable", priv->worm_files_deletable, options, bool, out); @@ -569,6 +573,7 @@ fini(xlator_t *this) mem_put(priv); this->private = NULL; mem_pool_destroy(this->local_pool); + this->local_pool = NULL; out: return; } @@ -596,7 +601,62 @@ struct xlator_fops fops = { .lk = ro_lk, }; -struct xlator_cbks cbks; +int32_t +worm_release(xlator_t *this, fd_t *fd) +{ + dict_t *dict = NULL; + int ret = -1; + dict = dict_new(); + uint64_t value = 0; + loc_t loc = { + 0, + 
}; + read_only_priv_t *priv = NULL; + priv = this->private; + + if (priv->worm_file) { + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); + goto out; + } + + ret = fd_ctx_get(fd, this, &value); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "Failed to get the fd ctx"); + } + if (!value) { + goto out; + } + + ret = dict_set_int8(dict, "trusted.worm_file", 1); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Error in setting " + "the dict"); + goto out; + } + + loc.inode = inode_ref(fd->inode); + gf_uuid_copy(loc.gfid, fd->inode->gfid); + ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error setting xattr"); + goto out; + } + + gf_worm_state_transition(this, _gf_false, &loc, GF_FOP_WRITE); + } + +out: + loc_wipe(&loc); + if (dict) + dict_unref(dict); + return 0; +} + +struct xlator_cbks cbks = { + .release = worm_release, +}; struct volume_options options[] = { {.key = {"worm"}, diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c index 132f97ca4ea..aaf13f0852e 100644 --- a/xlators/features/sdfs/src/sdfs.c +++ b/xlators/features/sdfs/src/sdfs.c @@ -139,6 +139,8 @@ sdfs_get_new_frame_common(call_frame_t *frame, call_frame_t **new_frame) } local->main_frame = frame; + /*Set unique lk-owner for the fop*/ + set_lk_owner_from_ptr(&(*new_frame)->root->lk_owner, (*new_frame)->root); ret = 0; err: @@ -175,9 +177,10 @@ sdfs_get_new_frame(call_frame_t *frame, loc_t *loc, call_frame_t **new_frame) ret = 0; err: - if ((ret < 0) && (*new_frame != NULL)) { + if (ret && (*new_frame)) { SDFS_STACK_DESTROY((*new_frame)); *new_frame = NULL; + ret = -1; } return ret; @@ -868,6 +871,8 @@ sdfs_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, op_errno = ENOMEM; goto err; } + /*Set unique lk-owner for the fop*/ + set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root); gf_client_ref(client); new_frame->root->client = client; @@ -1121,6 +1126,8 
@@ sdfs_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, op_errno = ENOMEM; goto err; } + /*Set unique lk-owner for the fop*/ + set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root); gf_client_ref(client); new_frame->root->client = client; @@ -1429,6 +1436,7 @@ void fini(xlator_t *this) { mem_pool_destroy(this->local_pool); + this->local_pool = NULL; return; } @@ -1450,7 +1458,7 @@ struct xlator_cbks cbks; struct volume_options options[] = { {.key = {"pass-through"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "false", + .default_value = "true", .op_version = {GD_OP_VERSION_4_1_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT, .tags = {"sdfs"}, diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c index 58b4c5d4503..9b1b4b55e1a 100644 --- a/xlators/features/selinux/src/selinux.c +++ b/xlators/features/selinux/src/selinux.c @@ -234,7 +234,6 @@ init(xlator_t *this) priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t); if (!priv) { gf_log(this->name, GF_LOG_ERROR, "out of memory"); - ret = ENOMEM; goto out; } @@ -242,7 +241,6 @@ init(xlator_t *this) this->local_pool = mem_pool_new(selinux_priv_t, 64); if (!this->local_pool) { - ret = -1; gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM, "Failed to create local_t's memory pool"); goto out; @@ -252,10 +250,9 @@ init(xlator_t *this) ret = 0; out: if (ret) { - if (priv) { - GF_FREE(priv); - } + GF_FREE(priv); mem_pool_destroy(this->local_pool); + this->local_pool = NULL; } return ret; } @@ -284,6 +281,7 @@ fini(xlator_t *this) GF_FREE(priv); mem_pool_destroy(this->local_pool); + this->local_pool = NULL; return; } diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index abea8dc64a3..e5f93063943 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -80,7 +80,8 @@ __shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t 
**ctx) INIT_LIST_HEAD(&ctx_p->ilist); INIT_LIST_HEAD(&ctx_p->to_fsync_list); - ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); + ctx_uint = (uint64_t)(uintptr_t)ctx_p; + ret = __inode_ctx_set(inode, this, &ctx_uint); if (ret < 0) { GF_FREE(ctx_p); return ret; @@ -273,6 +274,7 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, * of the to_fsync_list. */ inode_ref(base_inode); + inode_ref(shard_inode); LOCK(&base_inode->lock); LOCK(&shard_inode->lock); @@ -286,8 +288,10 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, /* Unref the base inode corresponding to the ref above, if the shard is * found to be already part of the fsync list. */ - if (ret != 0) + if (ret != 0) { inode_unref(base_inode); + inode_unref(shard_inode); + } return ret; } @@ -509,6 +513,9 @@ shard_local_wipe(shard_local_t *local) loc_wipe(&local->int_entrylk.loc); loc_wipe(&local->newloc); + if (local->name) + GF_FREE(local->name); + if (local->int_entrylk.basename) GF_FREE(local->int_entrylk.basename); if (local->fd) @@ -686,8 +693,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, ctx->block_num = block_num; list_add_tail(&ctx->ilist, &priv->ilist_head); priv->inode_count++; - if (base_inode) - ctx->base_inode = inode_ref(base_inode); + ctx->base_inode = inode_ref(base_inode); } else { /*If on the other hand there is no available slot for this inode * in the list, delete the lru inode from the head of the list, @@ -734,6 +740,10 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); inode_forget(lru_inode, 0); } else { + /* The following unref corresponds to the ref + * held when the shard was added to fsync list. 
+ */ + inode_unref(lru_inode); fsync_inode = lru_inode; if (lru_base_inode) inode_unref(lru_base_inode); @@ -758,8 +768,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, else gf_uuid_copy(ctx->base_gfid, gfid); ctx->block_num = block_num; - if (base_inode) - ctx->base_inode = inode_ref(base_inode); + ctx->base_inode = inode_ref(base_inode); list_add_tail(&ctx->ilist, &priv->ilist_head); } } else { @@ -879,26 +888,34 @@ int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, call_frame_t *frame, int32_t op_ret) { - shard_local_t *local = NULL; + shard_local_t *local = frame->local; - local = frame->local; + /* the below 3 variables are required because, in SHARD_STACK_UNWIND() + macro, there is a check for local being null. So many static analyzers + backtrace the code with assumption of possible (local == NULL) case, + and complains for below lines. By handling it like below, we overcome + the warnings */ + + struct iatt *prebuf = ((local) ? &local->prebuf : NULL); + struct iatt *postbuf = ((local) ? &local->postbuf : NULL); + dict_t *xattr_rsp = ((local) ? 
local->xattr_rsp : NULL); switch (fop) { case GF_FOP_WRITE: - SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); + SHARD_STACK_UNWIND(writev, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); break; case GF_FOP_FALLOCATE: - SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); + SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); break; case GF_FOP_ZEROFILL: - SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); + SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); break; case GF_FOP_DISCARD: - SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); + SHARD_STACK_UNWIND(discard, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); break; default: gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, @@ -987,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) } int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this); + +int shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, shard_post_resolve_fop_handler_t post_res_handler) { @@ -1003,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, inode_t *fsync_inode = NULL; shard_priv_t *priv = NULL; shard_local_t *local = NULL; + uint64_t resolve_count = 0; priv = this->private; local = frame->local; local->call_count = 0; shard_idx_iter = local->first_block; res_inode = local->resolver_base_inode; + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + /* If this prealloc FOP is for fresh file creation, then the size of the + * file will be 0. Then there will be no shards associated with this file. + * So we can skip the lookup process for the shards which do not exists + * and directly issue mknod to crete shards. 
+ * + * In case the prealloc fop is to extend the preallocated file to bigger + * size then just lookup and populate inodes of existing shards and + * update the create count + */ + if (local->fop == GF_FOP_FALLOCATE) { + if (!local->prebuf.ia_size) { + local->inode_list[0] = inode_ref(res_inode); + local->create_count = local->last_block; + shard_common_inode_write_post_lookup_shards_handler(frame, this); + return 0; + } + if (local->prebuf.ia_size < local->total_size) + local->create_count = local->last_block - + ((local->prebuf.ia_size - 1) / + local->block_size); + } + + resolve_count = local->last_block - local->create_count; + if (res_inode) gf_uuid_copy(gfid, res_inode->gfid); else gf_uuid_copy(gfid, local->base_gfid); - if ((local->op_ret < 0) || (local->resolve_not)) - goto out; - - while (shard_idx_iter <= local->last_block) { + while (shard_idx_iter <= resolve_count) { i++; if (shard_idx_iter == 0) { local->inode_list[i] = inode_ref(res_inode); @@ -1130,6 +1177,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, { int ret = -1; int64_t *size_attr = NULL; + int64_t delta_blocks = 0; inode_t *inode = NULL; shard_local_t *local = NULL; dict_t *xattr_req = NULL; @@ -1151,13 +1199,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, /* If both size and block count have not changed, then skip the xattrop. 
*/ - if ((local->delta_size + local->hole_size == 0) && - (local->delta_blocks == 0)) { + delta_blocks = GF_ATOMIC_GET(local->delta_blocks); + if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { goto out; } ret = shard_set_size_attrs(local->delta_size + local->hole_size, - local->delta_blocks, &size_attr); + delta_blocks, &size_attr); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); @@ -1593,7 +1641,8 @@ shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) shard_local_t *local = NULL; this->itable = loc->inode->table; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && + (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); } @@ -1643,26 +1692,24 @@ err: } int -shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this, + inode_t *inode, int32_t op_ret, + int32_t op_errno, struct iatt *buf, + dict_t *xdata) { int ret = -1; int32_t mask = SHARD_INODE_WRITE_MASK; - shard_local_t *local = NULL; + shard_local_t *local = frame->local; shard_inode_ctx_t ctx = { 0, }; - local = frame->local; - if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" " failed : %s", - loc_gfid_utoa(&(local->loc))); + uuid_utoa(inode->gfid)); local->op_ret = op_ret; local->op_errno = op_errno; goto unwind; @@ -1696,18 +1743,57 @@ unwind: } int -shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, - shard_post_fop_handler_t handler) +shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + 
shard_local_t *local = frame->local; + + shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret, + op_errno, buf, xdata); + return 0; +} + +int +shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + /* In case of op_ret < 0, inode passed to this function will be NULL + ex: in case of op_errno = ENOENT. So refer prefilled inode data + which is part of local. + Note: Reassigning/overriding the inode passed to this cbk with inode + which is part of *struct shard_local_t* won't cause any issue as + both inodes have same reference/address as of the inode passed */ + inode = ((shard_local_t *)frame->local)->loc.inode; + + shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno, + buf, xdata); + return 0; +} + +/* This function decides whether to make file based lookup or + * fd based lookup (fstat) depending on the 3rd and 4th arg. + * If fd != NULL and loc == NULL then call is for fstat + * If fd == NULL and loc != NULL then call is for file based + * lookup. Please pass args based on the requirement. + */ +int +shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, shard_post_fop_handler_t handler) { int ret = -1; + inode_t *inode = NULL; shard_local_t *local = NULL; dict_t *xattr_req = NULL; gf_boolean_t need_refresh = _gf_false; local = frame->local; local->handler = handler; + inode = fd ? fd->inode : loc->inode; - ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, + ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf, &need_refresh); /* By this time, inode ctx should have been created either in create, * mknod, readdirp or lookup. If not it is a bug! 
@@ -1716,7 +1802,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" "Serving prebuf off the inode ctx cache", - uuid_utoa(loc->gfid)); + uuid_utoa(inode->gfid)); goto out; } @@ -1727,10 +1813,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, goto out; } - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out); - STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + if (fd) + STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xattr_req); + else + STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); dict_unref(xattr_req); return 0; @@ -1942,6 +2032,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, dict_t *xdata) { inode_t *inode = NULL; + int64_t delta_blocks = 0; shard_local_t *local = NULL; local = frame->local; @@ -1962,14 +2053,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } local->postbuf.ia_size = local->offset; - local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks); /* Let the delta be negative. 
We want xattrop to do subtraction */ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks; + delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, + postbuf->ia_blocks - prebuf->ia_blocks); + GF_ASSERT(delta_blocks <= 0); + local->postbuf.ia_blocks += delta_blocks; local->hole_size = 0; - shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES); - + shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); shard_update_file_size(frame, this, NULL, &local->loc, shard_post_update_size_truncate_handler); return 0; @@ -1997,10 +2089,9 @@ shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) */ if (!inode) { gf_msg_debug(this->name, 0, - "Last shard to be truncated absent" - " in backend: %s. Directly proceeding to update " - "file size", - uuid_utoa(inode->gfid)); + "Last shard to be truncated absent in backend: %" PRIu64 + " of gfid %s. Directly proceeding to update file size", + local->first_block, uuid_utoa(local->loc.inode->gfid)); shard_update_file_size(frame, this, NULL, &local->loc, shard_post_update_size_truncate_handler); return 0; @@ -2029,8 +2120,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + int ret = 0; int call_count = 0; int shard_block_num = (long)cookie; + uint64_t block_count = 0; shard_local_t *local = NULL; local = frame->local; @@ -2040,6 +2133,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; goto done; } + ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); + if (!ret) { + GF_ATOMIC_SUB(local->delta_blocks, block_count); + } else { + /* dict_get failed possibly due to a heterogeneous cluster? 
*/ + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get key %s from dict during truncate of gfid %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + } shard_unlink_block_inode(local, shard_block_num); done: @@ -2069,6 +2172,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) gf_boolean_t wind_failed = _gf_false; shard_local_t *local = NULL; shard_priv_t *priv = NULL; + dict_t *xdata_req = NULL; local = frame->local; priv = this->private; @@ -2096,7 +2200,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) local->postbuf.ia_size = local->offset; local->postbuf.ia_blocks = local->prebuf.ia_blocks; local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - local->delta_blocks = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); local->hole_size = 0; shard_update_file_size(frame, this, local->fd, &local->loc, shard_post_update_size_truncate_handler); @@ -2105,6 +2209,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) local->call_count = call_count; i = 1; + xdata_req = dict_new(); + if (!xdata_req) { + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } + ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict during truncate of %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + dict_unref(xdata_req); + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } SHARD_SET_ROOT_FS_ID(frame, local); while (cur_block <= last_block) { @@ -2143,7 +2262,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL); + FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); loc_wipe(&loc); 
next: i++; @@ -2151,6 +2270,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) if (!--call_count) break; } + dict_unref(xdata_req); return 0; } @@ -2206,13 +2326,19 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, xlator_t *this = NULL; inode_t *fsync_inode = NULL; shard_priv_t *priv = NULL; + inode_t *base_inode = NULL; this = THIS; priv = this->private; - if (local->loc.inode) + if (local->loc.inode) { gf_uuid_copy(gfid, local->loc.inode->gfid); - else + base_inode = local->loc.inode; + } else if (local->resolver_base_inode) { + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + base_inode = local->resolver_base_inode; + } else { gf_uuid_copy(gfid, local->base_gfid); + } shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); @@ -2225,7 +2351,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, LOCK(&priv->lock); { fsync_inode = __shard_update_shards_inode_list( - linked_inode, this, local->loc.inode, block_num, gfid); + linked_inode, this, base_inode, block_num, gfid); } UNLOCK(&priv->lock); if (fsync_inode) @@ -2347,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, int count = 0; int call_count = 0; int32_t shard_idx_iter = 0; - int last_block = 0; + int lookup_count = 0; char path[PATH_MAX] = { 0, }; @@ -2367,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, local = frame->local; count = call_count = local->call_count; shard_idx_iter = local->first_block; - last_block = local->last_block; + lookup_count = local->last_block - local->create_count; local->pls_fop_handler = handler; if (local->lookup_shards_barriered) local->barrier.waitfor = local->call_count; @@ -2377,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, else gf_uuid_copy(gfid, local->base_gfid); - while (shard_idx_iter <= last_block) { + while (shard_idx_iter <= 
lookup_count) { if (local->inode_list[i]) { i++; shard_idx_iter++; @@ -2522,6 +2648,7 @@ shard_truncate_begin(call_frame_t *frame, xlator_t *this) local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; @@ -2597,7 +2724,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) */ local->hole_size = local->offset - local->prebuf.ia_size; local->delta_size = 0; - local->delta_blocks = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); local->postbuf.ia_size = local->offset; tmp_stbuf.ia_size = local->offset; shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, @@ -2613,7 +2740,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) */ local->hole_size = 0; local->delta_size = (local->offset - local->prebuf.ia_size); - local->delta_blocks = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); tmp_stbuf.ia_size = local->offset; shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, SHARD_INODE_WRITE_MASK); @@ -2669,9 +2796,10 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, if (!local->xattr_req) goto err; local->resolver_base_inode = loc->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_truncate_handler); return 0; err: @@ -2724,9 +2852,10 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); local->resolver_base_inode = fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_truncate_handler); return 0; err: 
shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); @@ -2870,8 +2999,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!local->xattr_req) goto err; - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_link_handler); + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_link_handler); return 0; err: shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); @@ -2885,13 +3014,20 @@ int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) { shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; local = frame->local; + if (local->resolver_base_inode) + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, - "failed to delete shards of %s", - uuid_utoa(local->resolver_base_inode->gfid)); + "failed to delete shards of %s", uuid_utoa(gfid)); return 0; } local->op_ret = 0; @@ -2932,8 +3068,8 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) shard_priv_t *priv = NULL; shard_inode_ctx_t *ctx = NULL; shard_inode_ctx_t *base_ictx = NULL; - gf_boolean_t unlink_unref_forget = _gf_false; int unref_base_inode = 0; + int unref_shard_inode = 0; this = THIS; priv = this->private; @@ -2958,11 +3094,12 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) list_del_init(&ctx->ilist); priv->inode_count--; unref_base_inode++; + unref_shard_inode++; GF_ASSERT(priv->inode_count >= 0); - unlink_unref_forget = _gf_true; } if (ctx->fsync_needed) { unref_base_inode++; + unref_shard_inode++; list_del_init(&ctx->to_fsync_list); if (base_inode) { __shard_inode_ctx_get(base_inode, this, &base_ictx); @@ -2973,11 +3110,11 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) UNLOCK(&inode->lock); if (base_inode) UNLOCK(&base_inode->lock); - if 
(unlink_unref_forget) { - inode_unlink(inode, priv->dot_shard_inode, block_bname); - inode_unref(inode); - inode_forget(inode, 0); - } + + inode_unlink(inode, priv->dot_shard_inode, block_bname); + inode_ref_reduce_by_n(inode, unref_shard_inode); + inode_forget(inode, 0); + if (base_inode && unref_base_inode) inode_ref_reduce_by_n(base_inode, unref_base_inode); UNLOCK(&priv->lock); @@ -3982,6 +4119,7 @@ shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->op_ret = op_ret; local->op_errno = op_errno; } else { + shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); local->preoldparent = *preparent; local->postoldparent = *postparent; if (xdata) @@ -4191,8 +4329,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) switch (local->fop) { case GF_FOP_UNLINK: case GF_FOP_RENAME: - shard_lookup_base_file(frame, this, &local->int_inodelk.loc, - shard_post_lookup_base_shard_rm_handler); + shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL, + shard_post_lookup_base_shard_rm_handler); break; default: gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, @@ -4447,8 +4585,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->block_size) { local->tmp_loc.inode = inode_new(this->itable); gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); - shard_lookup_base_file(frame, this, &local->tmp_loc, - shard_post_rename_lookup_handler); + shard_refresh_base_file(frame, this, &local->tmp_loc, NULL, + shard_post_rename_lookup_handler); } else { shard_rename_cbk(frame, this); } @@ -4705,6 +4843,8 @@ out: if (xdata) local->xattr_rsp = dict_ref(xdata); vec.iov_base = local->iobuf->ptr; + if (local->offset + local->req_size > local->prebuf.ia_size) + local->total_size = local->prebuf.ia_size - local->offset; vec.iov_len = local->total_size; local->op_ret = local->total_size; SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, @@ -5085,6 +5225,7 @@ 
shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->resolver_base_inode = local->loc.inode; local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), @@ -5181,8 +5322,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_readv_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_readv_handler); return 0; err: shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); @@ -5283,7 +5424,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, local->op_errno = op_errno; } else { local->written_size += op_ret; - local->delta_blocks += (post->ia_blocks - pre->ia_blocks); + GF_ATOMIC_ADD(local->delta_blocks, + post->ia_blocks - pre->ia_blocks); local->delta_size += (post->ia_size - pre->ia_size); shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES); @@ -5422,21 +5564,17 @@ shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) remaining_size -= shard_write_size; if (local->fop == GF_FOP_WRITE) { + vec = NULL; count = iov_subset(local->vector, local->count, vec_offset, - vec_offset + shard_write_size, NULL); - - vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); - if (!vec) { + shard_write_size, &vec, 0); + if (count < 0) { local->op_ret = -1; local->op_errno = ENOMEM; wind_failed = _gf_true; - GF_FREE(vec); shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, NULL, NULL); goto next; } - count = iov_subset(local->vector, local->count, vec_offset, - vec_offset + shard_write_size, vec); } if (cur_block == 0) { @@ -5548,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame, shard_common_lookup_shards( frame, 
this, local->resolver_base_inode, shard_common_inode_write_post_lookup_shards_handler); + } else if (local->create_count) { + shard_common_inode_write_post_lookup_shards_handler(frame, this); } else { shard_common_inode_write_do(frame, this); } @@ -5578,6 +5718,7 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame, local->last_block = get_highest_block(local->offset, local->total_size, local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); if (!local->inode_list) { @@ -5586,9 +5727,9 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame, } gf_msg_trace(this->name, 0, - "%s: gfid=%s first_block=%" PRIu32 + "%s: gfid=%s first_block=%" PRIu64 " " - "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64 + "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), @@ -5793,6 +5934,7 @@ shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, shard_inode_ctx_t *ctx = NULL; shard_inode_ctx_t *base_ictx = NULL; inode_t *base_inode = NULL; + gf_boolean_t unref_shard_inode = _gf_false; local = frame->local; base_inode = local->fd->inode; @@ -5826,11 +5968,16 @@ out: if (ctx->fsync_needed != 0) { list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); base_ictx->fsync_count++; + } else { + unref_shard_inode = _gf_true; } } UNLOCK(&anon_fd->inode->lock); UNLOCK(&base_inode->lock); } + + if (unref_shard_inode) + inode_unref(anon_fd->inode); if (anon_fd) fd_unref(anon_fd); @@ -5977,8 +6124,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_fsync_handler); + 
shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_fsync_handler); return 0; err: shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); @@ -6170,48 +6317,210 @@ shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } int32_t -shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local, + char *key) { - int op_errno = EINVAL; + int ret = 0; + struct iatt *tmpbuf = NULL; + struct iatt *stbuf = NULL; + data_t *data = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + if (!xdata) + return 0; + + data = dict_get(xdata, key); + if (!data) + return 0; + + tmpbuf = data_to_iatt(data, key); + stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); + if (stbuf == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + *stbuf = *tmpbuf; + stbuf->ia_size = local->prebuf.ia_size; + stbuf->ia_blocks = local->prebuf.ia_blocks; + ret = dict_set_iatt(xdata, key, stbuf, false); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; } + return 0; - if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +err: + GF_FREE(stbuf); + return -1; +} + +int32_t +shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int ret = -1; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, 
GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + xdata); return 0; -out: - shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); + +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; } int32_t -shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; + + local = frame->local; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + if (local->fd) + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, local->fd, + local->name, local->xattr_req); + else + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, &local->loc, + local->name, local->xattr_req); + return 0; +} + +int32_t +shard_common_remove_xattr(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop, loc_t *loc, fd_t *fd, + const char *name, dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + /* If shard's special xattrs are attempted to be removed, + * fail the fop with EPERM (except if the client is gsyncd). 
+ */ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err); } + /* Repeat the same check for bulk-removexattr */ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + + if (name) { + local->name = gf_strdup(name); + if (!local->name) + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_remove_xattr_handler); return 0; -out: - shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name, + xdata); + return 0; +} + 
+int32_t +shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name, + xdata); return 0; } @@ -6292,38 +6601,164 @@ out: } int32_t -shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int op_errno = EINVAL; + int ret = -1; + shard_local_t *local = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + xdata); return 0; -out: - shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); + +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; } int32_t -shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + if (local->fd) + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->fsetxattr, local->fd, + local->xattr_req, local->flags, local->xattr_rsp); + else + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &local->loc, + local->xattr_req, local->flags, local->xattr_rsp); + return 0; +} + +int32_t +shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, + loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags, + dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; + } + /* Sharded or not, if shard's special xattrs are attempted to be set, + * fail the fop with EPERM (except if the client is gsyncd. 
+ */ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err); + } + + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + local->flags = flags; + /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict + * and the xdata dict + */ + if (dict) + local->xattr_req = dict_ref(dict); + if (xdata) + local->xattr_rsp = dict_ref(xdata); + + shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_set_xattr_handler); return 0; -out: - shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, + xdata); + return 0; +} + +int32_t +shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t 
flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, + xdata); return 0; } @@ -6581,12 +7016,13 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, local->fd = fd_ref(fd); local->block_size = block_size; local->resolver_base_inode = local->fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_common_inode_write_post_lookup_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_common_inode_write_post_lookup_handler); return 0; out: shard_common_failure_unwind(fop, frame, -1, ENOMEM); @@ -6731,6 +7167,9 @@ fini(xlator_t *this) GF_VALIDATE_OR_GOTO("shard", this, out); + /*Itable was not created by shard, hence setting to NULL.*/ + this->itable = NULL; + mem_pool_destroy(this->local_pool); this->local_pool = NULL; diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index 570fe4685f5..4fe181b64d5 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame, typedef struct shard_local { int op_ret; int op_errno; - int first_block; - int last_block; - int num_blocks; + uint64_t first_block; + uint64_t last_block; + uint64_t num_blocks; int call_count; int eexist_count; int create_count; @@ -275,7 +275,7 @@ typedef struct shard_local { size_t req_size; size_t readdir_size; int64_t delta_size; - int delta_blocks; + gf_atomic_t delta_blocks; loc_t loc; loc_t dot_shard_loc; loc_t dot_shard_rm_loc; @@ -318,6 +318,7 @@ typedef struct shard_local { uint32_t deletion_rate; gf_boolean_t cleanup_required; uuid_t base_gfid; + char *name; } shard_local_t; typedef struct shard_inode_ctx { diff --git a/xlators/features/snapview-client/src/snapview-client-messages.h 
b/xlators/features/snapview-client/src/snapview-client-messages.h index f6b8f48ef72..c02fb154930 100644 --- a/xlators/features/snapview-client/src/snapview-client-messages.h +++ b/xlators/features/snapview-client/src/snapview-client-messages.h @@ -33,6 +33,39 @@ GLFS_MSGID(SNAPVIEW_CLIENT, SVC_MSG_NO_MEMORY, SVC_MSG_MEM_ACNT_FAILED, SVC_MSG_RENAME_SNAPSHOT_ENTRY, SVC_MSG_LINK_SNAPSHOT_ENTRY, SVC_MSG_COPY_ENTRY_POINT_FAILED, SVC_MSG_ENTRY_POINT_SPECIAL_DIR, SVC_MSG_STR_LEN, SVC_MSG_INVALID_ENTRY_POINT, SVC_MSG_NULL_PRIV, - SVC_MSG_PRIV_DESTROY_FAILED); + SVC_MSG_PRIV_DESTROY_FAILED, SVC_MSG_ALLOC_FD_FAILED, + SVC_MSG_ALLOC_INODE_FAILED, SVC_MSG_NULL_SPECIAL_DIR, + SVC_MSG_MEM_POOL_GET_FAILED); +#define SVC_MSG_ALLOC_FD_FAILED_STR "failed to allocate new fd context" +#define SVC_MSG_SET_FD_CONTEXT_FAILED_STR "failed to set fd context" +#define SVC_MSG_STR_LEN_STR \ + "destination buffer size is less than the length of entry point name" +#define SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL_STR "lookup failed on normal graph" +#define SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL_STR "lookup failed on snapview graph" +#define SVC_MSG_SET_INODE_CONTEXT_FAILED_STR "failed to set inode context" +#define SVC_MSG_NO_MEMORY_STR "failed to allocate memory" +#define SVC_MSG_COPY_ENTRY_POINT_FAILED_STR \ + "failed to copy the entry point string" +#define SVC_MSG_GET_FD_CONTEXT_FAILED_STR "fd context not found" +#define SVC_MSG_GET_INODE_CONTEXT_FAILED_STR "failed to get inode context" +#define SVC_MSG_ALLOC_INODE_FAILED_STR "failed to allocate new inode" +#define SVC_MSG_DICT_SET_FAILED_STR "failed to set dict" +#define SVC_MSG_RENAME_SNAPSHOT_ENTRY_STR \ + "rename happening on a entry residing in snapshot" +#define SVC_MSG_DELETE_INODE_CONTEXT_FAILED_STR "failed to delete inode context" +#define SVC_MSG_NULL_PRIV_STR "priv NULL" +#define SVC_MSG_INVALID_ENTRY_POINT_STR "not a valid entry point" +#define SVC_MSG_MEM_ACNT_FAILED_STR "Memory accouting init failed" +#define 
SVC_MSG_NO_CHILD_FOR_XLATOR_STR "configured without any child" +#define SVC_MSG_XLATOR_CHILDREN_WRONG_STR \ + "snap-view-client has got wrong subvolumes. It can have only 2" +#define SVC_MSG_ENTRY_POINT_SPECIAL_DIR_STR \ + "entry point directory cannot be part of special directory" +#define SVC_MSG_NULL_SPECIAL_DIR_STR "null special directory" +#define SVC_MSG_MEM_POOL_GET_FAILED_STR \ + "could not get mem pool for frame->local" +#define SVC_MSG_PRIV_DESTROY_FAILED_STR "failed to destroy private" +#define SVC_MSG_LINK_SNAPSHOT_ENTRY_STR \ + "link happening on a entry residin gin snapshot" #endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */ diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c index 5d7986c7f0f..486c5179d5b 100644 --- a/xlators/features/snapview-client/src/snapview-client.c +++ b/xlators/features/snapview-client/src/snapview-client.c @@ -198,16 +198,15 @@ __svc_fd_ctx_get_or_new(xlator_t *this, fd_t *fd) svc_fd = svc_fd_new(); if (!svc_fd) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_NO_MEMORY, - "failed to allocate new fd context for gfid %s", - uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_FD_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); goto out; } ret = __svc_fd_ctx_set(this, fd, svc_fd); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED, - "failed to set fd context for gfid %s", uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); ret = -1; } @@ -268,11 +267,9 @@ gf_svc_get_entry_point(xlator_t *this, char *entry_point, size_t dest_size) LOCK(&priv->lock); { if (dest_size <= strlen(priv->path)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_STR_LEN, - "destination buffer " - "size %zu is less than the length %zu of " - "the entry point name %s", - dest_size, strlen(priv->path), priv->path); + gf_smsg(this->name, 
GF_LOG_ERROR, 0, SVC_MSG_STR_LEN, + "dest-size=%zu", dest_size, "priv-path-len=%zu", + strlen(priv->path), "path=%s", priv->path, NULL); } else { snprintf(entry_point, dest_size, "%s", priv->path); ret = 0; @@ -321,19 +318,17 @@ gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, */ if (op_ret) { if (subvolume == FIRST_CHILD(this)) { - gf_msg(this->name, - (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG - : GF_LOG_ERROR, - op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, - "lookup failed on normal graph with error %s", - strerror(op_errno)); + gf_smsg(this->name, + (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, "error=%s", + strerror(op_errno), NULL); } else { - gf_msg(this->name, - (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG - : GF_LOG_ERROR, - op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, - "lookup failed on snapview graph with error %s", - strerror(op_errno)); + gf_smsg(this->name, + (op_errno == ENOENT || op_errno == ESTALE) ? 
GF_LOG_DEBUG + : GF_LOG_ERROR, + op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, "error=%s", + strerror(op_errno), NULL); goto out; } @@ -364,10 +359,8 @@ gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode type in the inode context " - "(gfid: %s)", - uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); out: if (do_unwind) { @@ -416,8 +409,7 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (!local) { op_ret = -1; op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, - "failed to allocate local"); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL); goto out; } @@ -457,9 +449,8 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -540,9 +531,8 @@ gf_svc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (path_len >= snap_len && inode_type == VIRTUAL_INODE) { path = &loc->path[path_len - snap_len]; if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string "); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -577,20 +567,24 @@ gf_svc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - /* Consider a testcase: + /* TODO: 
FIX ME + * Consider a testcase: * #mount -t nfs host1:/vol1 /mnt * #ls /mnt * #ls /mnt/.snaps (As expected this fails) * #gluster volume set vol1 features.uss enable - * Now `ls /mnt/.snaps` should work, - * but fails with No such file or directory. - * This is because NFS client caches the list of files in - * a directory. This cache is updated if there are any changes - * in the directory attributes. To solve this problem change - * a attribute 'ctime' when USS is enabled + * Now `ls /mnt/.snaps` should work, but fails with No such file or + * directory. This is because NFS client (gNFS) caches the list of files + * in a directory. This cache is updated if there are any changes in the + * directory attributes. So, one way to solve this problem is to change + * 'ctime' attribute when USS is enabled as below. + * + * if (op_ret == 0 && IA_ISDIR(buf->ia_type)) + * buf->ia_ctime_nsec++; + * + * But this is not the ideal solution as applications see the unexpected + * ctime change causing failures. 
*/ - if (op_ret == 0 && IA_ISDIR(buf->ia_type)) - buf->ia_ctime_nsec++; SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); return 0; @@ -699,8 +693,8 @@ gf_svc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (special_dir) { svc_fd = svc_fd_ctx_get_or_new(this, fd); if (!svc_fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "fd context not found for %s", uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -745,19 +739,17 @@ gf_svc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, local = mem_get0(this->local_pool); if (!local) { op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, - "failed to allocate memory for local " - "(path: %s, gfid: %s)", - loc->path, uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, + "path=%s", loc->path, "gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); goto out; } + loc_copy(&local->loc, loc); + frame->local = local; SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode, subvolume, out); - - loc_copy(&local->loc, loc); local->subvolume = subvolume; - frame->local = local; STACK_WIND(frame, gf_svc_opendir_cbk, subvolume, subvolume->fops->opendir, loc, fd, xdata); @@ -790,11 +782,9 @@ gf_svc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s " - "(gfid: %s)", - loc->path, uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path, + "gfid= %s", uuid_utoa(loc->inode->gfid), NULL); goto out; } @@ -905,9 +895,8 @@ gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (!strcmp(attrname, 
GF_XATTR_GET_REAL_FILENAME_KEY)) { if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1003,11 +992,9 @@ gf_svc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get inode context for %s " - "(gfid: %s)", - loc->name, uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); goto out; } @@ -1049,10 +1036,9 @@ gf_svc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get inode context for %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -1094,11 +1080,9 @@ gf_svc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s " - "(gfid: %s)", - loc->name, uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); goto out; } @@ -1134,8 +1118,8 @@ gf_svc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = NORMAL_INODE; ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, 
SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: SVC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent, @@ -1165,17 +1149,15 @@ gf_svc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1212,8 +1194,8 @@ gf_svc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = NORMAL_INODE; ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: SVC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, @@ -1243,17 +1225,15 @@ gf_svc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - 
SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1338,8 +1318,8 @@ gf_svc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = NORMAL_INODE; ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: SVC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, @@ -1371,17 +1351,15 @@ gf_svc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1419,8 +1397,8 @@ gf_svc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = NORMAL_INODE; ret = svc_inode_ctx_set(this, inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: SVC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent, @@ -1451,17 +1429,15 @@ gf_svc_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, 
op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1503,11 +1479,9 @@ gf_svc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context " - "for %s", - uuid_utoa(loc->parent->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); goto out; } @@ -1655,10 +1629,8 @@ gf_svc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * reconfigure while this is accessing it. */ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string. 
" - "Proceeding."); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); goto out; } @@ -1696,9 +1668,8 @@ gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, svc_fd = svc_fd_ctx_get_or_new(this, fd); if (!svc_fd) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for inode %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); else { if (svc_fd->entry_point_handled && off == svc_fd->last_offset) { op_ret = 0; @@ -1712,9 +1683,8 @@ gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local = mem_get0(this->local_pool); if (!local) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, - "failed to allocate local (inode: %s)", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, + "inode-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } local->subvolume = subvolume; @@ -1795,17 +1765,16 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, svc_fd = svc_fd_ctx_get(this, local->fd); if (!svc_fd) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for the inode %s", - uuid_utoa(local->fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL); op_ret = 0; op_errno = ENOENT; goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); op_ret = 0; op_errno = ENOENT; goto out; @@ -1813,8 +1782,8 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, entry = 
gf_dirent_for_name(entry_point); if (!entry) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, - "failed to allocate memory for the entry %s", entry_point); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "entry-point=%s", entry_point, NULL); op_ret = 0; op_errno = ENOMEM; goto out; @@ -1828,9 +1797,8 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, inode_type = VIRTUAL_INODE; ret = svc_inode_ctx_set(this, entry->inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set the inode context for the entry %s", - entry->d_name); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + "entry-name=%s", entry->d_name, NULL); list_add_tail(&entry->list, &entries.list); op_ret = 1; @@ -1875,14 +1843,14 @@ gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this, inode_unref(loc->inode); loc->inode = inode_new(loc->parent->table); if (!loc->inode) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_NO_MEMORY, - "failed to allocate new inode"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_INODE_FAILED, + NULL); goto out; } if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); goto out; } @@ -1912,8 +1880,7 @@ gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this, ret = dict_set_str(tmp_xdata, "entry-point", "true"); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, - "failed to set dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL); goto out; } @@ -1957,9 +1924,8 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, fd = local->fd; svc_fd = svc_fd_ctx_get(this, fd); if (!svc_fd) { - gf_msg(this->name, 
GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for inode %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -1976,8 +1942,8 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, strcmp(private->special_dir, "") && svc_fd->special_dir && local->subvolume == FIRST_CHILD(this)) { if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to copy the entry point string"); + gf_smsg(this->name, GF_LOG_WARNING, 0, + SVC_MSG_GET_FD_CONTEXT_FAILED, NULL); goto out; } @@ -1985,8 +1951,8 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, if (!inode) { inode = inode_new(fd->inode->table); if (!inode) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, - "failed to allocate new inode"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ALLOC_INODE_FAILED, + NULL); goto out; } } @@ -2016,8 +1982,7 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, goto out; ret = dict_set_str(tmp_xdata, "entry-point", "true"); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_SET_FAILED, - "failed to set dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL); goto out; } @@ -2067,9 +2032,8 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, svc_fd = svc_fd_ctx_get(this, local->fd); if (!svc_fd) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for gfid %s", - uuid_utoa(local->fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL); } if (local->subvolume == FIRST_CHILD(this)) @@ -2085,8 +2049,8 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t 
*this, * condition where, priv->path is changed in reconfigure */ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, - "failed to copy the entry point"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); goto out; } @@ -2107,9 +2071,8 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ret = svc_inode_ctx_set(this, entry->inode, inode_type); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, - SVC_MSG_SET_INODE_CONTEXT_FAILED, - "failed to set inode context"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_SET_INODE_CONTEXT_FAILED, NULL); if (svc_fd) svc_fd->last_offset = entry->d_off; } @@ -2148,8 +2111,7 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local = mem_get0(this->local_pool); if (!local) { op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, - "failed to allocate local"); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL); goto out; } @@ -2164,9 +2126,8 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, */ svc_fd = svc_fd_ctx_get_or_new(this, fd); if (!svc_fd) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, - "failed to get the fd context for the inode %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); else { if (svc_fd->entry_point_handled && off == svc_fd->last_offset) { op_ret = 0; @@ -2221,22 +2182,17 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the context for the inode " - "%s", - uuid_utoa(oldloc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, 
"gfid=%s", + uuid_utoa(oldloc->inode->gfid), NULL); goto out; } if (src_inode_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_RENAME_SNAPSHOT_ENTRY, - "rename happening on a entry %s " - "residing in snapshot", - oldloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "name=%s", oldloc->name, NULL); goto out; } @@ -2245,11 +2201,9 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!ret && dst_inode_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_RENAME_SNAPSHOT_ENTRY, - "rename of %s happening to a entry " - "%s residing in snapshot", - oldloc->name, newloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s", + oldloc->name, "newloc-name=%s", newloc->name, NULL); goto out; } } @@ -2259,11 +2213,9 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!ret && dst_parent_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_RENAME_SNAPSHOT_ENTRY, - "rename of %s happening to a entry %s " - "residing in snapshot", - oldloc->name, newloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s", + oldloc->name, "newloc-name=%s", newloc->name, NULL); goto out; } } @@ -2305,9 +2257,8 @@ gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!ret && src_inode_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, - "link happening on a entry %s residing in snapshot", - oldloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, + "oldloc-name=%s", oldloc->name, NULL); goto out; } @@ -2315,10 +2266,9 @@ gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t 
*oldloc, loc_t *newloc, if (!ret && dst_parent_type == VIRTUAL_INODE) { op_ret = -1; op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, - "link of %s happening to a entry %s " - "residing in snapshot", - oldloc->name, newloc->name); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, + "oldloc-name=%s", oldloc->name, "newloc-name=%s", newloc->name, + NULL); goto out; } @@ -2353,11 +2303,9 @@ gf_svc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get inode context for %s " - "(gfid: %s)", - loc->path, uuid_utoa(loc->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); goto out; } @@ -2398,10 +2346,9 @@ gf_svc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, if (ret < 0) { op_ret = -1; op_errno = EINVAL; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SVC_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get inode context for %s", - uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -2485,8 +2432,9 @@ gf_svc_forget(xlator_t *this, inode_t *inode) ret = inode_ctx_del(inode, this, &value); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DELETE_INODE_CONTEXT_FAILED, - "failed to delete inode context for %s", uuid_utoa(inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_DELETE_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(inode->gfid), NULL); goto out; } @@ -2500,7 +2448,7 @@ gf_svc_priv_destroy(xlator_t *this, svc_private_t *priv) int ret = -1; if (!priv) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_NULL_PRIV, "priv NULL"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_NULL_PRIV, 
NULL); goto out; } @@ -2555,10 +2503,8 @@ reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("snapshot-directory", path, options, str, out); if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, - "%s is not a " - "valid entry point", - path); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, + "path=%s", path, NULL); goto out; } @@ -2618,9 +2564,7 @@ mem_acct_init(xlator_t *this) ret = xlator_mem_acct_init(this, gf_svc_mt_end + 1); if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED, - "Memory accounting" - " init failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED, NULL); } return ret; @@ -2637,8 +2581,7 @@ init(xlator_t *this) char *special_dir = NULL; if (!this->children) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR, - "configured without any child"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR, NULL); goto out; } @@ -2649,11 +2592,8 @@ init(xlator_t *this) } if (children != 2) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG, - "snap-view-client " - "has got %d subvolumes. 
It can have only 2 " - "subvolumes.", - children); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG, + "subvol-num=%d", children, NULL); goto out; } @@ -2673,41 +2613,36 @@ init(xlator_t *this) GF_OPTION_INIT("snapshot-directory", path, str, out); if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, - "%s is not a valid entry point", path); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, + "path=%s", path, NULL); goto out; } private ->path = gf_strdup(path); if (!private->path) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_NO_MEMORY, - "failed to allocate memory " - "for the entry point path %s", - path); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "entry-point-path=%s", path, NULL); goto out; } GF_OPTION_INIT("snapdir-entry-path", special_dir, str, out); if (!special_dir || strstr(special_dir, path)) { if (special_dir) - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ENTRY_POINT_SPECIAL_DIR, - "entry point directory %s cannot be part of " - "the special directory %s", - path, special_dir); + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_ENTRY_POINT_SPECIAL_DIR, "path=%s", path, + "special-dir=%s", special_dir); else - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ENTRY_POINT_SPECIAL_DIR, - "null special directory"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NULL_SPECIAL_DIR, + NULL); goto out; } private ->special_dir = gf_strdup(special_dir); if (!private->special_dir) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_NO_MEMORY, - "failed to allocate memory " - "for the special directory %s", - special_dir); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "special-directory=%s", special_dir, NULL); goto out; } @@ -2716,8 +2651,7 @@ init(xlator_t *this) this->local_pool = mem_pool_new(svc_local_t, 128); if (!this->local_pool) { - gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, - "could not get mem pool for 
frame->local"); + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_MEM_POOL_GET_FAILED, NULL); goto out; } @@ -2749,8 +2683,8 @@ fini(xlator_t *this) * set this->priv to NULL. */ if (gf_svc_priv_destroy(this, priv)) - gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_PRIV_DESTROY_FAILED, - "failed to destroy private"); + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_PRIV_DESTROY_FAILED, + NULL); this->private = NULL; diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c index 5514a54f0d6..62c1ddac49c 100644 --- a/xlators/features/snapview-server/src/snapview-server-helpers.c +++ b/xlators/features/snapview-server/src/snapview-server-helpers.c @@ -476,6 +476,7 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name, char logfile[PATH_MAX] = { 0, }; + char *volfile_server = NULL; GF_VALIDATE_OR_GOTO("snapview-server", this, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); @@ -512,14 +513,50 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name, goto out; } - ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007); + /* + * Before, localhost was used as the volfile server. But, with that + * method, accessing snapshots started giving ENOENT error if a + * specific bind address is mentioned in the glusterd volume file. + * Check the bug https://bugzilla.redhat.com/show_bug.cgi?id=1725211. + * So, the new method is tried below, where, snapview-server first + * uses the volfile server used by the snapd (obtained from the + * command line arguments saved in the global context of the process). + * If the volfile server in global context is NULL, then localhost + * is tried (like before). + */ + if (this->ctx->cmd_args.volfile_server) { + volfile_server = gf_strdup(this->ctx->cmd_args.volfile_server); + if (!volfile_server) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + SVS_MSG_VOLFILE_SERVER_GET_FAIL, + "failed to copy volfile server %s. 
", + this->ctx->cmd_args.volfile_server); + ret = -1; + goto out; + } + } else { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + SVS_MSG_VOLFILE_SERVER_GET_FAIL, + "volfile server is NULL in cmd args. " + "Trying with localhost"); + volfile_server = gf_strdup("localhost"); + if (!volfile_server) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + SVS_MSG_VOLFILE_SERVER_GET_FAIL, + "failed to copy volfile server localhost."); + ret = -1; + goto out; + } + } + + ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007); if (ret) { gf_msg(this->name, GF_LOG_ERROR, local_errno, SVS_MSG_SET_VOLFILE_SERVR_FAILED, "setting the " - "volfile server for snap volume %s " + "volfile server %s for snap volume %s " "failed", - dirent->name); + volfile_server, dirent->name); goto out; } @@ -561,6 +598,7 @@ out: dirent->fs = fs; } + GF_FREE(volfile_server); return fs; } diff --git a/xlators/features/snapview-server/src/snapview-server-messages.h b/xlators/features/snapview-server/src/snapview-server-messages.h index 8548015648a..f634ab5d2b0 100644 --- a/xlators/features/snapview-server/src/snapview-server-messages.h +++ b/xlators/features/snapview-server/src/snapview-server-messages.h @@ -49,6 +49,6 @@ GLFS_MSGID(SNAPVIEW_SERVER, SVS_MSG_NO_MEMORY, SVS_MSG_MEM_ACNT_FAILED, SVS_MSG_CLOSEDIR_FAILED, SVS_MSG_CLOSE_FAILED, SVS_MSG_GFID_GEN_FAILED, SVS_MSG_GLFS_NEW_FAILED, SVS_MSG_SET_VOLFILE_SERVR_FAILED, SVS_MSG_SET_LOGGING_FAILED, - SVS_MSG_GLFS_INIT_FAILED); + SVS_MSG_VOLFILE_SERVER_GET_FAIL, SVS_MSG_GLFS_INIT_FAILED); #endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */ diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c index b608cdfcd44..ecf31c3b880 100644 --- a/xlators/features/snapview-server/src/snapview-server-mgmt.c +++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c @@ -26,11 +26,11 @@ mgmt_cbk_snap(struct rpc_clnt *rpc, void *mydata, void *data) return 0; } -rpcclnt_cb_actor_t 
svs_cbk_actors[GF_CBK_MAXVALUE] = { - [GF_CBK_GET_SNAPS] = {"GETSNAPS", GF_CBK_GET_SNAPS, mgmt_cbk_snap}, +static rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = { + [GF_CBK_GET_SNAPS] = {"GETSNAPS", mgmt_cbk_snap, GF_CBK_GET_SNAPS}, }; -struct rpcclnt_cb_program svs_cbk_prog = { +static struct rpcclnt_cb_program svs_cbk_prog = { .progname = "GlusterFS Callback", .prognum = GLUSTER_CBK_PROGRAM, .progver = GLUSTER_CBK_VERSION, @@ -38,12 +38,12 @@ struct rpcclnt_cb_program svs_cbk_prog = { .numactors = GF_CBK_MAXVALUE, }; -char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = { +static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = { [GF_HNDSK_NULL] = "NULL", [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY", }; -rpc_clnt_prog_t svs_clnt_handshake_prog = { +static rpc_clnt_prog_t svs_clnt_handshake_prog = { .progname = "GlusterFS Handshake", .prognum = GLUSTER_HNDSK_PROGRAM, .progver = GLUSTER_HNDSK_VERSION, @@ -101,8 +101,12 @@ svs_mgmt_init(xlator_t *this) if (cmd_args->volfile_server) host = cmd_args->volfile_server; + options = dict_new(); + if (!options) + goto out; + opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args); - ret = rpc_transport_inet_options_build(&options, host, port, + ret = rpc_transport_inet_options_build(options, host, port, (opt != NULL ? 
opt->value : NULL)); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_BUILD_TRNSPRT_OPT_FAILED, @@ -145,6 +149,8 @@ svs_mgmt_init(xlator_t *this) gf_msg_debug(this->name, 0, "svs mgmt init successful"); out: + if (options) + dict_unref(options); if (ret) if (priv) { rpc_clnt_connection_cleanup(&priv->rpc->conn); @@ -231,7 +237,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, glusterfs_ctx_t *ctx = NULL; int ret = -1; dict_t *dict = NULL; - char key[1024] = {0}; + char key[32] = {0}; + int len; int snapcount = 0; svs_private_t *priv = NULL; xlator_t *this = NULL; @@ -250,7 +257,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, this = frame->this; ctx = frame->this->ctx; priv = this->private; - old_dirents = priv->dirents; if (!ctx) { errno = EINVAL; @@ -325,8 +331,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, } for (i = 0; i < snapcount; i++) { - snprintf(key, sizeof(key), "snap-volname.%d", i + 1); - ret = dict_get_str(dict, key, &value); + len = snprintf(key, sizeof(key), "snap-volname.%d", i + 1); + ret = dict_get_strn(dict, key, len, &value); if (ret) { errno = EINVAL; ret = -1; @@ -338,8 +344,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, strncpy(dirents[i].snap_volname, value, sizeof(dirents[i].snap_volname)); - snprintf(key, sizeof(key), "snap-id.%d", i + 1); - ret = dict_get_str(dict, key, &value); + len = snprintf(key, sizeof(key), "snap-id.%d", i + 1); + ret = dict_get_strn(dict, key, len, &value); if (ret) { errno = EINVAL; ret = -1; @@ -349,8 +355,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, } strncpy(dirents[i].uuid, value, sizeof(dirents[i].uuid)); - snprintf(key, sizeof(key), "snapname.%d", i + 1); - ret = dict_get_str(dict, key, &value); + len = snprintf(key, sizeof(key), "snapname.%d", i + 1); + ret = dict_get_strn(dict, key, len, &value); if (ret) { errno = EINVAL; ret = -1; @@ -382,6 +388,7 
@@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, LOCK(&priv->snaplist_lock); { oldcount = priv->num_snaps; + old_dirents = priv->dirents; for (i = 0; i < priv->num_snaps; i++) { for (j = 0; j < snapcount; j++) { if ((!strcmp(old_dirents[i].name, dirents[j].name)) && @@ -401,7 +408,12 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, if (old_dirents) { for (i = 0; i < oldcount; i++) { if (old_dirents[i].fs) - glfs_fini(old_dirents[i].fs); + gf_msg_debug(this->name, 0, + "calling glfs_fini on " + "name: %s, snap_volname: %s, uuid: %s", + old_dirents[i].name, old_dirents[i].snap_volname, + old_dirents[i].uuid); + glfs_fini(old_dirents[i].fs); } } diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c index b4998b88a8e..76cccae5914 100644 --- a/xlators/features/snapview-server/src/snapview-server.c +++ b/xlators/features/snapview-server/src/snapview-server.c @@ -828,7 +828,8 @@ out: * back into the dict. But to get the values for those xattrs it has to do the * getxattr operation on each xattr which might turn out to be a costly * operation. So for each of the xattrs present in the list, a 0 byte value - * ("") is set into the dict before unwinding. This can be treated as an + * ("") is set into the dict before unwinding. Since ("") is also a valid xattr + * value(in a file system) we use an extra key in the same dictionary as an * indicator to other xlators which want to cache the xattrs (as of now, * md-cache which caches acl and selinux related xattrs) to not to cache the * values of the xattrs present in the dict. 
@@ -871,6 +872,15 @@ svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size) list_offset += strlen(keybuffer) + 1; } /* while (remaining_size > 0) */ + /* Add an additional key to indicate that we don't need to cache these + * xattrs(with value "") */ + ret = dict_set_str(dict, "glusterfs.skip-cache", ""); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED, + "dict set operation for the key glusterfs.skip-cache failed."); + goto out; + } + ret = 0; out: @@ -997,8 +1007,8 @@ svs_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, op_errno = ENOMEM; gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, "failed to add xattrs from the list to " - "dict for %s (gfid: %s, key: %s)", - loc->path, uuid_utoa(loc->inode->gfid), name); + "dict for %s (gfid: %s)", + loc->path, uuid_utoa(loc->inode->gfid)); goto out; } GF_FREE(value); @@ -1179,8 +1189,8 @@ svs_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, op_errno = ENOMEM; gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, "failed to add xattrs from the list " - "to dict (gfid: %s, key: %s)", - uuid_utoa(fd->inode->gfid), name); + "to dict (gfid: %s)", + uuid_utoa(fd->inode->gfid)); goto out; } GF_FREE(value); @@ -2002,7 +2012,9 @@ svs_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) "failed", loc->name, uuid_utoa(loc->inode->gfid)); goto out; - } + } else + gf_msg_debug(this->name, 0, "stat on %s (%s) successful", loc->path, + uuid_utoa(loc->inode->gfid)); iatt_from_stat(&buf, &stat); gf_uuid_copy(buf.ia_gfid, loc->inode->gfid); diff --git a/xlators/features/snapview-server/src/snapview-server.h b/xlators/features/snapview-server/src/snapview-server.h index b25801901ed..6472422e715 100644 --- a/xlators/features/snapview-server/src/snapview-server.h +++ b/xlators/features/snapview-server/src/snapview-server.h @@ -19,10 +19,8 @@ #include <ctype.h> #include <sys/uio.h> #include 
<glusterfs/glusterfs.h> -#include <glusterfs/xlator.h> #include <glusterfs/logging.h> #include "glfs.h" -#include <glusterfs/common-utils.h> #include "glfs-handles.h" #include "glfs-internal.h" #include "glusterfs3-xdr.h" @@ -58,9 +56,16 @@ { \ for (i = 0; i < _private->num_snaps; i++) { \ tmp_fs = _private->dirents[i].fs; \ - gf_log(this->name, GF_LOG_DEBUG, "dirent->fs: %p", tmp_fs); \ + gf_log(this->name, GF_LOG_DEBUG, \ + "snap name: %s, snap volume: %s," \ + "dirent->fs: %p", \ + _private->dirents[i].name, \ + _private->dirents[i].snap_volname, tmp_fs); \ if (tmp_fs && fs && (tmp_fs == fs)) { \ found = _gf_true; \ + gf_msg_debug(this->name, 0, \ + "found the fs " \ + "instance"); \ break; \ } \ } \ diff --git a/xlators/features/thin-arbiter/src/Makefile.am b/xlators/features/thin-arbiter/src/Makefile.am index 7fd31a66caa..a3c133e7798 100644 --- a/xlators/features/thin-arbiter/src/Makefile.am +++ b/xlators/features/thin-arbiter/src/Makefile.am @@ -1,6 +1,4 @@ -if WITH_SERVER xlator_LTLIBRARIES = thin-arbiter.la -endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c index d66843625d3..7d09cba3e9c 100644 --- a/xlators/features/trash/src/trash.c +++ b/xlators/features/trash/src/trash.c @@ -170,7 +170,7 @@ store_eliminate_path(char *str, trash_elim_path **eliminate) int ret = 0; char *strtokptr = NULL; - if (eliminate == NULL) { + if ((str == NULL) || (eliminate == NULL)) { ret = EINVAL; goto out; } @@ -212,11 +212,11 @@ void append_time_stamp(char *name, size_t name_size) { int i; - char timestr[64] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; - gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS); + gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS); /* removing white spaces in timestamp */ for (i = 0; i < strlen(timestr); i++) { @@ -2523,6 +2523,7 @@ out: GF_FREE(priv); } mem_pool_destroy(this->local_pool); + this->local_pool = 
NULL; } return ret; } diff --git a/xlators/features/upcall/src/upcall-cache-invalidation.h b/xlators/features/upcall/src/upcall-cache-invalidation.h index e509a89acd5..db649b2c9a6 100644 --- a/xlators/features/upcall/src/upcall-cache-invalidation.h +++ b/xlators/features/upcall/src/upcall-cache-invalidation.h @@ -15,10 +15,4 @@ * events post its last access */ #define CACHE_INVALIDATION_TIMEOUT "60" -/* xlator options */ -gf_boolean_t -is_cache_invalidation_enabled(xlator_t *this); -int32_t -get_cache_invalidation_timeout(xlator_t *this); - #endif /* __UPCALL_CACHE_INVALIDATION_H__ */ diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c index 46cf6f840f0..c641bd6f432 100644 --- a/xlators/features/upcall/src/upcall-internal.c +++ b/xlators/features/upcall/src/upcall-internal.c @@ -35,62 +35,37 @@ gf_boolean_t is_upcall_enabled(xlator_t *this) { upcall_private_t *priv = NULL; - gf_boolean_t is_enabled = _gf_false; if (this->private) { priv = (upcall_private_t *)this->private; - - if (priv->cache_invalidation_enabled) { - is_enabled = _gf_true; - } + return priv->cache_invalidation_enabled; } - return is_enabled; + return _gf_false; } /* * Get the cache_invalidation_timeout */ -int32_t +static int32_t get_cache_invalidation_timeout(xlator_t *this) { upcall_private_t *priv = NULL; - int32_t timeout = 0; if (this->private) { priv = (upcall_private_t *)this->private; - timeout = priv->cache_invalidation_timeout; - } - - return timeout; -} - -/* - * Allocate and add a new client entry to the given upcall entry - */ -upcall_client_t * -add_upcall_client(call_frame_t *frame, client_t *client, - upcall_inode_ctx_t *up_inode_ctx) -{ - upcall_client_t *up_client_entry = NULL; - - pthread_mutex_lock(&up_inode_ctx->client_list_lock); - { - up_client_entry = __add_upcall_client(frame, client, up_inode_ctx); + return priv->cache_invalidation_timeout; } - pthread_mutex_unlock(&up_inode_ctx->client_list_lock); - return 
up_client_entry; + return 0; } -upcall_client_t * +static upcall_client_t * __add_upcall_client(call_frame_t *frame, client_t *client, - upcall_inode_ctx_t *up_inode_ctx) + upcall_inode_ctx_t *up_inode_ctx, time_t now) { - upcall_client_t *up_client_entry = NULL; - - up_client_entry = GF_CALLOC(1, sizeof(*up_client_entry), - gf_upcall_mt_upcall_client_entry_t); + upcall_client_t *up_client_entry = GF_MALLOC( + sizeof(*up_client_entry), gf_upcall_mt_upcall_client_entry_t); if (!up_client_entry) { gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_NO_MEMORY, "Memory allocation failed"); @@ -98,7 +73,7 @@ __add_upcall_client(call_frame_t *frame, client_t *client, } INIT_LIST_HEAD(&up_client_entry->client_list); up_client_entry->client_uid = gf_strdup(client->client_uid); - up_client_entry->access_time = time(NULL); + up_client_entry->access_time = now; up_client_entry->expire_time_attr = get_cache_invalidation_timeout( frame->this); @@ -110,39 +85,7 @@ __add_upcall_client(call_frame_t *frame, client_t *client, return up_client_entry; } -/* - * Given client->uid, retrieve the corresponding upcall client entry. - * If none found, create a new entry. - */ -upcall_client_t * -__get_upcall_client(call_frame_t *frame, client_t *client, - upcall_inode_ctx_t *up_inode_ctx) -{ - upcall_client_t *up_client_entry = NULL; - upcall_client_t *tmp = NULL; - gf_boolean_t found_client = _gf_false; - - list_for_each_entry_safe(up_client_entry, tmp, &up_inode_ctx->client_list, - client_list) - { - if (strcmp(client->client_uid, up_client_entry->client_uid) == 0) { - /* found client entry. 
Update the access_time */ - up_client_entry->access_time = time(NULL); - found_client = _gf_true; - gf_log(THIS->name, GF_LOG_DEBUG, "upcall_entry_t client found - %s", - up_client_entry->client_uid); - break; - } - } - - if (!found_client) { /* create one */ - up_client_entry = __add_upcall_client(frame, client, up_inode_ctx); - } - - return up_client_entry; -} - -int +static int __upcall_inode_ctx_set(inode_t *inode, xlator_t *this) { upcall_inode_ctx_t *inode_ctx = NULL; @@ -158,7 +101,7 @@ __upcall_inode_ctx_set(inode_t *inode, xlator_t *this) if (!ret) goto out; - inode_ctx = GF_CALLOC(1, sizeof(upcall_inode_ctx_t), + inode_ctx = GF_MALLOC(sizeof(upcall_inode_ctx_t), gf_upcall_mt_upcall_inode_ctx_t); if (!inode_ctx) { @@ -190,7 +133,7 @@ out: return ret; } -upcall_inode_ctx_t * +static upcall_inode_ctx_t * __upcall_inode_ctx_get(inode_t *inode, xlator_t *this) { upcall_inode_ctx_t *inode_ctx = NULL; @@ -229,8 +172,20 @@ upcall_inode_ctx_get(inode_t *inode, xlator_t *this) return inode_ctx; } -int -upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx) +static int +__upcall_cleanup_client_entry(upcall_client_t *up_client) +{ + list_del_init(&up_client->client_list); + + GF_FREE(up_client->client_uid); + GF_FREE(up_client); + + return 0; +} + +static int +upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx, + time_t now) { upcall_client_t *up_client = NULL; upcall_client_t *tmp = NULL; @@ -245,7 +200,7 @@ upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx) list_for_each_entry_safe(up_client, tmp, &up_inode_ctx->client_list, client_list) { - t_expired = time(NULL) - up_client->access_time; + t_expired = now - up_client->access_time; if (t_expired > (2 * timeout)) { gf_log(THIS->name, GF_LOG_TRACE, "Cleaning up client_entry(%s)", @@ -269,17 +224,6 @@ out: return ret; } -int -__upcall_cleanup_client_entry(upcall_client_t *up_client) -{ - list_del_init(&up_client->client_list); - - 
GF_FREE(up_client->client_uid); - GF_FREE(up_client); - - return 0; -} - /* * Free Upcall inode_ctx client list */ @@ -298,6 +242,10 @@ __upcall_cleanup_inode_ctx_client_list(upcall_inode_ctx_t *inode_ctx) return 0; } +static void +upcall_cache_forget(xlator_t *this, inode_t *inode, + upcall_inode_ctx_t *up_inode_ctx); + /* * Free upcall_inode_ctx */ @@ -360,6 +308,7 @@ upcall_reaper_thread(void *data) upcall_inode_ctx_t *tmp = NULL; xlator_t *this = NULL; time_t timeout = 0; + time_t time_now; this = (xlator_t *)data; GF_ASSERT(this); @@ -367,33 +316,35 @@ upcall_reaper_thread(void *data) priv = this->private; GF_ASSERT(priv); + time_now = gf_time(); while (!priv->fini) { list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list, inode_ctx_list) { /* cleanup expired clients */ - upcall_cleanup_expired_clients(this, inode_ctx); + upcall_cleanup_expired_clients(this, inode_ctx, time_now); if (!inode_ctx->destroy) { continue; } + /* client list would have been cleaned up*/ + gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)", + inode_ctx); LOCK(&priv->inode_ctx_lk); { - /* client list would have been cleaned up*/ - gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)", - inode_ctx); list_del_init(&inode_ctx->inode_ctx_list); pthread_mutex_destroy(&inode_ctx->client_list_lock); - GF_FREE(inode_ctx); - inode_ctx = NULL; } UNLOCK(&priv->inode_ctx_lk); + GF_FREE(inode_ctx); + inode_ctx = NULL; } /* don't do a very busy loop */ timeout = get_cache_invalidation_timeout(this); sleep(timeout / 2); + time_now = gf_time(); } return NULL; @@ -486,6 +437,13 @@ up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs) return ret; } +static void +upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid, + upcall_client_t *up_client_entry, uint32_t flags, + struct iatt *stbuf, struct iatt *p_stbuf, + struct iatt *oldp_stbuf, dict_t *xattr, + time_t now); + gf_boolean_t up_invalidate_needed(dict_t *xattrs) { @@ -520,6 +478,8 @@ upcall_cache_invalidate(call_frame_t 
*frame, xlator_t *this, client_t *client, upcall_client_t *tmp = NULL; upcall_inode_ctx_t *up_inode_ctx = NULL; gf_boolean_t found = _gf_false; + time_t time_now; + inode_t *linked_inode = NULL; if (!is_upcall_enabled(this)) return; @@ -532,7 +492,20 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, return; } - if (inode) + /* For nameless LOOKUPs, inode created shall always be + * invalid. Hence check if there is any already linked inode. + * If yes, update the inode_ctx of that valid inode + */ + if (inode && (inode->ia_type == IA_INVAL) && stbuf) { + linked_inode = inode_find(inode->table, stbuf->ia_gfid); + if (linked_inode) { + gf_log("upcall", GF_LOG_DEBUG, + "upcall_inode_ctx_get of linked inode (%p)", inode); + up_inode_ctx = upcall_inode_ctx_get(linked_inode, this); + } + } + + if (inode && !up_inode_ctx) up_inode_ctx = upcall_inode_ctx_get(inode, this); if (!up_inode_ctx) { @@ -560,6 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, goto out; } + time_now = gf_time(); pthread_mutex_lock(&up_inode_ctx->client_list_lock); { list_for_each_entry_safe(up_client_entry, tmp, @@ -567,7 +541,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, { /* Do not send UPCALL event if same client. */ if (!strcmp(client->client_uid, up_client_entry->client_uid)) { - up_client_entry->access_time = time(NULL); + up_client_entry->access_time = time_now; found = _gf_true; continue; } @@ -589,17 +563,21 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, * Also if the file is frequently accessed, set * expire_time_attr to 0. 
*/ - upcall_client_cache_invalidate(this, up_inode_ctx->gfid, - up_client_entry, flags, stbuf, - p_stbuf, oldp_stbuf, xattr); + upcall_client_cache_invalidate( + this, up_inode_ctx->gfid, up_client_entry, flags, stbuf, + p_stbuf, oldp_stbuf, xattr, time_now); } if (!found) { - up_client_entry = __add_upcall_client(frame, client, up_inode_ctx); + up_client_entry = __add_upcall_client(frame, client, up_inode_ctx, + time_now); } } pthread_mutex_unlock(&up_inode_ctx->client_list_lock); out: + /* release the ref from inode_find */ + if (linked_inode) + inode_unref(linked_inode); return; } @@ -607,11 +585,12 @@ out: * If the upcall_client_t has recently accessed the file (i.e, within * priv->cache_invalidation_timeout), send a upcall notification. */ -void +static void upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid, upcall_client_t *up_client_entry, uint32_t flags, struct iatt *stbuf, struct iatt *p_stbuf, - struct iatt *oldp_stbuf, dict_t *xattr) + struct iatt *oldp_stbuf, dict_t *xattr, + time_t now) { struct gf_upcall up_req = { 0, @@ -621,7 +600,7 @@ upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid, }; time_t timeout = 0; int ret = -1; - time_t t_expired = time(NULL) - up_client_entry->access_time; + time_t t_expired = now - up_client_entry->access_time; GF_VALIDATE_OR_GOTO("upcall_client_cache_invalidate", !(gf_uuid_is_null(gfid)), out); @@ -678,32 +657,32 @@ out: * Send "UP_FORGET" to all the clients so that they invalidate their cache * entry and do a fresh lookup next time when any I/O comes in. 
*/ -void +static void upcall_cache_forget(xlator_t *this, inode_t *inode, upcall_inode_ctx_t *up_inode_ctx) { upcall_client_t *up_client_entry = NULL; upcall_client_t *tmp = NULL; - uint32_t flags = 0; + uint32_t flags = UP_FORGET; + time_t time_now; if (!up_inode_ctx) { return; } + time_now = gf_time(); pthread_mutex_lock(&up_inode_ctx->client_list_lock); { list_for_each_entry_safe(up_client_entry, tmp, &up_inode_ctx->client_list, client_list) { - flags = UP_FORGET; - - /* Set the access time to time(NULL) + /* Set the access time to gf_time() * to send notify */ - up_client_entry->access_time = time(NULL); + up_client_entry->access_time = time_now; upcall_client_cache_invalidate(this, up_inode_ctx->gfid, up_client_entry, flags, NULL, NULL, - NULL, NULL); + NULL, NULL, time_now); } } pthread_mutex_unlock(&up_inode_ctx->client_list_lock); diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c index 2583c50ef3f..0795f58059d 100644 --- a/xlators/features/upcall/src/upcall.c +++ b/xlators/features/upcall/src/upcall.c @@ -57,14 +57,13 @@ static int32_t up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -111,14 +110,13 @@ up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -167,14 +165,13 @@ static int32_t up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - 
int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -220,14 +217,13 @@ static int32_t up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -274,14 +270,13 @@ static int32_t up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -343,14 +338,13 @@ static int32_t up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -410,14 +404,13 @@ static int32_t up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -472,14 +465,13 @@ static int32_t up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, loc, 
NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -531,14 +523,13 @@ static int32_t up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -592,14 +583,13 @@ static int32_t up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -653,14 +643,13 @@ static int32_t up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode_t umask, dict_t *params) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -717,15 +706,13 @@ static int32_t up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *params) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); - if (!local) { - op_errno = ENOMEM; goto err; } @@ -774,14 +761,13 @@ out: static int32_t up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -826,14 +812,13 @@ out: static int32_t up_stat(call_frame_t *frame, xlator_t 
*this, loc_t *loc, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -852,14 +837,13 @@ err: static int32_t up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -879,14 +863,13 @@ static int32_t up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -932,14 +915,13 @@ static int32_t up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -986,14 +968,13 @@ static int32_t up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1047,14 +1028,13 @@ static int32_t up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, 
loc, NULL, loc->parent, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1110,14 +1090,13 @@ static int32_t up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1164,14 +1143,13 @@ static int32_t up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1216,14 +1194,13 @@ out: static int32_t up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1270,14 +1247,13 @@ static int32_t up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1334,14 +1310,13 @@ static int32_t up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, dict_t *dict) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1361,14 +1336,13 @@ static int32_t up_fsetattr(call_frame_t *frame, xlator_t *this, 
fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1415,14 +1389,13 @@ static int32_t up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, off_t offset, size_t len, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1470,14 +1443,13 @@ static int32_t up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1524,14 +1496,13 @@ static int up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1577,14 +1548,13 @@ static int32_t up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, gf_seek_what_t what, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1652,14 +1622,13 @@ static int32_t up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; 
upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1727,14 +1696,13 @@ static int32_t up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, fd, fd->inode, dict); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1800,7 +1768,7 @@ static int32_t up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; dict_t *xattr = NULL; @@ -1808,13 +1776,11 @@ up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, xattr = dict_for_key_value(name, "", 1, _gf_true); if (!xattr) { - op_errno = ENOMEM; goto err; } local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1885,7 +1851,7 @@ static int32_t up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; dict_t *xattr = NULL; @@ -1893,13 +1859,11 @@ up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, xattr = dict_for_key_value(name, "", 1, _gf_true); if (!xattr) { - op_errno = ENOMEM; goto err; } local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr); if (!local) { - op_errno = ENOMEM; goto err; } @@ -1950,14 +1914,13 @@ static int32_t up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); if (!local) { - op_errno = 
ENOMEM; goto err; } @@ -2000,14 +1963,13 @@ static int32_t up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { - int32_t op_errno = -1; + int32_t op_errno = ENOMEM; upcall_local_t *local = NULL; EXIT_IF_UPCALL_OFF(this, out); local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); if (!local) { - op_errno = ENOMEM; goto err; } diff --git a/xlators/features/upcall/src/upcall.h b/xlators/features/upcall/src/upcall.h index bcaf6b01086..aa535088ad7 100644 --- a/xlators/features/upcall/src/upcall.h +++ b/xlators/features/upcall/src/upcall.h @@ -100,32 +100,10 @@ upcall_local_t * upcall_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, inode_t *inode, dict_t *xattr); -upcall_client_t * -add_upcall_client(call_frame_t *frame, client_t *client, - upcall_inode_ctx_t *up_inode_ctx); -upcall_client_t * -__add_upcall_client(call_frame_t *frame, client_t *client, - upcall_inode_ctx_t *up_inode_ctx); -upcall_client_t * -__get_upcall_client(call_frame_t *frame, client_t *client, - upcall_inode_ctx_t *up_inode_ctx); -int -__upcall_cleanup_client_entry(upcall_client_t *up_client); -int -upcall_cleanup_expired_clients(xlator_t *this, - upcall_inode_ctx_t *up_inode_ctx); - -int -__upcall_inode_ctx_set(inode_t *inode, xlator_t *this); -upcall_inode_ctx_t * -__upcall_inode_ctx_get(inode_t *inode, xlator_t *this); upcall_inode_ctx_t * upcall_inode_ctx_get(inode_t *inode, xlator_t *this); int upcall_cleanup_inode_ctx(xlator_t *this, inode_t *inode); -void -upcall_cache_forget(xlator_t *this, inode_t *inode, - upcall_inode_ctx_t *up_inode_ctx); void * upcall_reaper_thread(void *data); @@ -142,12 +120,6 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, inode_t *inode, uint32_t flags, struct iatt *stbuf, struct iatt *p_stbuf, struct iatt *oldp_stbuf, dict_t *xattr); -void -upcall_client_cache_invalidate(xlator_t *xl, uuid_t gfid, - upcall_client_t *up_client_entry, uint32_t flags, - 
struct iatt *stbuf, struct iatt *p_stbuf, - struct iatt *oldp_stbuf, dict_t *xattr); - int up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs); diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py index 8730a51d13e..9fb3e1b8b1a 100755 --- a/xlators/features/utime/src/utime-gen-fops-c.py +++ b/xlators/features/utime/src/utime-gen-fops-c.py @@ -95,6 +95,16 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, frame->root->flags |= MDATA_CTIME; } + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + if (valid & GF_ATTR_ATIME_NOW) { + frame->root->ctime.tv_sec = stbuf->ia_atime; + frame->root->ctime.tv_nsec = stbuf->ia_atime_nsec; + } else if (valid & GF_ATTR_MTIME_NOW) { + frame->root->ctime.tv_sec = stbuf->ia_mtime; + frame->root->ctime.tv_nsec = stbuf->ia_mtime_nsec; + } + } + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); return 0; diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c index 79cc0145f50..29d9ad93561 100644 --- a/xlators/features/utime/src/utime-helpers.c +++ b/xlators/features/utime/src/utime-helpers.c @@ -17,7 +17,7 @@ gl_timespec_get(struct timespec *ts) #ifdef TIME_UTC timespec_get(ts, TIME_UTC); #else - timespec_now(ts); + timespec_now_realtime(ts); #endif } diff --git a/xlators/features/utime/src/utime-helpers.h b/xlators/features/utime/src/utime-helpers.h index f2dfeed3a41..2e32d4bece6 100644 --- a/xlators/features/utime/src/utime-helpers.h +++ b/xlators/features/utime/src/utime-helpers.h @@ -11,7 +11,6 @@ #ifndef _UTIME_HELPERS_H #define _UTIME_HELPERS_H -#include "glusterfs-fops.h" #include <glusterfs/stack.h> #include <glusterfs/xlator.h> #include <glusterfs/timespec.h> diff --git a/xlators/features/utime/src/utime-messages.h b/xlators/features/utime/src/utime-messages.h index bac18aba460..bd40265abaf 100644 --- a/xlators/features/utime/src/utime-messages.h +++ 
b/xlators/features/utime/src/utime-messages.h @@ -23,6 +23,7 @@ * glfs-message-id.h. */ -GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY); +GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY, UTIME_MSG_SET_MDATA_FAILED, + UTIME_MSG_DICT_SET_FAILED); #endif /* __UTIME_MESSAGES_H__ */ diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c index 877c751c764..2acc63e6a05 100644 --- a/xlators/features/utime/src/utime.c +++ b/xlators/features/utime/src/utime.c @@ -9,8 +9,10 @@ */ #include "utime.h" +#include "utime-helpers.h" #include "utime-messages.h" #include "utime-mem-types.h" +#include <glusterfs/call-stub.h> int32_t gf_utime_invalidate(xlator_t *this, inode_t *inode) @@ -133,6 +135,141 @@ mem_acct_init(xlator_t *this) } int32_t +gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xdata) +{ + call_stub_t *stub = frame->local; + /* Don't fail lookup if mdata setxattr fails */ + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED, + "dict set of key for set-ctime-mdata failed"); + } + frame->local = NULL; + call_resume(stub); + STACK_DESTROY(frame->root); + return 0; +} + +int32_t +gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xdata, + struct iatt *postparent) +{ + dict_t *dict = NULL; + struct mdata_iatt *mdata = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + call_frame_t *new_frame = NULL; + + if (!op_ret && dict_get(xdata, GF_XATTR_MDATA_KEY) == NULL) { + dict = dict_new(); + if (!dict) { + op_errno = ENOMEM; + goto err; + } + mdata = GF_MALLOC(sizeof(struct mdata_iatt), gf_common_mt_char); + if (mdata == NULL) { + op_errno = ENOMEM; + goto err; + } + iatt_to_mdata(mdata, stbuf); + ret = dict_set_mdata(dict, CTIME_MDATA_XDATA_KEY, mdata, _gf_false); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY, + "dict 
set of key for set-ctime-mdata failed"); + goto err; + } + new_frame = copy_frame(frame); + if (!new_frame) { + op_errno = ENOMEM; + goto stub_err; + } + + new_frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk, + op_ret, op_errno, inode, stbuf, + xdata, postparent); + if (!new_frame->local) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY, + "lookup_cbk stub allocation failed"); + op_errno = ENOMEM; + STACK_DESTROY(new_frame->root); + goto stub_err; + } + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, stbuf->ia_gfid); + + new_frame->root->uid = 0; + new_frame->root->gid = 0; + new_frame->root->pid = GF_CLIENT_PID_SET_UTIME; + STACK_WIND(new_frame, gf_utime_set_mdata_setxattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, &loc, + dict, 0, NULL); + + dict_unref(dict); + inode_unref(loc.inode); + return 0; + } + + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata, + postparent); + return 0; + +err: + if (mdata) { + GF_FREE(mdata); + } +stub_err: + if (dict) { + dict_unref(dict); + } + STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; +} + +int +gf_utime_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + int op_errno = EINVAL; + int ret = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + + xdata = xdata ? 
dict_ref(xdata) : dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto err; + } + + ret = dict_set_int8(xdata, GF_XATTR_MDATA_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, UTIME_MSG_DICT_SET_FAILED, + "%s: Unable to set dict value for %s", loc->path, + GF_XATTR_MDATA_KEY); + op_errno = -ret; + goto free_dict; + } + + STACK_WIND(frame, gf_utime_set_mdata_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref(xdata); + return 0; + +free_dict: + dict_unref(xdata); +err: + STACK_UNWIND_STRICT(lookup, frame, ret, op_errno, NULL, NULL, NULL, NULL); + return 0; +} + +int32_t init(xlator_t *this) { utime_priv_t *utime = NULL; @@ -182,19 +319,27 @@ notify(xlator_t *this, int event, void *data, ...) } struct xlator_fops fops = { - /* TODO: Need to go through other fops and - * check if they modify time attributes - */ - .rename = gf_utime_rename, .mknod = gf_utime_mknod, - .readv = gf_utime_readv, .fremovexattr = gf_utime_fremovexattr, - .open = gf_utime_open, .create = gf_utime_create, - .mkdir = gf_utime_mkdir, .writev = gf_utime_writev, - .rmdir = gf_utime_rmdir, .fallocate = gf_utime_fallocate, - .truncate = gf_utime_truncate, .symlink = gf_utime_symlink, - .zerofill = gf_utime_zerofill, .link = gf_utime_link, - .ftruncate = gf_utime_ftruncate, .unlink = gf_utime_unlink, - .setattr = gf_utime_setattr, .fsetattr = gf_utime_fsetattr, - .opendir = gf_utime_opendir, .removexattr = gf_utime_removexattr, + .rename = gf_utime_rename, + .mknod = gf_utime_mknod, + .readv = gf_utime_readv, + .fremovexattr = gf_utime_fremovexattr, + .open = gf_utime_open, + .create = gf_utime_create, + .mkdir = gf_utime_mkdir, + .writev = gf_utime_writev, + .rmdir = gf_utime_rmdir, + .fallocate = gf_utime_fallocate, + .truncate = gf_utime_truncate, + .symlink = gf_utime_symlink, + .zerofill = gf_utime_zerofill, + .link = gf_utime_link, + .ftruncate = gf_utime_ftruncate, + .unlink = gf_utime_unlink, + .setattr = gf_utime_setattr, + 
.fsetattr = gf_utime_fsetattr, + .opendir = gf_utime_opendir, + .removexattr = gf_utime_removexattr, + .lookup = gf_utime_lookup, }; struct xlator_cbks cbks = { .invalidate = gf_utime_invalidate, |
