diff options
Diffstat (limited to 'xlators/performance')
-rw-r--r-- | xlators/performance/io-cache/src/io-cache.c | 36 | ||||
-rw-r--r-- | xlators/performance/io-cache/src/io-cache.h | 27 | ||||
-rw-r--r-- | xlators/performance/io-cache/src/page.c | 6 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.c | 11 | ||||
-rw-r--r-- | xlators/performance/md-cache/src/md-cache.c | 428 | ||||
-rw-r--r-- | xlators/performance/nl-cache/src/nl-cache-helper.c | 8 | ||||
-rw-r--r-- | xlators/performance/nl-cache/src/nl-cache.c | 6 | ||||
-rw-r--r-- | xlators/performance/nl-cache/src/nl-cache.h | 2 | ||||
-rw-r--r-- | xlators/performance/open-behind/src/open-behind-messages.h | 6 | ||||
-rw-r--r-- | xlators/performance/open-behind/src/open-behind.c | 1340 | ||||
-rw-r--r-- | xlators/performance/quick-read/src/quick-read.c | 41 | ||||
-rw-r--r-- | xlators/performance/quick-read/src/quick-read.h | 2 | ||||
-rw-r--r-- | xlators/performance/write-behind/src/write-behind.c | 4 |
13 files changed, 828 insertions, 1089 deletions
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index c007e0a355d..9375d29c17f 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -133,23 +133,17 @@ ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode, return 0; } -int32_t +static gf_boolean_t ioc_inode_need_revalidate(ioc_inode_t *ioc_inode) { - int8_t need_revalidate = 0; - struct timeval tv = { - 0, - }; ioc_table_t *table = NULL; + GF_ASSERT(ioc_inode); table = ioc_inode->table; + GF_ASSERT(table); - gettimeofday(&tv, NULL); - - if (time_elapsed(&tv, &ioc_inode->cache.tv) >= table->cache_timeout) - need_revalidate = 1; - - return need_revalidate; + return (gf_time() - ioc_inode->cache.last_revalidate >= + table->cache_timeout); } /* @@ -411,9 +405,6 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ioc_inode_t *ioc_inode = NULL; size_t destroy_size = 0; struct iatt *local_stbuf = NULL; - struct timeval tv = { - 0, - }; local = frame->local; ioc_inode = local->inode; @@ -451,10 +442,9 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret < 0) local_stbuf = NULL; - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); } ioc_inode_unlock(ioc_inode); @@ -1405,9 +1395,6 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, { ioc_inode_t *ioc_inode = NULL; uint64_t tmp_inode = 0; - struct timeval tv = { - 0, - }; inode_ctx_get(fd->inode, this, &tmp_inode); ioc_inode = (ioc_inode_t *)(long)tmp_inode; @@ -1418,10 +1405,9 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, return 0; } - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); } ioc_inode_unlock(ioc_inode); @@ -1945,7 +1931,7 @@ 
__ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) char key[GF_DUMP_MAX_BUF_LEN] = { 0, }; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; @@ -1955,11 +1941,9 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) table = ioc_inode->table; - if (ioc_inode->cache.tv.tv_sec) { - gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.tv.tv_sec, + if (ioc_inode->cache.last_revalidate) { + gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate, gf_timefmt_FT); - snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr), - ".%" GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); gf_proc_dump_write("last-cache-validation-time", "%s", timestr); } diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h index 4303c2fae13..14923c75edc 100644 --- a/xlators/performance/io-cache/src/io-cache.h +++ b/xlators/performance/io-cache/src/io-cache.h @@ -117,15 +117,13 @@ struct ioc_page { struct ioc_cache { rbthash_table_t *page_table; struct list_head page_lru; - time_t mtime; /* - * seconds component of file mtime - */ - time_t mtime_nsec; /* - * nanosecond component of file mtime - */ - struct timeval tv; /* - * time-stamp at last re-validate - */ + time_t mtime; /* + * seconds component of file mtime + */ + time_t mtime_nsec; /* + * nanosecond component of file mtime + */ + time_t last_revalidate; /* timestamp at last re-validate */ }; struct ioc_inode { @@ -270,17 +268,6 @@ ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size, pthread_mutex_unlock(&page->page_lock); \ } while (0) -static inline uint64_t -time_elapsed(struct timeval *now, struct timeval *then) -{ - uint64_t sec = now->tv_sec - then->tv_sec; - - if (sec) - return sec; - - return 0; -} - ioc_inode_t * ioc_inode_search(ioc_table_t *table, inode_t *inode); diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index a8edbde23f2..84b1ae6cb20 100644 --- 
a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -413,9 +413,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ioc_waitq_t *waitq = NULL; size_t iobref_page_size = 0; char zero_filled = 0; - struct timeval tv = { - 0, - }; GF_ASSERT(frame); @@ -431,7 +428,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, zero_filled = ((op_ret >= 0) && (stbuf->ia_mtime == 0)); - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { if (op_ret == -1 || @@ -448,7 +444,7 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec; } - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); if (op_ret < 0) { /* error, readv returned -1 */ diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 6fa4d88389c..3d24cc97f4b 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -1016,16 +1016,13 @@ static uint32_t THRESH_LIMIT = 1209600; /* SECONDS * (EVENTS-1) */ static void iot_apply_event(xlator_t *this, threshold_t *thresh) { - struct timespec now; - time_t delta; + time_t delta, now = gf_time(); /* Refresh for manual testing/debugging. It's cheap. */ THRESH_LIMIT = THRESH_SECONDS * (THRESH_EVENTS - 1); - timespec_now(&now); - if (thresh->value && thresh->update_time) { - delta = now.tv_sec - thresh->update_time; + delta = now - thresh->update_time; /* Be careful about underflow. */ if (thresh->value <= delta) { thresh->value = 0; @@ -1046,7 +1043,7 @@ iot_apply_event(xlator_t *this, threshold_t *thresh) kill(getpid(), SIGTRAP); } - thresh->update_time = now.tv_sec; + thresh->update_time = now; } static void * @@ -1311,7 +1308,7 @@ notify(xlator_t *this, int32_t event, void *data, ...) 
/* Wait for draining stub from queue before notify PARENT_DOWN */ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt); if (stub_cnt) { - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; pthread_mutex_lock(&conf->mutex); { diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 4c76f3089d5..a405be51f02 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ -#include <glusterfs/timespec.h> #include <glusterfs/glusterfs.h> #include <glusterfs/defaults.h> #include <glusterfs/logging.h> @@ -33,8 +32,7 @@ struct mdc_statfs_cache { pthread_mutex_t lock; - gf_boolean_t initialized; - struct timespec last_refreshed; + time_t last_refreshed; /* (time_t)-1 if not yet initialized. */ struct statvfs buf; }; @@ -61,7 +59,7 @@ struct mdc_statistics { }; struct mdc_conf { - int timeout; + uint32_t timeout; gf_boolean_t cache_posix_acl; gf_boolean_t cache_glusterfs_acl; gf_boolean_t cache_selinux; @@ -132,6 +130,7 @@ struct mdc_local { char *key; dict_t *xattr; uint64_t incident_time; + bool update_cache; }; int @@ -375,10 +374,9 @@ unlock: static gf_boolean_t __is_cache_valid(xlator_t *this, time_t mdc_time) { - time_t now = 0; gf_boolean_t ret = _gf_true; struct mdc_conf *conf = NULL; - int timeout = 0; + uint32_t timeout = 0; time_t last_child_down = 0; conf = this->private; @@ -392,15 +390,13 @@ __is_cache_valid(xlator_t *this, time_t mdc_time) last_child_down = conf->last_child_down; timeout = conf->timeout; - time(&now); - if ((mdc_time == 0) || ((last_child_down != 0) && (mdc_time < last_child_down))) { ret = _gf_false; goto out; } - if (now >= (mdc_time + timeout)) { + if (gf_time() >= (mdc_time + timeout)) { ret = _gf_false; } @@ -580,10 +576,9 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, mdc_from_iatt(mdc, iatt); 
mdc->valid = _gf_true; if (update_time) { - time(&mdc->ia_time); - + mdc->ia_time = gf_time(); if (mdc->xa_time && update_xa_time) - time(&mdc->xa_time); + mdc->xa_time = mdc->ia_time; } gf_msg_callingfn( @@ -784,7 +779,7 @@ mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict) if (newdict) mdc->xattr = newdict; - time(&mdc->xa_time); + mdc->xa_time = gf_time(); gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld", uuid_utoa(inode->gfid), (long long)mdc->xa_time); } @@ -985,7 +980,7 @@ out: return ret; } -void +static bool mdc_load_reqs(xlator_t *this, dict_t *dict) { struct mdc_conf *conf = this->private; @@ -994,6 +989,7 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) char *tmp = NULL; char *tmp1 = NULL; int ret = 0; + bool loaded = false; tmp1 = conf->mdc_xattr_str; if (!tmp1) @@ -1011,13 +1007,17 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) conf->mdc_xattr_str = NULL; gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE, "Disabled cache for xattrs, dict_set failed"); + goto out; } pattern = strtok_r(NULL, ",", &tmp); } - GF_FREE(mdc_xattr_str); + loaded = true; + out: - return; + GF_FREE(mdc_xattr_str); + + return loaded; } struct checkpair { @@ -1057,8 +1057,7 @@ mdc_cache_statfs(xlator_t *this, struct statvfs *buf) pthread_mutex_lock(&conf->statfs_cache.lock); { memcpy(&conf->statfs_cache.buf, buf, sizeof(struct statvfs)); - clock_gettime(CLOCK_MONOTONIC, &conf->statfs_cache.last_refreshed); - conf->statfs_cache.initialized = _gf_true; + conf->statfs_cache.last_refreshed = gf_time(); } pthread_mutex_unlock(&conf->statfs_cache.lock); } @@ -1067,8 +1066,7 @@ int mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf) { struct mdc_conf *conf = this->private; - struct timespec now; - double cache_age = 0.0; + uint32_t cache_age = 0; int ret = 0; if (!buf || !conf) { @@ -1077,23 +1075,23 @@ mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf) } *buf = NULL; - timespec_now(&now); 
pthread_mutex_lock(&conf->statfs_cache.lock); { - /* Skip if the cache is not initialized */ - if (!conf->statfs_cache.initialized) { + /* Skip if the cache is not initialized. */ + if (conf->statfs_cache.last_refreshed == (time_t)-1) { ret = -1; goto unlock; } - cache_age = (now.tv_sec - conf->statfs_cache.last_refreshed.tv_sec); + cache_age = (gf_time() - conf->statfs_cache.last_refreshed); - gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %lf", cache_age); + gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs", + cache_age); if (cache_age > conf->timeout) { - /* Expire the cache */ + /* Expire the cache. */ gf_log(this->name, GF_LOG_DEBUG, - "Cache age %lf exceeded timeout %d", cache_age, + "Cache age %u secs exceeded timeout %u secs", cache_age, conf->timeout); ret = -1; goto unlock; @@ -1107,6 +1105,31 @@ err: return ret; } +static dict_t * +mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata) +{ + if (xdata != NULL) { + dict_ref(xdata); + } + + if (local == NULL) { + return xdata; + } + + if (xdata == NULL) { + xdata = dict_new(); + if (xdata == NULL) { + local->update_cache = false; + + return NULL; + } + } + + local->update_cache = mdc_load_reqs(this, xdata); + + return xdata; +} + int mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct statvfs *buf, @@ -1189,6 +1212,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; + if (!local) + goto out; + if (op_ret != 0) { if (op_errno == ENOENT) GF_ATOMIC_INC(conf->mdc_counter.negative_lookup); @@ -1206,9 +1232,6 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - if (!local) - goto out; - if (local->loc.parent) { mdc_inode_iatt_set(this, local->loc.parent, postparent, local->incident_time); @@ -1216,7 +1239,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->loc.inode) { mdc_inode_iatt_set(this, local->loc.inode, stbuf, 
local->incident_time); - mdc_inode_xatt_set(this, local->loc.inode, dict); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, dict); + } } out: MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict, @@ -1235,7 +1260,6 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) 0, }; dict_t *xattr_rsp = NULL; - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; struct mdc_conf *conf = this->private; @@ -1286,18 +1310,18 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) return 0; uncached: - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, xdata); if (xattr_rsp) dict_unref(xattr_rsp); - if (xattr_alloc) - dict_unref(xattr_alloc); + + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1320,7 +1344,9 @@ mdc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time); - mdc_inode_xatt_set(this, local->loc.inode, xdata); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, xdata); + } out: MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); @@ -1334,7 +1360,6 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) int ret; struct iatt stbuf; mdc_local_t *local = NULL; - dict_t *xattr_alloc = NULL; struct mdc_conf *conf = this->private; local = mdc_local_get(frame, loc->inode); @@ -1358,17 +1383,16 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) return 0; uncached: - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); GF_ATOMIC_INC(conf->mdc_counter.stat_miss); STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, 
loc, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1391,7 +1415,9 @@ mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time); - mdc_inode_xatt_set(this, local->fd->inode, xdata); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->fd->inode, xdata); + } out: MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); @@ -1405,14 +1431,13 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) int ret; struct iatt stbuf; mdc_local_t *local = NULL; - dict_t *xattr_alloc = NULL; struct mdc_conf *conf = this->private; local = mdc_local_get(frame, fd->inode); if (!local) goto uncached; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); ret = mdc_inode_iatt_get(this, fd->inode, &stbuf); if (ret != 0) @@ -1424,17 +1449,16 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) return 0; uncached: - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); GF_ATOMIC_INC(conf->mdc_counter.stat_miss); STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, fd, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1473,8 +1497,9 @@ mdc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - local->loc.inode = inode_ref(loc->inode); + if (local != NULL) { + local->loc.inode = inode_ref(loc->inode); + } STACK_WIND(frame, mdc_truncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); @@ -1517,8 +1542,9 @@ mdc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = 
fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_ftruncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); @@ -1566,9 +1592,10 @@ mdc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); @@ -1616,9 +1643,10 @@ mdc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_mkdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); @@ -1675,8 +1703,9 @@ mdc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); + if (local != NULL) { + loc_copy(&local->loc, loc); + } STACK_WIND(frame, mdc_unlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); @@ -1729,8 +1758,9 @@ mdc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); + if (local != NULL) { + loc_copy(&local->loc, loc); + } STACK_WIND(frame, mdc_rmdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, loc, flag, xdata); @@ -1777,13 +1807,22 @@ mdc_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { mdc_local_t *local = NULL; + char *name; + name = gf_strdup(linkname); + if (name == NULL) { + goto 
wind; + } local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + GF_FREE(name); + goto wind; + } loc_copy(&local->loc, loc); + local->linkname = name; - local->linkname = gf_strdup(linkname); - +wind: STACK_WIND(frame, mdc_symlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); return 0; @@ -1841,9 +1880,10 @@ mdc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, mdc_local_t *local = NULL; local = mdc_local_get(frame, oldloc->inode); - - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); + if (local != NULL) { + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + } STACK_WIND(frame, mdc_rename_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); @@ -1892,9 +1932,10 @@ mdc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, mdc_local_t *local = NULL; local = mdc_local_get(frame, oldloc->inode); - - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); + if (local != NULL) { + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + } STACK_WIND(frame, mdc_link_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); @@ -1943,9 +1984,10 @@ mdc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, @@ -1992,8 +2034,9 @@ mdc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, } local = mdc_local_get(frame, loc->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } out: STACK_WIND(frame, mdc_open_cbk, FIRST_CHILD(this), @@ -2034,8 +2077,9 @@ mdc_readv(call_frame_t *frame, 
xlator_t *this, fd_t *fd, size_t size, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); @@ -2076,8 +2120,9 @@ mdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, @@ -2093,15 +2138,14 @@ mdc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, mdc_local_t *local = NULL; local = frame->local; + if (!local) + goto out; if (op_ret != 0) { mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time); goto out; } - if (!local) - goto out; - mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf, _gf_true, local->incident_time); mdc_inode_xatt_update(this, local->loc.inode, xdata); @@ -2122,6 +2166,9 @@ mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, struct mdc_conf *conf = this->private; local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + goto wind; + } loc_copy(&local->loc, loc); @@ -2149,6 +2196,7 @@ mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, } } +wind: STACK_WIND(frame, mdc_setattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); @@ -2194,8 +2242,11 @@ mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, struct mdc_conf *conf = this->private; local = mdc_local_get(frame, fd->inode); + if (local == NULL) { + goto wind; + } - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) { if (!xdata) @@ -2221,6 +2272,7 @@ 
mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, } } +wind: STACK_WIND(frame, mdc_fsetattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); @@ -2262,8 +2314,9 @@ mdc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_fsync_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); @@ -2318,9 +2371,10 @@ mdc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xattr); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xattr); + } STACK_WIND(frame, mdc_setxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata); @@ -2376,9 +2430,10 @@ mdc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); - local->xattr = dict_ref(xattr); + if (local != NULL) { + local->fd = __fd_ref(fd); + local->xattr = dict_ref(xattr); + } STACK_WIND(frame, mdc_fsetxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, fd, xattr, flags, xdata); @@ -2408,7 +2463,9 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - mdc_inode_xatt_set(this, local->loc.inode, xdata); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, xdata); + } out: MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); @@ -2425,19 +2482,19 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, mdc_local_t *local = NULL; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; - dict_t *xattr_alloc = NULL; - gf_boolean_t key_satisfied 
= _gf_true; + gf_boolean_t key_satisfied = _gf_false; local = mdc_local_get(frame, loc->inode); - if (!local) + if (!local) { goto uncached; + } loc_copy(&local->loc, loc); if (!is_mdc_key_satisfied(this, key)) { - key_satisfied = _gf_false; goto uncached; } + key_satisfied = _gf_true; ret = mdc_inode_xatt_get(this, loc->inode, &xattr); if (ret != 0) @@ -2458,18 +2515,17 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, uncached: if (key_satisfied) { - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); } GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, loc, key, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (key_satisfied && (xdata != NULL)) { + dict_unref(xdata); + } + return 0; } @@ -2496,7 +2552,9 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - mdc_inode_xatt_set(this, local->fd->inode, xdata); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->fd->inode, xdata); + } out: MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata); @@ -2513,14 +2571,13 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, dict_t *xattr = NULL; int op_errno = ENODATA; struct mdc_conf *conf = this->private; - dict_t *xattr_alloc = NULL; gf_boolean_t key_satisfied = _gf_true; local = mdc_local_get(frame, fd->inode); if (!local) goto uncached; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); if (!is_mdc_key_satisfied(this, key)) { key_satisfied = _gf_false; @@ -2546,18 +2603,17 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, uncached: if (key_satisfied) { - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); } GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); 
STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (key_satisfied && (xdata != NULL)) { + dict_unref(xdata); + } + return 0; } @@ -2613,12 +2669,21 @@ mdc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, int ret = 0; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; + char *name2; + + name2 = gf_strdup(name); + if (name2 == NULL) { + goto uncached; + } local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + GF_FREE(name2); + goto uncached; + } loc_copy(&local->loc, loc); - - local->key = gf_strdup(name); + local->key = name2; if (!is_mdc_key_satisfied(this, name)) goto uncached; @@ -2704,12 +2769,21 @@ mdc_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, int ret = 0; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; + char *name2; - local = mdc_local_get(frame, fd->inode); + name2 = gf_strdup(name); + if (name2 == NULL) { + goto uncached; + } - local->fd = fd_ref(fd); + local = mdc_local_get(frame, fd->inode); + if (local == NULL) { + GF_FREE(name2); + goto uncached; + } - local->key = gf_strdup(name); + local->fd = __fd_ref(fd); + local->key = name2; if (!is_mdc_key_satisfied(this, name)) goto uncached; @@ -2767,27 +2841,23 @@ int mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - - if (!xdata) - xdata = xattr_alloc = dict_new(); - - if (xdata) { - /* Tell readdir-ahead to include these keys in xdata when it - * internally issues readdirp() in it's opendir_cbk */ - mdc_load_reqs(this, xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); } + /* Tell readdir-ahead to include these keys in xdata when it + * internally issues readdirp() in it's opendir_cbk */ + xdata = mdc_prepare_request(this, local, xdata); + STACK_WIND(frame, 
mdc_opendir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (xdata != NULL) { + dict_unref(xdata); + } return 0; } @@ -2815,7 +2885,9 @@ mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, continue; mdc_inode_iatt_set(this, entry->inode, &entry->d_stat, local->incident_time); - mdc_inode_xatt_set(this, entry->inode, entry->dict); + if (local->update_cache) { + mdc_inode_xatt_set(this, entry->inode, entry->dict); + } } unwind: @@ -2827,24 +2899,23 @@ int mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); if (!local) goto out; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; out: MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); @@ -2875,7 +2946,6 @@ int mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { - int need_unref = 0; mdc_local_t *local = NULL; struct mdc_conf *conf = this->private; @@ -2883,7 +2953,7 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, if (!local) goto unwind; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); if (!conf->force_readdirp) { STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this), @@ -2891,19 +2961,14 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, return 0; } - if (!xdata) { - xdata = dict_new(); - need_unref = 1; - } - - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, 
local, xdata); STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); - if (need_unref && xdata) + if (xdata != NULL) { dict_unref(xdata); + } return 0; unwind: @@ -2945,7 +3010,9 @@ mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, @@ -2987,7 +3054,9 @@ mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); @@ -3028,7 +3097,9 @@ mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); @@ -3110,7 +3181,7 @@ mdc_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, if (!local) goto unwind; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); STACK_WIND(frame, mdc_fsyncdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata); @@ -3483,7 +3554,12 @@ mdc_register_xattr_inval(xlator_t *this) goto out; } - mdc_load_reqs(this, xattr); + if (!mdc_load_reqs(this, xattr)) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY, + "failed to populate cache entries"); + ret = -1; + goto out; + } frame = create_frame(this, this->ctx->pool); if (!frame) { @@ -3532,7 +3608,7 @@ int mdc_reconfigure(xlator_t *this, dict_t *options) { struct mdc_conf 
*conf = NULL; - int timeout = 0; + int timeout = 0, ret = 0; char *tmp_str = NULL; conf = this->private; @@ -3572,7 +3648,10 @@ mdc_reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out); GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out); - mdc_xattr_list_populate(conf, tmp_str); + + ret = mdc_xattr_list_populate(conf, tmp_str); + if (ret < 0) + goto out; /* If timeout is greater than 60s (default before the patch that added * cache invalidation support was added) then, cache invalidation @@ -3585,25 +3664,22 @@ mdc_reconfigure(xlator_t *this, dict_t *options) } conf->timeout = timeout; - (void)mdc_register_xattr_inval(this); + ret = mdc_register_xattr_inval(this); out: - return 0; + return ret; } int32_t mdc_mem_acct_init(xlator_t *this) { - int ret = -1; - - ret = xlator_mem_acct_init(this, gf_mdc_mt_end + 1); - return ret; + return xlator_mem_acct_init(this, gf_mdc_mt_end + 1); } int mdc_init(xlator_t *this) { struct mdc_conf *conf = NULL; - int timeout = 0; + uint32_t timeout = 0; char *tmp_str = NULL; conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t); @@ -3615,7 +3691,7 @@ mdc_init(xlator_t *this) LOCK_INIT(&conf->lock); - GF_OPTION_INIT("md-cache-timeout", timeout, int32, out); + GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out); GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out); @@ -3649,7 +3725,9 @@ mdc_init(xlator_t *this) GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out); mdc_xattr_list_populate(conf, tmp_str); - time(&conf->last_child_down); + conf->last_child_down = gf_time(); + conf->statfs_cache.last_refreshed = (time_t)-1; + /* initialize gf_atomic_t counters */ GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0); GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0); @@ -3680,7 +3758,7 @@ out: } void -mdc_update_child_down_time(xlator_t *this, time_t *now) +mdc_update_child_down_time(xlator_t *this, time_t now) { struct mdc_conf *conf = NULL; @@ 
-3688,7 +3766,7 @@ mdc_update_child_down_time(xlator_t *this, time_t *now) LOCK(&conf->lock); { - conf->last_child_down = *now; + conf->last_child_down = now; } UNLOCK(&conf->lock); } @@ -3698,14 +3776,12 @@ mdc_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; struct mdc_conf *conf = NULL; - time_t now = 0; conf = this->private; switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: - time(&now); - mdc_update_child_down_time(this, &now); + mdc_update_child_down_time(this, gf_time()); break; case GF_EVENT_UPCALL: if (conf->mdc_invalidation) diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c index 03dedf8ea08..29b99b5b8ea 100644 --- a/xlators/performance/nl-cache/src/nl-cache-helper.c +++ b/xlators/performance/nl-cache/src/nl-cache-helper.c @@ -113,7 +113,7 @@ out: } void -nlc_update_child_down_time(xlator_t *this, time_t *now) +nlc_update_child_down_time(xlator_t *this, time_t now) { nlc_conf_t *conf = NULL; @@ -121,7 +121,7 @@ nlc_update_child_down_time(xlator_t *this, time_t *now) LOCK(&conf->lock); { - conf->last_child_down = *now; + conf->last_child_down = now; } UNLOCK(&conf->lock); @@ -262,7 +262,7 @@ nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) if (nlc_ctx->timer) { gf_tw_mod_timer_pending(conf->timer_wheel, nlc_ctx->timer, conf->cache_timeout); - time(&nlc_ctx->cache_time); + nlc_ctx->cache_time = gf_time(); goto unlock; } @@ -496,7 +496,7 @@ __nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) nlc_ctx->timer_data = tmp; gf_tw_add_timer(conf->timer_wheel, timer); - time(&nlc_ctx->cache_time); + nlc_ctx->cache_time = gf_time(); gf_msg_trace(this->name, 0, "Registering timer:%p, inode:%p, " "gfid:%s", diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c index cd0e1d195fd..33a7c471663 100644 --- a/xlators/performance/nl-cache/src/nl-cache.c +++ 
b/xlators/performance/nl-cache/src/nl-cache.c @@ -520,15 +520,13 @@ int nlc_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; - time_t now = 0; switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: case GF_EVENT_CHILD_UP: case GF_EVENT_SOME_DESCENDENT_UP: - time(&now); - nlc_update_child_down_time(this, &now); + nlc_update_child_down_time(this, gf_time()); /* TODO: nlc_clear_all_cache (this); else lru prune will lazily clear it*/ break; @@ -731,7 +729,7 @@ nlc_init(xlator_t *this) GF_ATOMIC_INIT(conf->nlc_counter.nlc_invals, 0); INIT_LIST_HEAD(&conf->lru); - time(&conf->last_child_down); + conf->last_child_down = gf_time(); conf->timer_wheel = glusterfs_ctx_tw_get(this->ctx); if (!conf->timer_wheel) { diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h index 8b09972bb09..85fcc176342 100644 --- a/xlators/performance/nl-cache/src/nl-cache.h +++ b/xlators/performance/nl-cache/src/nl-cache.h @@ -155,7 +155,7 @@ nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, loc_t *loc, loc_t *loc2); void -nlc_update_child_down_time(xlator_t *this, time_t *now); +nlc_update_child_down_time(xlator_t *this, time_t now); void nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason); diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h index f25082433f8..0e789177684 100644 --- a/xlators/performance/open-behind/src/open-behind-messages.h +++ b/xlators/performance/open-behind/src/open-behind-messages.h @@ -23,6 +23,10 @@ */ GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED, - OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY); + OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY, + OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE); + +#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop" +#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state" 
#endif /* _OPEN_BEHIND_MESSAGES_H_ */ diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index cbe89ec82e8..600c3b62ffe 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -16,6 +16,18 @@ #include "open-behind-messages.h" #include <glusterfs/glusterfs-acl.h> +/* Note: The initial design of open-behind was made to cover the simple case + * of open, read, close for small files. This pattern combined with + * quick-read can do the whole operation without a single request to the + * bricks (except the initial lookup). + * + * The way to do this has been improved, but the logic remains the same. + * Basically, this means that any operation sent to the fd or the inode + * that is not a read, causes the open request to be sent to the + * bricks, and all future operations will be executed synchronously, + * including opens (it's reset once all fd's are closed). + */ + typedef struct ob_conf { gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe e.g - fstat() readv() @@ -32,1096 +44,811 @@ typedef struct ob_conf { */ } ob_conf_t; -typedef struct ob_inode { - inode_t *inode; - struct list_head resume_fops; - struct list_head ob_fds; - int count; - int op_ret; - int op_errno; - gf_boolean_t open_in_progress; - int unlinked; -} ob_inode_t; +/* A negative state represents an errno value negated. In this case the + * current operation cannot be processed. */ +typedef enum _ob_state { + /* There are no opens on the inode or the first open is already + * completed. The current operation can be sent directly.
*/ + OB_STATE_READY = 0, -typedef struct ob_fd { - call_frame_t *open_frame; - loc_t loc; - dict_t *xdata; - int flags; - int op_errno; - ob_inode_t *ob_inode; - fd_t *fd; - gf_boolean_t opened; - gf_boolean_t ob_inode_fops_waiting; - struct list_head list; - struct list_head ob_fds_on_inode; -} ob_fd_t; + /* There's an open pending and it has been triggered. The current + * operation should be "stubbified" and processed with + * ob_stub_dispatch(). */ + OB_STATE_OPEN_TRIGGERED, -ob_inode_t * -ob_inode_alloc(inode_t *inode) -{ - ob_inode_t *ob_inode = NULL; + /* There's an open pending but it has not been triggered. The current + * operation can be processed directly but using an anonymous fd. */ + OB_STATE_OPEN_PENDING, - ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); - if (ob_inode == NULL) - goto out; + /* The current operation is the first open on the inode. */ + OB_STATE_FIRST_OPEN +} ob_state_t; - ob_inode->inode = inode; - INIT_LIST_HEAD(&ob_inode->resume_fops); - INIT_LIST_HEAD(&ob_inode->ob_fds); -out: - return ob_inode; -} - -void -ob_inode_free(ob_inode_t *ob_inode) -{ - if (ob_inode == NULL) - goto out; +typedef struct ob_inode { + /* List of stubs pending on the first open. Once the first open is + * complete, all these stubs will be resubmitted, and dependencies + * will be checked again. */ + struct list_head resume_fops; - list_del_init(&ob_inode->resume_fops); - list_del_init(&ob_inode->ob_fds); + /* The inode this object references. */ + inode_t *inode; - GF_FREE(ob_inode); -out: - return; -} + /* The fd from the first open sent to this inode. It will be set + * from the moment the open is processed until the open is fully + * executed or closed before actually opened. It's NULL in all + * other cases. */ + fd_t *first_fd; + + /* The stub from the first open operation. When open fop starts + * being processed, it's assigned the OB_OPEN_PREPARING value + * until the actual stub is created.
This is necessary to avoid + * creating the stub inside a locked region. Once the stub is + * successfully created, it's assigned here. This value is set + * to NULL once the stub is resumed. */ + call_stub_t *first_open; + + /* The total number of currently open fd's on this inode. */ + int32_t open_count; + + /* This flag is set as soon as we know that the open will be + * sent to the bricks, even before the stub is ready. */ + bool triggered; +} ob_inode_t; -ob_inode_t * -ob_inode_get(xlator_t *this, inode_t *inode) +/* Dummy pointer used temporarily while the actual open stub is being created */ +#define OB_OPEN_PREPARING ((call_stub_t *)-1) + +#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) \ + case OB_STATE_FIRST_OPEN: \ + gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \ + "fop=%s", #_fop, "state=%d", __ob_state, NULL); \ + default_##_fop##_failure_cbk(_frame, EINVAL); \ + break; \ + case OB_STATE_READY: \ + default_##_fop(_frame, _xl, ##_args); \ + break; \ + case OB_STATE_OPEN_TRIGGERED: { \ + call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \ + ##_args); \ + if (__ob_stub != NULL) { \ + ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \ + break; \ + } \ + __ob_state = -ENOMEM; \ + } \ + default: \ + gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \ + OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \ + default_##_fop##_failure_cbk(_frame, -__ob_state) + +#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) 
\ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_fd( \ + _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + if (!(_trigger)) { \ + fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \ + (_fd)->flags); \ + if (__ob_fd != NULL) { \ + default_##_fop(_frame, _xl, ##_args); \ + fd_unref(__ob_fd); \ + break; \ + } \ + __ob_state = -ENOMEM; \ + } \ + OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_fd( \ + _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \ + break; \ + OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) 
\ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_inode( \ + _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +static ob_inode_t * +ob_inode_get_locked(xlator_t *this, inode_t *inode) { ob_inode_t *ob_inode = NULL; uint64_t value = 0; - int ret = 0; - if (!inode) - goto out; + if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) { + return (ob_inode_t *)(uintptr_t)value; + } - LOCK(&inode->lock); - { - __inode_ctx_get(inode, this, &value); - if (value == 0) { - ob_inode = ob_inode_alloc(inode); - if (ob_inode == NULL) - goto unlock; - - value = (uint64_t)(uintptr_t)ob_inode; - ret = __inode_ctx_set(inode, this, &value); - if (ret < 0) { - ob_inode_free(ob_inode); - ob_inode = NULL; - } - } else { - ob_inode = (ob_inode_t *)(uintptr_t)value; + ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); + if (ob_inode != NULL) { + ob_inode->inode = inode; + INIT_LIST_HEAD(&ob_inode->resume_fops); + + value = (uint64_t)(uintptr_t)ob_inode; + if (__inode_ctx_set(inode, this, &value) < 0) { + GF_FREE(ob_inode); + ob_inode = NULL; } } -unlock: - UNLOCK(&inode->lock); -out: return ob_inode; } -ob_fd_t * -__ob_fd_ctx_get(xlator_t *this, fd_t *fd) +static ob_state_t +ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd, + int32_t open_count, bool synchronous, bool trigger, + ob_inode_t **pob_inode, fd_t **pfd) { - uint64_t value = 0; - int ret = -1; - ob_fd_t *ob_fd = NULL; + ob_conf_t *conf; + ob_inode_t *ob_inode; + call_stub_t *open_stub; - ret = __fd_ctx_get(fd, this, &value); - if (ret) - return NULL; + if (inode == NULL) { + return OB_STATE_READY; + } - ob_fd = (void *)((long)value); + conf = xl->private; - return ob_fd; -} + *pfd = NULL; -ob_fd_t * -ob_fd_ctx_get(xlator_t *this, fd_t *fd) -{ - ob_fd_t *ob_fd = NULL; - - 
LOCK(&fd->lock); + LOCK(&inode->lock); { - ob_fd = __ob_fd_ctx_get(this, fd); - } - UNLOCK(&fd->lock); - - return ob_fd; -} + ob_inode = ob_inode_get_locked(xl, inode); + if (ob_inode == NULL) { + UNLOCK(&inode->lock); -int -__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) -{ - uint64_t value = 0; - int ret = -1; + return -ENOMEM; + } + *pob_inode = ob_inode; + + ob_inode->open_count += open_count; + + /* If first_fd is not NULL, it means that there's a previous open not + * yet completed. */ + if (ob_inode->first_fd != NULL) { + *pfd = ob_inode->first_fd; + /* If the current request doesn't trigger the open and it hasn't + * been triggered yet, we can continue without issuing the open + * only if the current request belongs to the same fd as the + * first one. */ + if (!trigger && !ob_inode->triggered && + (ob_inode->first_fd == fd)) { + UNLOCK(&inode->lock); + + return OB_STATE_OPEN_PENDING; + } - value = (long)((void *)ob_fd); + /* We need to issue the open. It could have already been triggered + * before. In this case open_stub will be NULL. Or the initial open + * may not be completely ready yet. In this case open_stub will be + * OB_OPEN_PREPARING. */ + open_stub = ob_inode->first_open; + ob_inode->first_open = NULL; + ob_inode->triggered = true; - ret = __fd_ctx_set(fd, this, value); + UNLOCK(&inode->lock); - return ret; -} + if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) { + call_resume(open_stub); + } -int -ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) -{ - int ret = -1; + return OB_STATE_OPEN_TRIGGERED; + } - LOCK(&fd->lock); - { - ret = __ob_fd_ctx_set(this, fd, ob_fd); - } - UNLOCK(&fd->lock); + /* There's no pending open. Only opens can be non synchronous, so all + * regular fops will be processed directly. For non synchronous opens, + * we'll still process them normally (i.e. synchronously) if there are + * more file descriptors open.
*/ + if (synchronous || (ob_inode->open_count > open_count)) { + UNLOCK(&inode->lock); - return ret; -} + return OB_STATE_READY; + } -ob_fd_t * -ob_fd_new(void) -{ - ob_fd_t *ob_fd = NULL; + *pfd = fd; - ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t); + /* This is the first open. We keep a reference on the fd and set + * first_open stub to OB_OPEN_PREPARING until the actual stub can + * be assigned (we don't create the stub here to avoid doing memory + * allocations inside the mutex). */ + ob_inode->first_fd = __fd_ref(fd); + ob_inode->first_open = OB_OPEN_PREPARING; - INIT_LIST_HEAD(&ob_fd->list); - INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode); + /* If lazy_open is not set, we'll need to immediately send the open, + * so we set triggered right now. */ + ob_inode->triggered = !conf->lazy_open; + } + UNLOCK(&inode->lock); - return ob_fd; + return OB_STATE_FIRST_OPEN; } -void -ob_fd_free(ob_fd_t *ob_fd) +static ob_state_t +ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count, + bool synchronous, bool trigger, ob_inode_t **pob_inode, + fd_t **pfd) { - LOCK(&ob_fd->fd->inode->lock); - { - list_del_init(&ob_fd->ob_fds_on_inode); - } - UNLOCK(&ob_fd->fd->inode->lock); - - loc_wipe(&ob_fd->loc); - - if (ob_fd->xdata) - dict_unref(ob_fd->xdata); + uint64_t err; - if (ob_fd->open_frame) { - /* If we sill have a frame it means that background open has never - * been triggered. We need to release the pending reference. 
*/ - fd_unref(ob_fd->fd); - - STACK_DESTROY(ob_fd->open_frame->root); + if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) { + return (ob_state_t)-err; } - GF_FREE(ob_fd); + return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous, + trigger, pob_inode, pfd); } -int -ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, fd_t *fd_ret, dict_t *xdata) +static ob_state_t +ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode, + fd_t **pfd) { - fd_t *fd = NULL; - int count = 0; - int ob_inode_op_ret = 0; - int ob_inode_op_errno = 0; - ob_fd_t *ob_fd = NULL; - call_stub_t *stub = NULL, *tmp = NULL; - ob_inode_t *ob_inode = NULL; - gf_boolean_t ob_inode_fops_waiting = _gf_false; - struct list_head fops_waiting_on_fd, fops_waiting_on_inode; + bool synchronous; - fd = frame->local; - frame->local = NULL; - - INIT_LIST_HEAD(&fops_waiting_on_fd); - INIT_LIST_HEAD(&fops_waiting_on_inode); + /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't + * we also execute this open synchronously ? */ + synchronous = (flags & O_TRUNC) != 0; - ob_inode = ob_inode_get(this, fd->inode); + return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd); +} - LOCK(&fd->lock); +static int32_t +ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) +{ + LOCK(&ob_inode->inode->lock); { - ob_fd = __ob_fd_ctx_get(this, fd); - ob_fd->opened = _gf_true; - - ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting; - - list_splice_init(&ob_fd->list, &fops_waiting_on_fd); - - if (op_ret < 0) { - /* mark fd BAD for ever */ - ob_fd->op_errno = op_errno; - ob_fd = NULL; /*shouldn't be freed*/ - } else { - __fd_ctx_del(fd, this, NULL); - } - } - UNLOCK(&fd->lock); - - if (ob_inode_fops_waiting) { - LOCK(&fd->inode->lock); - { - count = --ob_inode->count; - if (op_ret < 0) { - /* TODO: when to reset the error? 
*/ - ob_inode->op_ret = -1; - ob_inode->op_errno = op_errno; - } - - if (count == 0) { - ob_inode->open_in_progress = _gf_false; - ob_inode_op_ret = ob_inode->op_ret; - ob_inode_op_errno = ob_inode->op_errno; - list_splice_init(&ob_inode->resume_fops, - &fops_waiting_on_inode); - } + /* We only queue a stub if the open has not been completed or + * cancelled. */ + if (ob_inode->first_fd == fd) { + list_add_tail(&stub->list, &ob_inode->resume_fops); + stub = NULL; } - UNLOCK(&fd->inode->lock); - } - - if (ob_fd) - ob_fd_free(ob_fd); - - list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list) - { - list_del_init(&stub->list); - - if (op_ret < 0) - call_unwind_error(stub, -1, op_errno); - else - call_resume(stub); } + UNLOCK(&ob_inode->inode->lock); - list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list) - { - list_del_init(&stub->list); - - if (ob_inode_op_ret < 0) - call_unwind_error(stub, -1, ob_inode_op_errno); - else - call_resume(stub); + if (stub != NULL) { + call_resume(stub); } - /* The background open is completed. We can release the 'fd' reference. 
*/ - fd_unref(fd); - - STACK_DESTROY(frame->root); - return 0; } -int -ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +static void +ob_open_destroy(call_stub_t *stub, fd_t *fd) { - call_frame_t *frame = NULL; + stub->frame->local = NULL; + STACK_DESTROY(stub->frame->root); + call_stub_destroy(stub); + fd_unref(fd); +} - if (ob_fd == NULL) { - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) - goto unlock; +static int32_t +ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) +{ + bool closed; - frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; - } - unlock: - UNLOCK(&fd->lock); - } else { - LOCK(&fd->lock); - { - frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; + LOCK(&ob_inode->inode->lock); + { + closed = ob_inode->first_fd != fd; + if (!closed) { + if (ob_inode->triggered) { + ob_inode->first_open = NULL; + } else { + ob_inode->first_open = stub; + stub = NULL; + } } - UNLOCK(&fd->lock); } + UNLOCK(&ob_inode->inode->lock); - if (frame) { - /* We don't need to take a reference here. We already have a reference - * while the open is pending. 
*/ - frame->local = fd; - - STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, - ob_fd->xdata); + if (stub != NULL) { + if (closed) { + ob_open_destroy(stub, fd); + } else { + call_resume(stub); + } } return 0; } -void -ob_inode_wake(xlator_t *this, struct list_head *ob_fds) +static void +ob_resume_pending(struct list_head *list) { - ob_fd_t *ob_fd = NULL, *tmp = NULL; + call_stub_t *stub; - if (!list_empty(ob_fds)) { - list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) - { - ob_fd_wake(this, ob_fd->fd, ob_fd); - ob_fd_free(ob_fd); - } - } -} + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); -/* called holding inode->lock and fd->lock */ -void -ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) -{ - if (!src || !dst) - goto out; - - dst->fd = src->fd; - dst->loc.inode = inode_ref(src->loc.inode); - gf_uuid_copy(dst->loc.gfid, src->loc.gfid); - dst->flags = src->flags; - dst->xdata = dict_ref(src->xdata); - dst->ob_inode = src->ob_inode; -out: - return; + call_resume(stub); + } } -int -open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode, - call_stub_t *stub) +static void +ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret, + int32_t op_errno) { - ob_inode_t *ob_inode = NULL; - ob_fd_t *ob_fd = NULL, *tmp = NULL; - gf_boolean_t was_open_in_progress = _gf_false; - gf_boolean_t wait_for_open = _gf_false; - struct list_head ob_fds; + struct list_head list; - ob_inode = ob_inode_get(this, inode); - if (ob_inode == NULL) - goto out; + INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&ob_fds); + if (op_ret < 0) { + fd_ctx_set(fd, xl, op_errno <= 0 ? 
EIO : op_errno); + } - LOCK(&inode->lock); + LOCK(&ob_inode->inode->lock); { - was_open_in_progress = ob_inode->open_in_progress; - ob_inode->unlinked = 1; - - if (was_open_in_progress) { - list_add_tail(&stub->list, &ob_inode->resume_fops); - goto inode_unlock; - } - - list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode) - { - LOCK(&ob_fd->fd->lock); - { - if (ob_fd->opened) - goto fd_unlock; - - ob_inode->count++; - ob_fd->ob_inode_fops_waiting = _gf_true; - - if (ob_fd->open_frame == NULL) { - /* open in progress no need of wake */ - } else { - tmp = ob_fd_new(); - tmp->open_frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; - - ob_fd_copy(ob_fd, tmp); - list_add_tail(&tmp->ob_fds_on_inode, &ob_fds); - } - } - fd_unlock: - UNLOCK(&ob_fd->fd->lock); - } - - if (ob_inode->count) { - wait_for_open = ob_inode->open_in_progress = _gf_true; - list_add_tail(&stub->list, &ob_inode->resume_fops); + /* Only update the fields if the file has not been closed before + * getting here. 
*/ + if (ob_inode->first_fd == fd) { + list_splice_init(&ob_inode->resume_fops, &list); + ob_inode->first_fd = NULL; + ob_inode->first_open = NULL; + ob_inode->triggered = false; } } -inode_unlock: - UNLOCK(&inode->lock); + UNLOCK(&ob_inode->inode->lock); -out: - if (!was_open_in_progress) { - if (!wait_for_open) { - call_resume(stub); - } else { - ob_inode_wake(this, &ob_fds); - } - } + ob_resume_pending(&list); - return 0; + fd_unref(fd); } -int -open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub) +static int32_t +ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - ob_fd_t *ob_fd = NULL; - int op_errno = 0; - - if (!fd) - goto nofd; - - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) - goto unlock; + ob_inode_t *ob_inode; - if (ob_fd->op_errno) { - op_errno = ob_fd->op_errno; - goto unlock; - } + ob_inode = frame->local; + frame->local = NULL; - list_add_tail(&stub->list, &ob_fd->list); - } -unlock: - UNLOCK(&fd->lock); + ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno); -nofd: - if (op_errno) - call_unwind_error(stub, -1, op_errno); - else if (ob_fd) - ob_fd_wake(this, fd, NULL); - else - call_resume(stub); + STACK_DESTROY(frame->root); return 0; } -int -ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, +static int32_t +ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, dict_t *xdata) { - ob_fd_t *ob_fd = NULL; - int ret = -1; - ob_conf_t *conf = NULL; - ob_inode_t *ob_inode = NULL; - gf_boolean_t open_in_progress = _gf_false; - int unlinked = 0; + STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - conf = this->private; + return 0; +} - if (flags & O_TRUNC) { - STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; +static int32_t +ob_open(call_frame_t 
*frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) +{ + ob_inode_t *ob_inode; + call_frame_t *open_frame; + call_stub_t *stub; + fd_t *first_fd; + ob_state_t state; + + state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd); + if (state == OB_STATE_READY) { + /* There's no pending open, but there are other file descriptors opened + * or the current flags require a synchronous open. */ + return default_open(frame, this, loc, flags, fd, xdata); } - ob_inode = ob_inode_get(this, fd->inode); - - ob_fd = ob_fd_new(); - if (!ob_fd) - goto enomem; - - ob_fd->ob_inode = ob_inode; + if (state == OB_STATE_OPEN_TRIGGERED) { + /* The first open is in progress (either because it was already issued + * or because this request triggered it). We try to create a new stub + * to retry the operation once the initial open completes. */ + stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata); + if (stub != NULL) { + return ob_stub_dispatch(this, ob_inode, first_fd, stub); + } - ob_fd->fd = fd; + state = -ENOMEM; + } - ob_fd->open_frame = copy_frame(frame); - if (!ob_fd->open_frame) - goto enomem; - ret = loc_copy(&ob_fd->loc, loc); - if (ret) - goto enomem; + if (state == OB_STATE_FIRST_OPEN) { + /* We try to create a stub for the new open. A new frame needs to be + * used because the current one may be destroyed soon after sending + * the open's reply. */ + open_frame = copy_frame(frame); + if (open_frame != NULL) { + stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd, + xdata); + if (stub != NULL) { + open_frame->local = ob_inode; - ob_fd->flags = flags; - if (xdata) - ob_fd->xdata = dict_ref(xdata); + /* TODO: Previous version passed xdata back to the caller, but + * probably this doesn't make sense since it won't contain + * any requested data. I think it would be better to pass + * NULL for xdata. 
*/ + default_open_cbk(frame, NULL, this, 0, 0, fd, xdata); - LOCK(&fd->inode->lock); - { - open_in_progress = ob_inode->open_in_progress; - unlinked = ob_inode->unlinked; - if (!open_in_progress && !unlinked) { - ret = ob_fd_ctx_set(this, fd, ob_fd); - if (ret) { - UNLOCK(&fd->inode->lock); - goto enomem; + return ob_open_dispatch(this, ob_inode, first_fd, stub); } - list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds); + STACK_DESTROY(open_frame->root); } - } - UNLOCK(&fd->inode->lock); - /* We take a reference while the background open is pending or being - * processed. If we finally wind the request in the foreground, then - * ob_fd_free() will take care of this additional reference. */ - fd_ref(fd); + /* In case of error, simulate a regular completion but with an error + * code. */ + ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM); - if (!open_in_progress && !unlinked) { - STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); - - if (!conf->lazy_open) - ob_fd_wake(this, fd, NULL); - } else { - ob_fd_free(ob_fd); - STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + state = -ENOMEM; } - return 0; -enomem: - if (ob_fd) { - if (ob_fd->open_frame) - STACK_DESTROY(ob_fd->open_frame->root); - - loc_wipe(&ob_fd->loc); - if (ob_fd->xdata) - dict_unref(ob_fd->xdata); + /* In case of failure we need to decrement the number of open files because + * ob_fdclose() won't be called. 
*/ - GF_FREE(ob_fd); + LOCK(&fd->inode->lock); + { + ob_inode->open_count--; } + UNLOCK(&fd->inode->lock); - return -1; + gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", + "open", "path=%s", loc->path, NULL); + + return default_open_failure_cbk(frame, -state); } -int -ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, - dict_t *xdata) +static int32_t +ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - fd_t *old_fd = NULL; - int ret = -1; - int op_errno = ENOMEM; - call_stub_t *stub = NULL; - - old_fd = fd_lookup(fd->inode, 0); - if (old_fd) { - /* open-behind only when this is the first FD */ - stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); - if (!stub) { - fd_unref(old_fd); - goto err; - } - - open_and_resume(this, old_fd, stub); - - fd_unref(old_fd); - - return 0; - } - - ret = ob_open_behind(frame, this, loc, flags, fd, xdata); - if (ret) { - goto err; + ob_inode_t *ob_inode; + call_stub_t *stub; + fd_t *first_fd; + ob_state_t state; + + /* Create requests are never delayed. We always send them synchronously. */ + state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode, + &first_fd); + if (state == OB_STATE_READY) { + /* There's no pending open, but there are other file descriptors opened + * so we simply forward the request synchronously. */ + return default_create(frame, this, loc, flags, mode, umask, fd, xdata); } - return 0; -err: - gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s", - loc->path); - - STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0); - - return 0; -} + if (state == OB_STATE_OPEN_TRIGGERED) { + /* The first open is in progress (either because it was already issued + * or because this request triggered it). We try to create a new stub + * to retry the operation once the initial open completes. 
*/ + stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd, + xdata); + if (stub != NULL) { + return ob_stub_dispatch(this, ob_inode, first_fd, stub); + } -fd_t * -ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag) -{ - fd_t *wind_fd = NULL; - ob_fd_t *ob_fd = NULL; - ob_conf_t *conf = NULL; + state = -ENOMEM; + } - conf = this->private; + /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never + * be returned by ob_open_and_resume_fd(). If we are here it can only be + * because there has been a problem. */ - ob_fd = ob_fd_ctx_get(this, fd); + /* In case of failure we need to decrement the number of open files because + * ob_fdclose() won't be called. */ - if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) { - wind_fd = fd_anonymous(fd->inode); - if ((ob_fd->flags & O_DIRECT) && (flag)) - *flag = *flag | O_DIRECT; - } else { - wind_fd = fd_ref(fd); + LOCK(&fd->inode->lock); + { + ob_inode->open_count--; } + UNLOCK(&fd->inode->lock); + + gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", + "create", "path=%s", loc->path, NULL); - return wind_fd; + return default_create_failure_cbk(frame, -state); } -int +static int32_t ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - ob_conf_t *conf = NULL; - - conf = this->private; + ob_conf_t *conf = this->private; + bool trigger = conf->read_after_open || !conf->use_anonymous_fd; - if (!conf->read_after_open) - wind_fd = ob_get_wind_fd(this, fd, &flags); - else - wind_fd = fd_ref(fd); - - stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset, - flags, xdata); - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); + OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata); return 0; } -int 
+static int32_t ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, int count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset, - flags, iobref, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags, + iobref, xdata); return 0; } -int +static int32_t ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - - wind_fd = ob_get_wind_fd(this, fd, NULL); - - stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata); - - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); + ob_conf_t *conf = this->private; + bool trigger = !conf->use_anonymous_fd; - return 0; -err: - STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata); return 0; } -int +static int32_t ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, gf_seek_what_t what, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - - wind_fd = ob_get_wind_fd(this, fd, NULL); + ob_conf_t *conf = this->private; + bool trigger = !conf->use_anonymous_fd; - stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what, - xdata); - - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata); return 0; } -int +static int32_t ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - ob_fd_t *ob_fd = NULL; - gf_boolean_t unwind = _gf_false; - - LOCK(&fd->lock); - { - ob_fd = 
__ob_fd_ctx_get(this, fd); - if (ob_fd && ob_fd->open_frame) - /* if open() was never wound to backend, - no need to wind flush() either. - */ - unwind = _gf_true; - } - UNLOCK(&fd->lock); - - if (unwind) - goto unwind; - - stub = fop_flush_stub(frame, default_flush_resume, fd, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0); - - return 0; - -unwind: - STACK_UNWIND_STRICT(flush, frame, 0, 0, 0); + OB_POST_FLUSH(this, frame, fd, fd, xdata); return 0; } -int +static int32_t ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata); return 0; } -int +static int32_t ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata); return 0; } -int +static int32_t ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata); return 0; } -int +static int32_t ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, int flags, dict_t *xdata) { - call_stub_t *stub = 
NULL; - - stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0); + OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata); return 0; } -int +static int32_t ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata); return 0; } -int +static int32_t ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0); + OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata); return 0; } -int +static int32_t ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) { - call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, - volume, fd, cmd, flock, xdata); - if (stub) - open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); + OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata); return 0; } -int +static int32_t ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - call_stub_t *stub = fop_fentrylk_stub( - frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); - if (stub) - 
open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); + OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type, + xdata); return 0; } -int +static int32_t ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, - optype, xattr, xdata); - if (stub) - open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata); return 0; } -int +static int32_t ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt, int valid, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata); return 0; } -int +static int32_t ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, off_t offset, size_t len, dict_t *xdata) { - call_stub_t *stub; + OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata); - stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset, - len, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); + OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata); return 0; -err: - 
STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; } -int +static int32_t ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len, - xdata); - if (!stub) - goto err; + OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata); - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, loc->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata); return 0; } -int +static int32_t ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, dst->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0); + OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata); return 0; } -int32_t +static int32_t ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid, - xdata); - if (!stub) - goto err; + OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid, + xdata); - open_all_pending_fds_and_resume(this, loc->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); 
return 0; } -int32_t +static int32_t ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { - call_stub_t *stub = NULL; - gf_boolean_t access_xattr = _gf_false; - if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) || dict_get(dict, POSIX_ACL_ACCESS_XATTR) || - dict_get(dict, GF_SELINUX_XATTR_KEY)) - access_xattr = _gf_true; - - if (!access_xattr) + dict_get(dict, GF_SELINUX_XATTR_KEY)) { return default_setxattr(frame, this, loc, dict, flags, xdata); + } - stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags, - xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, loc->inode, stub); + OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags, + xdata); return 0; -err: - STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL); - return 0; } -int -ob_release(xlator_t *this, fd_t *fd) +static void +ob_fdclose(xlator_t *this, fd_t *fd) { - ob_fd_t *ob_fd = NULL; + struct list_head list; + ob_inode_t *ob_inode; + call_stub_t *stub; + + INIT_LIST_HEAD(&list); + stub = NULL; - ob_fd = ob_fd_ctx_get(this, fd); + LOCK(&fd->inode->lock); + { + ob_inode = ob_inode_get_locked(this, fd->inode); + if (ob_inode != NULL) { + ob_inode->open_count--; + + /* If this fd is the same as ob_inode->first_fd, it means that + * the initial open has not fully completed. We'll try to cancel + * it. */ + if (ob_inode->first_fd == fd) { + if (ob_inode->first_open == OB_OPEN_PREPARING) { + /* In this case ob_open_dispatch() has not been called yet. + * We clear first_fd and first_open to allow that function + * to know that the open is not really needed. This also + * allows other requests to work as expected if they + * arrive before the dispatch function is called. If there + * are pending fops, we can directly process them here. + * (note that there shouldn't be any fd related fops, but + * if there are, it's fine if they fail). 
*/ + ob_inode->first_fd = NULL; + ob_inode->first_open = NULL; + ob_inode->triggered = false; + list_splice_init(&ob_inode->resume_fops, &list); + } else if (!ob_inode->triggered) { + /* If the open has already been dispatched, we can only + * cancel it if it has not been triggered. Otherwise we + * simply wait until it completes. While it's not triggered, + * first_open must be a valid stub and there can't be any + * pending fops. */ + GF_ASSERT((ob_inode->first_open != NULL) && + list_empty(&ob_inode->resume_fops)); + + ob_inode->first_fd = NULL; + stub = ob_inode->first_open; + ob_inode->first_open = NULL; + } + } + } + } + UNLOCK(&fd->inode->lock); - ob_fd_free(ob_fd); + if (stub != NULL) { + ob_open_destroy(stub, fd); + } - return 0; + ob_resume_pending(&list); } int ob_forget(xlator_t *this, inode_t *inode) { - ob_inode_t *ob_inode = NULL; + ob_inode_t *ob_inode; uint64_t value = 0; - inode_ctx_del(inode, this, &value); - - if (value) { + if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) { ob_inode = (ob_inode_t *)(uintptr_t)value; - ob_inode_free(ob_inode); + GF_FREE(ob_inode); } return 0; @@ -1153,20 +880,18 @@ ob_priv_dump(xlator_t *this) int ob_fdctx_dump(xlator_t *this, fd_t *fd) { - ob_fd_t *ob_fd = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; - int ret = 0; + uint64_t value = 0; + int ret = 0, error = 0; ret = TRY_LOCK(&fd->lock); if (ret) return 0; - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) { - UNLOCK(&fd->lock); - return 0; + if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) { + error = (int32_t)value; } gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind", @@ -1175,17 +900,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd) gf_proc_dump_write("fd", "%p", fd); - gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame); - - if (ob_fd->open_frame) - gf_proc_dump_write("open_frame.root.unique", "%" PRIu64, - ob_fd->open_frame->root->unique); - - gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path); - - 
gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid)); - - gf_proc_dump_write("flags", "%d", ob_fd->flags); + gf_proc_dump_write("error", "%d", error); UNLOCK(&fd->lock); @@ -1282,6 +997,7 @@ fini(xlator_t *this) struct xlator_fops fops = { .open = ob_open, + .create = ob_create, .readv = ob_readv, .writev = ob_writev, .flush = ob_flush, @@ -1307,7 +1023,7 @@ struct xlator_fops fops = { }; struct xlator_cbks cbks = { - .release = ob_release, + .fdclose = ob_fdclose, .forget = ob_forget, }; diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index 4f16d148262..7fe4b3c3a4b 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -421,9 +421,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, qr_private_t *priv = NULL; qr_inode_table_t *table = NULL; uint32_t rollover = 0; - struct timeval tv = { - 0, - }; rollover = gen >> 32; gen = gen & 0xffffffff; @@ -431,7 +428,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, priv = this->private; table = &priv->table; - gettimeofday(&tv, NULL); LOCK(&table->lock); { if ((rollover != qr_inode->gen_rollover) || @@ -453,8 +449,7 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec; qr_inode->buf = *buf; - - memcpy(&qr_inode->last_refresh, &tv, sizeof(struct timeval)); + qr_inode->last_refresh = gf_time(); __qr_inode_register(this, table, qr_inode); } @@ -524,9 +519,7 @@ __qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf, if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) { qr_inode->buf = *buf; - - gettimeofday(&qr_inode->last_refresh, NULL); - + qr_inode->last_refresh = gf_time(); __qr_inode_register(this, table, qr_inode); } else { __qr_inode_prune(this, table, qr_inode, gen); @@ -558,20 +551,14 @@ __qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode) 
{ qr_conf_t *conf = NULL; qr_private_t *priv = NULL; - struct timeval now; - struct timeval diff; priv = this->private; conf = &priv->conf; - gettimeofday(&now, NULL); - - timersub(&now, &qr_inode->last_refresh, &diff); - - if (qr_inode->last_refresh.tv_sec < priv->last_child_down) + if (qr_inode->last_refresh < priv->last_child_down) return _gf_false; - if (diff.tv_sec >= conf->cache_timeout) + if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout) return _gf_false; return _gf_true; @@ -1034,7 +1021,7 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode) char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; - char buf[256] = { + char buf[GF_TIMESTR_SIZE] = { 0, }; @@ -1049,12 +1036,8 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode) gf_proc_dump_write("entire-file-cached", "%s", qr_inode->data ? "yes" : "no"); - if (qr_inode->last_refresh.tv_sec) { - gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh.tv_sec, - gf_timefmt_FT); - snprintf(buf + strlen(buf), sizeof buf - strlen(buf), - ".%" GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec); - + if (qr_inode->last_refresh) { + gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT); gf_proc_dump_write("last-cache-validation-time", "%s", buf); } @@ -1407,7 +1390,7 @@ qr_init(xlator_t *this) ret = 0; - time(&priv->last_child_down); + priv->last_child_down = gf_time(); GF_ATOMIC_INIT(priv->generation, 0); this->private = priv; out: @@ -1457,7 +1440,7 @@ qr_conf_destroy(qr_conf_t *conf) } void -qr_update_child_down_time(xlator_t *this, time_t *now) +qr_update_child_down_time(xlator_t *this, time_t now) { qr_private_t *priv = NULL; @@ -1465,7 +1448,7 @@ qr_update_child_down_time(xlator_t *this, time_t *now) LOCK(&priv->lock); { - priv->last_child_down = *now; + priv->last_child_down = now; } UNLOCK(&priv->lock); } @@ -1511,7 +1494,6 @@ qr_notify(xlator_t *this, int event, void *data, ...) 
{ int ret = 0; qr_private_t *priv = NULL; - time_t now = 0; qr_conf_t *conf = NULL; priv = this->private; @@ -1520,8 +1502,7 @@ qr_notify(xlator_t *this, int event, void *data, ...) switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: - time(&now); - qr_update_child_down_time(this, &now); + qr_update_child_down_time(this, gf_time()); break; case GF_EVENT_UPCALL: if (conf->qr_invalidation) diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h index 67850821b8e..20fcc70b3a7 100644 --- a/xlators/performance/quick-read/src/quick-read.h +++ b/xlators/performance/quick-read/src/quick-read.h @@ -39,7 +39,7 @@ struct qr_inode { uint32_t ia_ctime_nsec; uint32_t gen_rollover; struct iatt buf; - struct timeval last_refresh; + time_t last_refresh; struct list_head lru; uint64_t gen; uint64_t invalidation_time; diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index e1d0e9aaf00..00cfca016e6 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -2489,7 +2489,7 @@ wb_mark_readdirp_start(xlator_t *this, inode_t *directory) wb_directory_inode = wb_inode_create(this, directory); - if (!wb_directory_inode || !wb_directory_inode->lock.spinlock) + if (!wb_directory_inode) return; LOCK(&wb_directory_inode->lock); @@ -2509,7 +2509,7 @@ wb_mark_readdirp_end(xlator_t *this, inode_t *directory) wb_directory_inode = wb_inode_ctx_get(this, directory); - if (!wb_directory_inode || !wb_directory_inode->lock.spinlock) + if (!wb_directory_inode) return; LOCK(&wb_directory_inode->lock); |