summaryrefslogtreecommitdiffstats
path: root/xlators/features
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/features')
-rw-r--r--xlators/features/Makefile.am6
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c12
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h20
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c68
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h5
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c68
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c52
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h21
-rw-r--r--xlators/features/changelog/src/changelog-messages.h8
-rw-r--r--xlators/features/changelog/src/changelog.c51
-rw-r--r--xlators/features/cloudsync/src/Makefile.am4
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c2
-rw-r--r--xlators/features/index/src/index.c4
-rw-r--r--xlators/features/leases/src/leases-internal.c4
-rw-r--r--xlators/features/locks/src/clear.c4
-rw-r--r--xlators/features/locks/src/common.c307
-rw-r--r--xlators/features/locks/src/common.h41
-rw-r--r--xlators/features/locks/src/entrylk.c31
-rw-r--r--xlators/features/locks/src/inodelk.c162
-rw-r--r--xlators/features/locks/src/locks.h41
-rw-r--r--xlators/features/locks/src/posix.c216
-rw-r--r--xlators/features/metadisp/Makefile.am3
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c6
-rw-r--r--xlators/features/quota/src/quota.c59
-rw-r--r--xlators/features/quota/src/quota.h5
-rw-r--r--xlators/features/read-only/src/worm-helper.c15
-rw-r--r--xlators/features/shard/src/shard.c525
-rw-r--r--xlators/features/shard/src/shard.h7
-rw-r--r--xlators/features/trash/src/trash.c4
-rw-r--r--xlators/features/upcall/src/upcall-internal.c10
45 files changed, 2431 insertions, 479 deletions
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 194634b003d..c57897f11ea 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC
CLOUDSYNC_DIR = cloudsync
endif
+if BUILD_METADISP
+ METADISP_DIR = metadisp
+endif
+
SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
compress changelog gfid-access snapview-client snapview-server trash \
shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
- utime
+ utime $(METADISP_DIR)
CLEANFILES =
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
index 34e20f9df11..5cef2ffa5e5 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat)
}
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv)
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time)
{
if (!scrub_stat)
return;
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_start_time = time;
}
pthread_mutex_unlock(&scrub_stat->lock);
}
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv)
+ time_t time)
{
int lst_size = 0;
@@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_end_time = time;
- scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec -
- scrub_stat->scrub_start_tv.tv_sec;
+ scrub_stat->scrub_duration = scrub_stat->scrub_end_time -
+ scrub_stat->scrub_start_time;
snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr);
}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
index 24128b90a66..f022aa831eb 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -15,20 +15,22 @@
#include <sys/time.h>
#include <pthread.h>
+#include <glusterfs/common-utils.h>
+
struct br_scrub_stats {
- uint64_t scrubbed_files; /* Total number of scrubbed file */
+ uint64_t scrubbed_files; /* Total number of scrubbed files. */
- uint64_t unsigned_files; /* Total number of unsigned file */
+ uint64_t unsigned_files; /* Total number of unsigned files. */
- uint64_t scrub_duration; /* Duration of last scrub */
+ uint64_t scrub_duration; /* Duration of last scrub. */
- char last_scrub_time[1024]; /*last scrub completion time */
+ char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */
- struct timeval scrub_start_tv; /* Scrubbing starting time*/
+ time_t scrub_start_time; /* Scrubbing starting time. */
- struct timeval scrub_end_tv; /* Scrubbing finishing time */
+ time_t scrub_end_time; /* Scrubbing finishing time. */
- int8_t scrub_running; /* Scrub running or not */
+ int8_t scrub_running; /* Whether scrub running or not. */
pthread_mutex_t lock;
};
@@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat);
void
br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat);
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv);
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time);
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv);
+ time_t time);
#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index d20ecc7cdbe..289dd53f610 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -601,25 +601,23 @@ br_fsscan_deactivate(xlator_t *this)
static void
br_scrubber_log_time(xlator_t *this, const char *sfx)
{
- char timestr[1024] = {
- 0,
- };
- struct timeval tv = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
br_private_t *priv = NULL;
+ time_t now = 0;
+ now = gf_time();
priv = this->private;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
- br_update_scrub_start_time(&priv->scrub_stat, &tv);
+ br_update_scrub_start_time(&priv->scrub_stat, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
"Scrubbing %s at %s", sfx, timestr);
} else {
- br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv);
+ br_update_scrub_finish_time(&priv->scrub_stat, timestr, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
"Scrubbing %s at %s", sfx, timestr);
}
@@ -628,15 +626,13 @@ br_scrubber_log_time(xlator_t *this, const char *sfx)
static void
br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx)
{
- char timestr[1024] = {
- 0,
- };
- struct timeval tv = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ now = gf_time();
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
@@ -919,10 +915,7 @@ br_fsscan_schedule(xlator_t *this)
{
uint32_t timo = 0;
br_private_t *priv = NULL;
- struct timeval tv = {
- 0,
- };
- char timestr[1024] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
struct br_scrubber *fsscrub = NULL;
@@ -933,8 +926,7 @@ br_fsscan_schedule(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&tv, NULL);
- scrub_monitor->boot = tv.tv_sec;
+ scrub_monitor->boot = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
@@ -975,12 +967,10 @@ int32_t
br_fsscan_activate(xlator_t *this)
{
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -989,7 +979,7 @@ br_fsscan_activate(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -1003,7 +993,7 @@ br_fsscan_activate(xlator_t *this)
}
pthread_mutex_unlock(&scrub_monitor->donelock);
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
(void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo);
_br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
@@ -1020,12 +1010,10 @@ br_fsscan_reschedule(xlator_t *this)
{
int32_t ret = 0;
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -1037,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this)
if (!fsscrub->frequency_reconf)
return 0;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -1045,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this)
return -1;
}
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1073,23 +1061,19 @@ br_fsscan_ondemand(xlator_t *this)
{
int32_t ret = 0;
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_monitor *scrub_monitor = NULL;
priv = this->private;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
-
+ now = gf_time();
timo = BR_SCRUB_ONDEMAND;
-
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1799,7 +1783,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
tmp_count = total_count;
for (j = 0; j < count; j++) {
- len = snprintf(key, PATH_MAX, "quarantine-%d", j);
+ len = snprintf(key, sizeof(key), "quarantine-%d", j);
ret = dict_get_strn(child_dict, key, len, &entry);
if (ret)
continue;
@@ -1810,7 +1794,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
if ((len < 0) || (len >= PATH_MAX)) {
continue;
}
- snprintf(main_key, PATH_MAX, "quarantine-%d", tmp_count);
+ snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count);
ret = dict_set_dynstr_with_alloc(dict, main_key, tmp);
if (!ret)
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
index 8d2b7f051da..6c15a166f18 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -44,7 +44,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL,
BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED,
BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED,
- BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED);
+ BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED,
+ BRS_MSG_VERSION_PREPARE_FAIL);
#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed"
@@ -68,6 +69,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
"daemon. Unwinding the fop"
#define BRS_MSG_SIGN_PREPARE_FAIL_STR \
"failed to prepare the signature. Unwinding the fop"
+#define BRS_MSG_VERSION_PREPARE_FAIL_STR \
+ "failed to prepare the version. Unwinding the fop"
#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding"
#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad"
#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 605a5e4c3e4..447dd47ff41 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -424,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict,
priv = this->private;
br_set_ongoingversion(obuf, oversion, priv->boot);
- return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
- sizeof(br_version_t));
+ return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
+ sizeof(br_version_t));
}
static int
@@ -436,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf,
br_set_signature(sbuf, sign, signaturelen, &size);
- return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf,
- size);
+ return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size);
}
/**
@@ -854,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame,
op_errno = ENOMEM;
dict = dict_new();
if (!dict)
- goto done;
+ goto out;
ret = br_stub_alloc_versions(&obuf, NULL, 0);
- if (ret)
- goto dealloc_dict;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ br_stub_dealloc_versions(obuf);
+ goto out;
+ }
ret = br_stub_fd_versioning(
this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk,
writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE);
-
-dealloc_versions:
- br_stub_dealloc_versions(obuf);
-dealloc_dict:
- dict_unref(dict);
-done:
+out:
+ if (dict)
+ dict_unref(dict);
if (ret) {
if (local)
frame->local = NULL;
@@ -1025,31 +1028,36 @@ static int
br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode,
br_isignature_t *sign, int *fakesuccess)
{
- int32_t ret = 0;
+ int32_t ret = -1;
size_t signaturelen = 0;
br_signature_t *sbuf = NULL;
if (!br_is_signature_type_valid(sign->signaturetype))
- goto error_return;
+ goto out;
signaturelen = sign->signaturelen;
ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen);
- if (ret)
- goto error_return;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ goto out;
+ }
ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ br_stub_dealloc_versions(sbuf);
+ goto out;
+ }
+ /* At this point sbuf has been added to dict, so the memory will be freed
+ * when the data from the dict is destroyed
+ */
ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess);
- if (ret)
- goto dealloc_versions;
-
- return 0;
-
-dealloc_versions:
- br_stub_dealloc_versions(sbuf);
-error_return:
- return -1;
+out:
+ return ret;
}
static void
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
index 71fe1f032a0..e561997d858 100644
--- a/xlators/features/changelog/src/changelog-helpers.c
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -242,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len)
}
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer)
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer)
{
char changelog_path[PATH_MAX + 1] = {
0,
@@ -273,7 +272,7 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
goto out;
}
- len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts,
+ len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts,
priv->rollover_count);
if (len >= sizeof(x_value)) {
ret = -1;
@@ -382,8 +381,7 @@ out:
}
static int
-changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts)
+changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ret = -1;
int notify = 0;
@@ -421,16 +419,14 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
priv->changelog_fd = -1;
}
- time_t time = (time_t)ts;
-
- /* Get GMT time */
- gmt = gmtime(&time);
+ /* Get GMT time. */
+ gmt = gmtime(&ts);
strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt);
(void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
priv->changelog_dir);
- (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%lu",
+ (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld",
priv->changelog_dir, yyyymmdd, ts);
(void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd);
@@ -593,7 +589,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -723,7 +719,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -741,12 +737,12 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
int32_t len = 0;
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE,
- "name=%lu", ts, NULL);
+ "name=%ld", ts, NULL);
CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
/* get the htime file name in ht_file_path */
- len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%lu", ht_dir_path,
+ len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path,
HTIME_FILE_NAME, ts);
if ((len < 0) || (len >= PATH_MAX)) {
ret = -1;
@@ -792,7 +788,7 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
goto out;
}
- (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu",
+ (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld",
HTIME_FILE_NAME, ts);
if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
strlen(ht_file_bname), 0)) {
@@ -963,8 +959,8 @@ out:
}
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale)
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale)
{
int ret = -1;
@@ -985,21 +981,12 @@ changelog_entry_length()
return sizeof(changelog_log_data_t);
}
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last)
{
- struct timeval tv = {
- 0,
- };
-
cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
-
- if (gettimeofday(&tv, NULL))
- return -1;
-
- cld->cld_roll_time = (unsigned long)tv.tv_sec;
+ cld->cld_roll_time = gf_time();
cld->cld_finale = is_last;
- return 0;
}
int
@@ -1274,7 +1261,7 @@ changelog_rollover(void *data)
while (1) {
(void)pthread_testcancel();
- tv.tv_sec = time(NULL) + priv->rollover_time;
+ tv.tv_sec = gf_time() + priv->rollover_time;
tv.tv_nsec = 0;
ret = 0; /* Reset ret to zero */
@@ -1355,12 +1342,7 @@ changelog_rollover(void *data)
if (priv->explicit_rollover == _gf_true)
sleep(1);
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret) {
- gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, NULL);
- continue;
- }
+ changelog_fill_rollover_data(&cld, _gf_false);
_mask_cancellation();
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
index 0d06d98c9e1..38fa7590c32 100644
--- a/xlators/features/changelog/src/changelog-helpers.h
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -31,7 +31,7 @@
*/
typedef struct changelog_log_data {
/* rollover related */
- unsigned long cld_roll_time;
+ time_t cld_roll_time;
/* reopen changelog? */
gf_boolean_t cld_finale;
@@ -97,12 +97,6 @@ struct changelog_encoder {
typedef struct changelog_time_slice {
/**
- * just in case we need nanosecond granularity some day.
- * field is unused as of now (maybe we'd need it later).
- */
- struct timeval tv_start;
-
- /**
* version of changelog file, incremented each time changes
* rollover.
*/
@@ -423,11 +417,11 @@ changelog_local_t *
changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
int xtra_records, gf_boolean_t update_flag);
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale);
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale);
int
changelog_open_journal(xlator_t *this, changelog_priv_t *priv);
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last);
int
changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv,
@@ -451,12 +445,11 @@ changelog_fsync_thread(void *data);
int
changelog_forget(xlator_t *this, inode_t *inode);
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer);
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer);
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts);
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts);
/* Geo-Rep snapshot dependency changes */
void
diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h
index 4dd56b8ee97..cb0e16c85d8 100644
--- a/xlators/features/changelog/src/changelog-messages.h
+++ b/xlators/features/changelog/src/changelog-messages.h
@@ -59,12 +59,12 @@ GLFS_MSGID(
CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT,
CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR,
CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED,
- CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED,
+ CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0,
CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY,
CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED,
CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE,
CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET,
- CHANGELOG_MSG_GET_TIME_FAILURE, CHANGELOG_MSG_HTIME_FETCH_FAILED,
+ CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2,
CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED,
CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED,
@@ -123,8 +123,6 @@ GLFS_MSGID(
#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)"
#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify"
#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed"
-#define CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED_STR \
- "failed to fill rollover data"
#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event"
#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \
"changelog local initialization failed"
@@ -144,9 +142,7 @@ GLFS_MSGID(
#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found"
#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \
"Something went wrong in dict_get_str_boolean"
-#define CHANGELOG_MSG_GET_TIME_FAILURE_STR "gettimeofday() failure"
#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set"
-#define CHANGELOG_MSG_HTIME_FETCH_FAILED_STR "unable to fetch htime"
#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper"
#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \
"translator needs a single subvolume"
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index 37916f40882..6a6e5af859e 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -2036,20 +2036,20 @@ notify(xlator_t *this, int event, void *data, ...)
priv->notify_down = _gf_true;
}
UNLOCK(&priv->lock);
- list_for_each_entry_safe(listener, next, &priv->rpc->listeners,
- list)
- {
- if (listener->trans) {
- rpc_transport_unref(listener->trans);
+ if (priv->rpc) {
+ list_for_each_entry_safe(listener, next,
+ &priv->rpc->listeners, list)
+ {
+ if (listener->trans) {
+ rpc_transport_unref(listener->trans);
+ }
}
+ rpcsvc_destroy(priv->rpc);
+ priv->rpc = NULL;
}
CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
UNIX_PATH_MAX);
sys_unlink(sockfile);
- if (priv->rpc) {
- rpcsvc_destroy(priv->rpc);
- priv->rpc = NULL;
- }
if (!cleanup_notify)
default_notify(this, GF_EVENT_PARENT_DOWN, data);
}
@@ -2252,23 +2252,11 @@ static int
changelog_init(xlator_t *this, changelog_priv_t *priv)
{
int i = 0;
- int ret = -1;
- struct timeval tv = {
- 0,
- };
+ int ret = 0;
changelog_log_data_t cld = {
0,
};
- ret = gettimeofday(&tv, NULL);
- if (ret) {
- gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_GET_TIME_FAILURE,
- NULL);
- goto out;
- }
-
- priv->slice.tv_start = tv;
-
priv->maps[CHANGELOG_TYPE_DATA] = "D ";
priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M ";
@@ -2287,9 +2275,7 @@ changelog_init(xlator_t *this, changelog_priv_t *priv)
* in case there was an encoding change. so... things are kept
* simple here.
*/
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, _gf_false);
ret = htime_open(this, priv, cld.cld_roll_time);
/* call htime open with cld's rollover_time */
@@ -2470,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options)
char csnap_dir[PATH_MAX] = {
0,
};
- struct timeval tv = {
- 0,
- };
uint32_t timeout = 0;
priv = this->private;
@@ -2564,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options)
out);
if (active_now || active_earlier) {
- ret = changelog_fill_rollover_data(&cld, !active_now);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, !active_now);
slice = &priv->slice;
@@ -2585,13 +2566,7 @@ reconfigure(xlator_t *this, dict_t *options)
if (!active_earlier) {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE,
NULL);
- if (gettimeofday(&tv, NULL)) {
- gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_HTIME_FETCH_FAILED, NULL);
- ret = -1;
- goto out;
- }
- htime_create(this, priv, tv.tv_sec);
+ htime_create(this, priv, gf_time());
}
ret = changelog_spawn_helper_threads(this, priv);
}
diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am
index 0c3966c968b..e2a277e372b 100644
--- a/xlators/features/cloudsync/src/Makefile.am
+++ b/xlators/features/cloudsync/src/Makefile.am
@@ -21,9 +21,9 @@ cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources)
nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h
BUILT_SOURCES = cloudsync-autogen-fops.h
-cloudsync_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL)
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
-DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\"
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
index 7680260988b..23c3599825a 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
@@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid,
int date_len = -1;
int res_len = -1;
- ctime = time(NULL);
+ ctime = gf_time();
gtime = gmtime(&ctime);
date_len = strftime(httpdate, sizeof(httpdate),
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index 4ece7ff6fc8..4abb2c73ce5 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -2104,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
worker_enqueue(this, stub);
return 0;
normal:
- ret = dict_get_str(xattr_req, "link-count", &flag);
+ ret = dict_get_str_sizen(xattr_req, "link-count", &flag);
if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) {
STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@@ -2592,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...)
if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) {
stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
/* Wait for draining stub from queue before notify PARENT_DOWN */
diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c
index 67fdd53cee2..56dee244281 100644
--- a/xlators/features/leases/src/leases-internal.c
+++ b/xlators/features/leases/src/leases-internal.c
@@ -897,7 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
}
priv = this->private;
- recall_time = time(NULL);
+ recall_time = gf_time();
list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
lease_id_list)
{
@@ -1367,7 +1367,7 @@ expired_recall_cleanup(void *data)
gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread");
while (1) {
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&priv->mutex);
{
if (priv->fini) {
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 116aed68690..ab1eac68a53 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
if (plock->blocked) {
bcount++;
pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW,
- &plock->user_flock, -1, EAGAIN, NULL);
+ &plock->user_flock, -1, EINTR, NULL);
- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN,
+ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR,
&plock->user_flock, NULL);
} else {
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 4c6b78c2372..a2c6be93e03 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -460,11 +460,16 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
INIT_LIST_HEAD(&pl_inode->blocked_calls);
INIT_LIST_HEAD(&pl_inode->metalk_list);
INIT_LIST_HEAD(&pl_inode->queued_locks);
+ INIT_LIST_HEAD(&pl_inode->waiting);
gf_uuid_copy(pl_inode->gfid, inode->gfid);
pl_inode->check_mlock_info = _gf_true;
pl_inode->mlock_enforced = _gf_false;
+ /* -2 means never looked up. -1 means something went wrong and link
+ * tracking is disabled. */
+ pl_inode->links = -2;
+
ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
if (ret) {
pthread_mutex_destroy(&pl_inode->mutex);
@@ -600,13 +605,11 @@ static void
__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock)
{
if (lock->blocked)
- gettimeofday(&lock->blkd_time, NULL);
+ lock->blkd_time = gf_time();
else
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add_tail(&lock->list, &pl_inode->ext_list);
-
- return;
}
/* Return true if the locks overlap, false otherwise */
@@ -1290,3 +1293,299 @@ pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client)
}
return _gf_true;
}
+
+static int32_t
+pl_inode_from_loc(loc_t *loc, inode_t **pinode)
+{
+ inode_t *inode = NULL;
+ int32_t error = 0;
+
+ if (loc->inode != NULL) {
+ inode = inode_ref(loc->inode);
+ goto done;
+ }
+
+ if (loc->parent == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (!gf_uuid_is_null(loc->gfid)) {
+ inode = inode_find(loc->parent->table, loc->gfid);
+ if (inode != NULL) {
+ goto done;
+ }
+ }
+
+ if (loc->name == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ inode = inode_grep(loc->parent->table, loc->parent, loc->name);
+ if (inode == NULL) {
+ /* We haven't found any inode. This means that the file doesn't exist
+ * or that even if it exists, we don't have any knowledge about it, so
+ * we don't have locks on it either, which is fine for our purposes. */
+ goto done;
+ }
+
+done:
+ *pinode = inode;
+
+ return error;
+}
+
+static gf_boolean_t
+pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *lock;
+ gf_boolean_t has_owners = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ /* If the lock belongs to the same client, we assume it's related
+ * to the same operation, so we allow the removal to continue. */
+ if (lock->client == client) {
+ continue;
+ }
+ /* If the lock belongs to an internal process, we don't block the
+ * removal. */
+ if (lock->client_pid < 0) {
+ continue;
+ }
+ if (contend == NULL) {
+ return _gf_true;
+ }
+ has_owners = _gf_true;
+ inodelk_contention_notify_check(xl, lock, now, contend);
+ }
+ }
+
+ return has_owners;
+}
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend)
+{
+ struct timespec now;
+ inode_t *inode;
+ pl_inode_t *pl_inode;
+ int32_t error;
+
+ pl_inode = NULL;
+
+ error = pl_inode_from_loc(loc, &inode);
+ if ((error != 0) || (inode == NULL)) {
+ goto done;
+ }
+
+ pl_inode = pl_inode_get(xl, inode, NULL);
+ if (pl_inode == NULL) {
+ inode_unref(inode);
+ error = ENOMEM;
+ goto done;
+ }
+
+ /* pl_inode_from_loc() already increments ref count for inode, so
+ * we only assign here our reference. */
+ pl_inode->inode = inode;
+
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (pl_inode->removed) {
+ error = ESTALE;
+ goto unlock;
+ }
+
+ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
+ error = -1;
+ /* We skip the unlock here because the caller must create a stub when
+ * we return -1 and do a call to pl_inode_remove_complete(), which
+ * assumes the lock is still acquired and will release it once
+ * everything else is prepared. */
+ goto done;
+ }
+
+ pl_inode->is_locked = _gf_true;
+ pl_inode->remove_running++;
+
+unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+done:
+ *ppl_inode = pl_inode;
+
+ return error;
+}
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend)
+{
+ pl_inode_lock_t *lock;
+ int32_t error = -1;
+
+ if (stub != NULL) {
+ list_add_tail(&stub->list, &pl_inode->waiting);
+ pl_inode->is_locked = _gf_true;
+ } else {
+ error = ENOMEM;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, list);
+ list_del_init(&lock->list);
+ __pl_inodelk_unref(lock);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (error < 0) {
+ inodelk_contention_notify(xl, contend);
+ }
+
+ inode_unref(pl_inode->inode);
+
+ return error;
+}
+
+void
+pl_inode_remove_wake(struct list_head *list)
+{
+ call_stub_t *stub;
+
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
+
+ call_resume(stub);
+ }
+}
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
+{
+ struct list_head contend, granted;
+ struct timespec now;
+ pl_dom_list_t *dom;
+
+ if (pl_inode == NULL) {
+ return;
+ }
+
+ INIT_LIST_HEAD(&contend);
+ INIT_LIST_HEAD(&granted);
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (error == 0) {
+ if (pl_inode->links >= 0) {
+ pl_inode->links--;
+ }
+ if (pl_inode->links == 0) {
+ pl_inode->removed = _gf_true;
+ }
+ }
+
+ pl_inode->remove_running--;
+
+ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
+ pl_inode->is_locked = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
+ &contend);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(xl, pl_inode, &granted);
+
+ inodelk_contention_notify(xl, &contend);
+
+ inode_unref(pl_inode->inode);
+}
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list)
+{
+ call_stub_t *stub, *tmp;
+
+ if (!pl_inode->is_locked) {
+ return;
+ }
+
+ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list)
+ {
+ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL,
+ NULL)) {
+ list_move_tail(&stub->list, list);
+ }
+ }
+}
+
+/* This function determines if an inodelk attempt can be done now or it needs
+ * to wait.
+ *
+ * Possible return values:
+ * < 0: An error occurred. Currently only -ESTALE can be returned if the
+ * inode has been deleted previously by unlink/rmdir/rename
+ * = 0: The lock can be attempted.
+ * > 0: The lock needs to wait because a conflicting remove operation is
+ * ongoing.
+ */
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *ilock;
+
+ /* If the inode has been deleted, we won't allow any lock. */
+ if (pl_inode->removed) {
+ return -ESTALE;
+ }
+
+ /* We only synchronize with locks made for regular operations coming from
+ * the user. Locks done for internal purposes are hard to control and could
+ * lead to long delays or deadlocks quite easily. */
+ if (lock->client_pid < 0) {
+ return 0;
+ }
+ if (!pl_inode->is_locked) {
+ return 0;
+ }
+ if (pl_inode->remove_running > 0) {
+ return 1;
+ }
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(ilock, &dom->inodelk_list, list)
+ {
+ /* If a lock from the same client is already granted, we allow this
+ * one to continue. This is necessary to prevent deadlocks when
+ * multiple locks are taken for the same operation.
+ *
+ * On the other side it's unlikely that the same client sends
+ * completely unrelated locks for the same inode.
+ */
+ if (ilock->client == lock->client) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index 0916c299e84..281223bf3b8 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -105,6 +105,15 @@ void
__pl_inodelk_unref(pl_inode_lock_t *lock);
void
+__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend);
+
+void
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted);
+
+void
grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
pl_dom_list_t *dom, struct timespec *now,
struct list_head *contend);
@@ -204,6 +213,16 @@ pl_metalock_is_active(pl_inode_t *pl_inode);
void
__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+void
+inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
+void
+entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
gf_boolean_t
pl_does_monkey_want_stuck_lock();
@@ -218,4 +237,26 @@ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd);
gf_boolean_t
pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client);
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend);
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend);
+
+void
+pl_inode_remove_wake(struct list_head *list);
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error);
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list);
+
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 0911659b437..fd772c850dd 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -121,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
@@ -129,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
* chance? Or just the locks we are attempting to acquire?
*/
if (names_conflict(candidate_lock->basename, requested_lock->basename)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -204,9 +202,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
- struct timespec *now)
+void
+entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -216,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -225,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -238,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -332,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
break;
}
}
- if (__entrylk_needs_contention_notify(this, tmp, now)) {
- list_add_tail(&tmp->contend, contend);
- }
+ entrylk_contention_notify_check(this, tmp, now, contend);
}
}
@@ -546,14 +542,10 @@ static int
__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
pl_entry_lock_t *lock, int nonblock)
{
- struct timeval now;
-
if (nonblock)
goto out;
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks);
gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
@@ -614,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
}
__pl_entrylk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->domain_list, &dom->entrylk_list);
ret = 0;
@@ -697,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend);
if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
/* Grants locks if possible which are blocked on a lock */
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index e0e3b8f1f2d..d4e51d6e0a1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -140,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock,
pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
/* Question: Should we just prune them all given the
* chance? Or just the locks we are attempting to acquire?
*/
if (inodelk_conflict(candidate_lock, requested_lock)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -229,9 +227,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
- struct timespec *now)
+void
+inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -241,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -250,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -263,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -351,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
break;
}
}
- if (__inodelk_needs_contention_notify(this, l, now)) {
- list_add_tail(&l->contend, contend);
- }
+ inodelk_contention_notify_check(this, l, now, contend);
}
}
@@ -399,15 +395,11 @@ static int
__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
int can_block)
{
- struct timeval now;
-
if (can_block == 0) {
goto out;
}
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks);
gf_msg_trace(this->name, 0,
@@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
struct list_head *contend)
{
pl_inode_lock_t *conf = NULL;
- int ret = -EINVAL;
+ int ret;
- conf = __inodelk_grantable(this, dom, lock, now, contend);
- if (conf) {
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ ret = pl_inode_remove_inodelk(pl_inode, lock);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret == 0) {
+ conf = __inodelk_grantable(this, dom, lock, now, contend);
+ }
+ if ((ret > 0) || (conf != NULL)) {
+ return __lock_blocked_add(this, dom, lock, can_block);
}
/* To prevent blocked locks starvation, check if there are any blocked
@@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
"starvation");
}
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ return __lock_blocked_add(this, dom, lock, can_block);
}
__pl_inodelk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->list, &dom->inodelk_list);
- ret = 0;
-
-out:
- return ret;
+ return 0;
}
/* Return true if the two inodelks have exactly same lock boundaries */
@@ -527,12 +520,11 @@ out:
return conf;
}
-static void
+void
__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
struct list_head *granted, pl_dom_list_t *dom,
struct timespec *now, struct list_head *contend)
{
- int bl_ret = 0;
pl_inode_lock_t *bl = NULL;
pl_inode_lock_t *tmp = NULL;
@@ -545,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
{
list_del_init(&bl->blocked_locks);
- bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
+ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
- if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ if (bl->status != -EAGAIN) {
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
-/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct timespec *now,
- struct list_head *contend)
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- struct list_head granted;
pl_inode_lock_t *lock;
pl_inode_lock_t *tmp;
+ int32_t op_ret;
+ int32_t op_errno;
- INIT_LIST_HEAD(&granted);
-
- pthread_mutex_lock(&pl_inode->mutex);
- {
- __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
- contend);
- }
- pthread_mutex_unlock(&pl_inode->mutex);
-
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
- lkowner_utoa(&lock->owner), lock->user_flock.l_start,
- lock->user_flock.l_len);
-
+ if (lock->status == 0) {
+ op_ret = 0;
+ op_errno = 0;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ } else {
+ op_ret = -1;
+ op_errno = -lock->status;
+ }
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
- 0, 0, lock->volume);
+ op_ret, op_errno, lock->volume);
- STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL);
+ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL);
lock->frame = NULL;
}
pthread_mutex_lock(&pl_inode->mutex);
{
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
list_del_init(&lock->blocked_locks);
__pl_inodelk_unref(lock);
@@ -599,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_unlock(&pl_inode->mutex);
}
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ struct list_head granted;
+
+ INIT_LIST_HEAD(&granted);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
+ contend);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(this, pl_inode, &granted);
+}
+
static void
pl_inodelk_log_cleanup(pl_inode_lock_t *lock)
{
@@ -660,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
* and blocked lists, then this means that a parallel
* unlock on another inodelk (L2 say) may have 'granted'
* L1 and added it to 'granted' list in
- * __grant_blocked_node_locks() (although using the
+ * __grant_blocked_inode_locks() (although using the
* 'blocked_locks' member). In that case, the cleanup
* codepath must try and grant other overlapping
* blocked inodelks from other clients, now that L1 is
@@ -745,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
gf_boolean_t need_inode_unref = _gf_false;
struct list_head *pcontend = NULL;
struct list_head contend;
+ struct list_head wake;
struct timespec now = {};
short fl_type;
@@ -796,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
timespec_now(&now);
}
+ INIT_LIST_HEAD(&wake);
+
if (ctx)
pthread_mutex_lock(&ctx->lock);
pthread_mutex_lock(&pl_inode->mutex);
@@ -818,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid, lkowner_utoa(&lock->owner),
lock->user_flock.l_start, lock->user_flock.l_len);
- if (can_block)
+ if (can_block) {
unref = _gf_false;
- /* For all but the case where a non-blocking
- * lock attempt fails, the extra ref taken at
- * the start of this function must be negated.
- */
- else
- need_inode_unref = _gf_true;
+ }
}
-
- if (ctx && (!ret || can_block))
+ /* For all but the case where a non-blocking lock attempt fails
+ * with -EAGAIN, the extra ref taken at the start of this function
+ * must be negated. */
+ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block);
+ if (ctx && !need_inode_unref) {
list_add_tail(&lock->client_list, &ctx->inodelk_lockers);
+ }
} else {
/* Irrespective of whether unlock succeeds or not,
* the extra inode ref that was done at the start of
@@ -847,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
list_del_init(&retlock->client_list);
__pl_inodelk_unref(retlock);
+ pl_inode_remove_unlocked(this, pl_inode, &wake);
+
ret = 0;
}
out:
@@ -857,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
if (ctx)
pthread_mutex_unlock(&ctx->lock);
+ pl_inode_remove_wake(&wake);
+
/* The following (extra) unref corresponds to the ref that
* was done at the time the lock was granted.
*/
@@ -1037,10 +1051,14 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
inode);
if (ret < 0) {
- if ((can_block) && (F_UNLCK != lock_type)) {
- goto out;
+ if (ret == -EAGAIN) {
+ if (can_block && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret);
}
- gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
op_errno = -ret;
goto unwind;
}
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 3305350afb1..c868eb494a2 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -43,9 +43,8 @@ struct __posix_lock {
fd_t *fd;
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
/* These two together serve to uniquely identify each process
across nodes */
@@ -85,9 +84,9 @@ struct __pl_inode_lock {
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -102,6 +101,9 @@ struct __pl_inode_lock {
struct list_head client_list; /* list of all locks from a client */
short fl_type;
+
+ int32_t status; /* Error code when we try to grant a lock in blocked
+ state */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -136,9 +138,9 @@ struct __entry_lock {
const char *basename;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -164,13 +166,14 @@ struct __pl_inode {
struct list_head rw_list; /* list of waiting r/w requests */
struct list_head reservelk_list; /* list of reservelks */
struct list_head blocked_reservelks; /* list of blocked reservelks */
- struct list_head
- blocked_calls; /* List of blocked lock calls while a reserve is held*/
- struct list_head metalk_list; /* Meta lock list */
- /* This is to store the incoming lock
- requests while meta lock is enabled */
- struct list_head queued_locks;
- int mandatory; /* if mandatory locking is enabled */
+ struct list_head blocked_calls; /* List of blocked lock calls while a
+ reserve is held*/
+ struct list_head metalk_list; /* Meta lock list */
+ struct list_head queued_locks; /* This is to store the incoming lock
+ requests while meta lock is enabled */
+ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir
+ the inode. */
+ int mandatory; /* if mandatory locking is enabled */
inode_t *refkeeper; /* hold refs on an inode while locks are
held to prevent pruning */
@@ -197,7 +200,13 @@ struct __pl_inode {
*/
int fop_wind_count;
pthread_cond_t check_fop_wind_count;
+
gf_boolean_t track_fop_wind_count;
+
+ int32_t links; /* Number of hard links the inode has. */
+ uint32_t remove_running; /* Number of remove operations running. */
+ gf_boolean_t is_locked; /* Regular locks will be blocked. */
+ gf_boolean_t removed; /* The inode has been deleted. */
};
typedef struct __pl_inode pl_inode_t;
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 8b57627addf..cf0ae4c57dd 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -148,6 +148,29 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
} \
} while (0)
+#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \
+ _args...) \
+ ({ \
+ struct list_head contend; \
+ pl_inode_t *__pl_inode; \
+ call_stub_t *__stub; \
+ int32_t __error; \
+ INIT_LIST_HEAD(&contend); \
+ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \
+ &__pl_inode, &contend); \
+ if (__error < 0) { \
+ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \
+ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \
+ &contend); \
+ } else if (__error == 0) { \
+ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \
+ _loc2); \
+ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \
+ FIRST_CHILD(_xl)->fops->_fop, ##_args); \
+ } \
+ __error; \
+ })
+
gf_boolean_t
pl_has_xdata_requests(dict_t *xdata)
{
@@ -471,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
char *save_ptr = NULL;
tmp_key = gf_strdup(key);
+ if (!tmp_key)
+ return -1;
+
strtok_r(tmp_key, ":", &save_ptr);
if (!*save_ptr) {
if (tmp_key)
@@ -2962,11 +2988,85 @@ out:
return ret;
}
+static int32_t
+pl_request_link_count(dict_t **pxdata)
+{
+ dict_t *xdata;
+
+ xdata = *pxdata;
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ return ENOMEM;
+ }
+ } else {
+ dict_ref(xdata);
+ }
+
+ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
+ dict_unref(xdata);
+ return ENOMEM;
+ }
+
+ *pxdata = xdata;
+
+ return 0;
+}
+
+static int32_t
+pl_check_link_count(dict_t *xdata)
+{
+ int32_t count;
+
+ /* In case we are unable to read the link count from xdata, we take a
+ * conservative approach and return -2, which will prevent the inode from
+ * being considered deleted. In fact it will cause link tracking for this
+ * inode to be disabled completely to avoid races. */
+
+ if (xdata == NULL) {
+ return -2;
+ }
+
+ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
+ return -2;
+ }
+
+ return count;
+}
+
int32_t
pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
+ pl_inode_t *pl_inode;
+
+ if (op_ret >= 0) {
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (pl_inode == NULL) {
+ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* We only update the link count if we previously didn't know it.
+ * Doing it always can lead to races since lookup is not executed
+ * atomically most of the times. */
+ if (pl_inode->links == -2) {
+ pl_inode->links = pl_check_link_count(xdata);
+ if (buf->ia_type == IA_IFDIR) {
+ /* Directories have at least 2 links. To avoid special handling
+ * for directories, we simply decrement the value here to make
+ * them equivalent to regular files. */
+ pl_inode->links--;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
@@ -2975,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ int32_t error;
+
+ error = pl_request_link_count(&xdata);
+ if (error == 0) {
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ } else {
+ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
+ }
return 0;
}
@@ -3502,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner,
time_t *blkd_time, gf_boolean_t active)
{
char *type_str = NULL;
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
@@ -3556,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
{
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *lock = NULL;
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
int count = 0;
@@ -3579,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->entrylk_list, domain_list)
{
- gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec,
+ gf_time_fmt(granted, sizeof(granted), lock->granted_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count);
- if (lock->blkd_time.tv_sec == 0) {
+ if (lock->blkd_time == 0) {
snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
: "ENTRYLK_WRLCK",
@@ -3590,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
lkowner_utoa(&lock->owner), lock->client,
lock->connection_id, granted);
} else {
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
@@ -3607,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
{
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count);
@@ -3659,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id,
- &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
- _gf_true);
+ lock->client, lock->connection_id, &lock->granted_time,
+ &lock->blkd_time, _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3673,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
count);
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id, 0,
- &lock->blkd_time.tv_sec, _gf_false);
+ lock->client, lock->connection_id, 0, &lock->blkd_time,
+ _gf_false);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3707,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode)
gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count,
lock->blocked ? "BLOCKED" : "ACTIVE");
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->client_uid, &lock->granted_time.tv_sec,
- &lock->blkd_time.tv_sec,
- (lock->blocked) ? _gf_false : _gf_true);
+ lock->client, lock->client_uid, &lock->granted_time,
+ &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3793,6 +3899,10 @@ unlock:
gf_proc_dump_write("posixlk-count", "%d", count);
__dump_posixlks(pl_inode);
}
+
+ gf_proc_dump_write("links", "%d", pl_inode->links);
+ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
+ gf_proc_dump_write("removed", "%u", pl_inode->removed);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -4138,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
struct iatt *postoldparent, struct iatt *prenewparent,
struct iatt *postnewparent, dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent,
postoldparent, prenewparent, postnewparent, xdata);
+
return 0;
}
@@ -4147,10 +4260,15 @@ int32_t
pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename,
+ pl_rename_cbk, oldloc, newloc, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
- STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
return 0;
}
@@ -4274,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -4283,9 +4404,14 @@ int32_t
pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink,
+ pl_unlink_cbk, loc, xflag, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -4352,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -4361,9 +4490,14 @@ int
pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir,
+ pl_rmdir_cbk, loc, xflags, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -4393,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ pl_inode_t *pl_inode = (pl_inode_t *)cookie;
+
+ if (op_ret >= 0) {
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* TODO: can happen pl_inode->links == 0 ? */
+ if (pl_inode->links >= 0) {
+ pl_inode->links++;
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
preparent, postparent, xdata);
return 0;
@@ -4402,9 +4549,18 @@ int
pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
+ pl_inode_t *pl_inode;
+
+ pl_inode = pl_inode_get(this, oldloc->inode, NULL);
+ if (pl_inode == NULL) {
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
- STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
}
diff --git a/xlators/features/metadisp/Makefile.am b/xlators/features/metadisp/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/metadisp/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am
new file mode 100644
index 00000000000..1520ad8c424
--- /dev/null
+++ b/xlators/features/metadisp/src/Makefile.am
@@ -0,0 +1,38 @@
+noinst_PYTHON = gen-fops.py
+
+EXTRA_DIST = fops-tmpl.c
+
+xlator_LTLIBRARIES = metadisp.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+nodist_metadisp_la_SOURCES = fops.c
+
+BUILT_SOURCES = fops.c
+
+metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+metadisp_la_SOURCES = metadisp.c \
+ metadisp-unlink.c \
+ metadisp-stat.c \
+ metadisp-lookup.c \
+ metadisp-readdir.c \
+ metadisp-create.c \
+ metadisp-open.c \
+ metadisp-fsync.c \
+ metadisp-setattr.c \
+ backend.c
+
+metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = metadisp.h metadisp-fops.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py
+ PYTHONPATH=$(top_srcdir)/libglusterfs/src \
+ $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@
+
+CLEANFILES = $(nodist_metadisp_la_SOURCES)
diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c
new file mode 100644
index 00000000000..ee2c25bfaa7
--- /dev/null
+++ b/xlators/features/metadisp/src/backend.c
@@ -0,0 +1,45 @@
+#define GFID_STR_LEN 37
+
+#include "metadisp.h"
+
+/*
+ * backend.c
+ *
+ * functions responsible for converting user-facing paths to backend-style
+ * "/$GFID" paths.
+ */
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc)
+{
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char gfid_buf[GFID_STR_LEN + 1] = {
+ 0,
+ };
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO("metadisp", src_loc, out);
+ GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out);
+
+ loc_copy(dst_loc, src_loc);
+ memcpy(dst_loc->pargfid, root, sizeof(root));
+ GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke
+ // whatever loc_copy gave us
+
+ uuid_utoa_r(gfid, gfid_buf);
+
+ path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char),
+ gf_common_mt_char); // freed via loc_wipe
+
+ path[0] = '/';
+ strncpy(path + 1, gfid_buf, GFID_STR_LEN);
+ path[GFID_STR_LEN] = 0;
+ dst_loc->path = path;
+ if (src_loc->name)
+ dst_loc->name = strrchr(dst_loc->path, '/');
+ if (dst_loc->name)
+ dst_loc->name++;
+ return 0;
+out:
+ return -1;
+}
diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c
new file mode 100644
index 00000000000..4385b7dd5b7
--- /dev/null
+++ b/xlators/features/metadisp/src/fops-tmpl.c
@@ -0,0 +1,10 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+#pragma generate
diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py
new file mode 100644
index 00000000000..8b5e120fdec
--- /dev/null
+++ b/xlators/features/metadisp/src/gen-fops.py
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+
+import sys
+from generator import fop_subs, generate
+
+FN_METADATA_CHILD_GENERIC = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ metadata");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_GENERIC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ generic");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATAFD_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ datafd");
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATALOC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ dataloc");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+"""
+
+FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@,"
+
+skipped = [
+ "readdir",
+ "readdirp",
+ "lookup",
+ "fsync",
+ "stat",
+ "open",
+ "create",
+ "unlink",
+ "setattr",
+ # TODO: implement "inodelk",
+]
+
+
+def gen_fops():
+ done = skipped
+
+ #
+ # these are fops that wind to the DATA_CHILD
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "writev",
+ "readv",
+ "ftruncate",
+ "zerofill",
+ "discard",
+ "seek",
+ "fstat",
+ ]:
+ done = done + [name]
+ print(generate(FN_DATAFD_TEMPLATE, name, fop_subs))
+
+ for name in ["truncate"]:
+ done = done + [name]
+ print(generate(FN_DATALOC_TEMPLATE, name, fop_subs))
+
+ # these are fops that operate solely on dentries, folders,
+ # or extended attributes. Therefore, they must always
+ # wind to METADATA_CHILD and should never perform
+ # any path rewriting
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "mkdir",
+ "symlink",
+ "link",
+ "rename",
+ "mknod",
+ "opendir",
+ # "readdir, # special-cased
+ # "readdirp, # special-cased
+ "fsyncdir",
+ # "setattr", # special-cased
+ "readlink",
+ "fentrylk",
+ "access",
+ # TODO: these wind to both,
+ # data for backend-attributes and metadata for the rest
+ "xattrop",
+ "setxattr",
+ "getxattr",
+ "removexattr",
+ "fgetxattr",
+ "fsetxattr",
+ "fremovexattr",
+ ]:
+
+ done = done + [name]
+ print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs))
+
+ print("struct xlator_fops fops = {")
+ for name in done:
+ print(generate(FOPS_LINE_TEMPLATE, name, fop_subs))
+
+ print("};")
+
+
+for l in open(sys.argv[1], "r").readlines():
+ if l.find("#pragma generate") != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_fops()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c
new file mode 100644
index 00000000000..f8c9798dd59
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-create.c
@@ -0,0 +1,101 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Create, like stat, is a two-step process. We send a create
+ * to the METADATA_CHILD, then send another create to the DATA_CHILD.
+ *
+ * We do the metadata child first to ensure that the ACLs are enforced.
+ */
+
+int32_t
+metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ // create the backend data inode
+ STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = cookie;
+ if (op_ret != 0) {
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+ }
+
+ if (stub == NULL) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int32_t
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+
+ loc_t backend_loc = {
+ 0,
+ };
+ call_stub_t *stub = NULL;
+ uuid_t *gfid_req = NULL;
+
+ RESOLVE_GFID_REQ(xdata, gfid_req, out);
+
+ if (build_backend_loc(*gfid_req, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ frame->local = loc;
+
+ stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags,
+ mode, umask, fd, xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+out:
+ return -1;
+}
diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h
new file mode 100644
index 00000000000..56dd427cf34
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fops.h
@@ -0,0 +1,51 @@
+#ifndef GF_METADISP_FOPS_H_
+#define GF_METADISP_FOPS_H_
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/glusterfs.h>
+
+#include <sys/types.h>
+
+/* fops in here are defined in their own file. Every other fop is just defined
+ * inline of fops.c */
+
+int
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata);
+
+int
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+#endif
diff --git a/xlators/features/metadisp/src/metadisp-fsync.c b/xlators/features/metadisp/src/metadisp-fsync.c
new file mode 100644
index 00000000000..2e46fa84eac
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fsync.c
@@ -0,0 +1,54 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c
new file mode 100644
index 00000000000..27d90c9f746
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-lookup.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Lookup, like stat, is a two-step process for grabbing the metadata details
+ * as well as the data details.
+ */
+
+int32_t
+metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ METADISP_TRACE("backend_lookup_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("backend_lookup_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->lookup, &backend_loc, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+
+ // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ METADISP_TRACE("lookup");
+ call_stub_t *stub = NULL;
+ stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c
new file mode 100644
index 00000000000..64814afe636
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-open.c
@@ -0,0 +1,70 @@
+#include <glusterfs/call-stub.h>
+#include "metadisp.h"
+
+int32_t
+metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE("got open results %d %d", op_ret, op_errno);
+
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!stub) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd,
+ xdata);
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c
new file mode 100644
index 00000000000..5f840b1e88f
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-readdir.c
@@ -0,0 +1,65 @@
+#include "metadisp.h"
+
+/**
+ * With a change to the posix xlator, readdir and readdirp are shockingly
+ * simple.
+ *
+ * The issue with separating the backend data of the files
+ * with the metadata is that readdirs must now read from multiple sources
+ * to coalesce the directory entries.
+ *
+ * The way we do this is to tell the METADATA_CHILD that when it's
+ * running readdirp, each file entry should have a stat wound to
+ * 'stat-source-of-truth'.
+ *
+ * see metadisp_stat for how it handles winds _from_posix.
+ */
+
+int32_t
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ /*
+ * Always use readdirp, even if the original was readdir. Why? Because NFS.
+ * There are multiple translations between Gluster, UNIX, and NFS stat
+ * structures in that path. One of them uses the type etc. from the stat
+ * structure, which is only filled in by readdirp. If we use readdir, the
+ * entries do actually go all the way back to the client and are visible in
+ * getdents, but then the readdir throws them away because of the
+ * uninitialized type.
+ */
+ GF_UNUSED int32_t ret;
+ if (!xdata) {
+ xdata = dict_new();
+ }
+
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ return 0;
+}
+
+int32_t
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ GF_UNUSED int32_t ret;
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c
new file mode 100644
index 00000000000..6991cf644f3
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-setattr.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid,
+ xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(statpost->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ METADISP_TRACE("setattr");
+ call_stub_t *stub = NULL;
+ stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf,
+ valid, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c
new file mode 100644
index 00000000000..b06d0dbcddd
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-stat.c
@@ -0,0 +1,124 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The stat flow in METADISP is complicated because we must
+ * do ensure a few things:
+ * 1. stat, on the path within the metadata layer,
+ * MUST get the backend FD of the data layer.
+ * --- we wind to the metadata layer, then the data layer.
+ *
+ * 2. the metadata layer MUST be able to ask the data
+ * layer for stat information.
+ * --- this is 'syncop-internal-from-posix'
+ *
+ * 3. when the metadata exists BUT the data is missing,
+ * we MUST mark the backend file as bad and heal it.
+ */
+
+int32_t
+metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno);
+ if (op_errno == ENOENT) {
+ STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL);
+ return 0;
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("winding stat to path %s", loc->path);
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("bad object, sending EUCLEAN");
+ STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this),
+ SECOND_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ METADISP_TRACE("got stat results %d %d", op_ret, op_errno);
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // only use the stub for the files
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int32_t ret = 0;
+ loc_t backend_loc = {
+ 0,
+ };
+ METADISP_FILTER_ROOT(stat, loc, xdata);
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) {
+ // if we've just been sent a stat from posix, then we know
+ // that we must send down a stat for a file to the second child.
+ //
+ // that means we can skip the stat for the first child and just
+ // send to the data disk.
+ METADISP_TRACE("got syncop-internal-from-posix");
+ STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->stat, &backend_loc, xdata);
+ return 0;
+ }
+
+ // we do not know if the request is for a file, folder, etc. wind
+ // to first child to find out.
+ stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata);
+ METADISP_TRACE("winding stat to first child %s", loc->path);
+ STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c
new file mode 100644
index 00000000000..1f6a8eb35ce
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-unlink.c
@@ -0,0 +1,160 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The unlink flow in metadisp is complicated because we must
+ * do ensure that UNLINK causes both the metadata objects
+ * to get removed and the data objects to get removed.
+ */
+
+int32_t
+metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
+{
+ METADISP_TRACE("winding backend unlink to path %s", loc->path);
+ STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE(". %d %d", op_ret, op_errno);
+
+ int ret = 0;
+ call_stub_t *stub = NULL;
+ int nlink = 0;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink);
+ if (ret != 0) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto unwind;
+ }
+ METADISP_TRACE("frontend hardlink count %d %d", ret, nlink);
+ if (nlink > 1) {
+ goto unwind;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ call_stub_t *stub = NULL;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // fail fast on empty gfid so we don't loop forever
+ if (gf_uuid_is_null(buf->ia_gfid)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto unwind;
+ }
+
+ // fill gfid since the stub is incomplete
+ memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t));
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("winding lookup for unlink to path %s", loc->path);
+
+ // loop back to ourselves after a lookup
+ stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub,
+ METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+ }
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ //
+ // ensure we get the link count on the unlink response, so we can
+ // account for hardlinks before winding to the backend.
+ // NOTE:
+ // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation
+ // is needed to ensure that multiple requests will work in the same
+ // xlator stack.
+ //
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+
+ METADISP_TRACE("winding frontend unlink to path %s", loc->path);
+ stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag,
+ xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c
new file mode 100644
index 00000000000..3c8f150cebc
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.c
@@ -0,0 +1,46 @@
+#include <glusterfs/call-stub.h>
+
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+int32_t
+init(xlator_t *this)
+{
+ if (!this->children) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "not configured with children. exiting");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ return;
+}
+
+/* defined in fops.c */
+struct xlator_fops fops;
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .op_version = {1},
+ .identifier = "metadisp",
+ .category = GF_EXPERIMENTAL,
+};
diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h
new file mode 100644
index 00000000000..c8fd7a13c04
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef GF_METADISP_H_
+#define GF_METADISP_H_
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#define METADATA_CHILD(_this) FIRST_CHILD(_this)
+#define DATA_CHILD(_this) SECOND_CHILD(_this)
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc);
+
+#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args)
+
+#define METADISP_FILTER_ROOT(_op, _args...) \
+ if (strcmp(loc->path, "/") == 0) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) \
+ if (__is_root_gfid(_gfid)) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \
+ VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \
+ _lbl)
+
+#endif /* __TEMPLATE_H__ */
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
index 097439d86d6..480d64ade27 100644
--- a/xlators/features/quota/src/quota-enforcer-client.c
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -32,12 +32,6 @@
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
#include "quota.h"
#include "quota-messages.h"
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 73c008a2c00..18df9ae6d19 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -586,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -626,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* loop of validation and checking
* limit when timeout is zero.
*/
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -644,27 +640,10 @@ unwind:
return 0;
}
-static uint64_t
-quota_time_elapsed(struct timeval *now, struct timeval *then)
-{
- return (now->tv_sec - then->tv_sec);
-}
-
-int32_t
-quota_timeout(struct timeval *tv, int32_t timeout)
+static inline gf_boolean_t
+quota_timeout(time_t t, uint32_t timeout)
{
- struct timeval now = {
- 0,
- };
- int32_t timed_out = 0;
-
- gettimeofday(&now, NULL);
-
- if (quota_time_elapsed(&now, tv) >= timeout) {
- timed_out = 1;
- }
-
- return timed_out;
+ return (gf_time() - t) >= timeout;
}
/* Return: 1 if new entry added
@@ -1128,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if ((object_aggr_count) > ctx->object_hard_lim) {
hard_limit_exceeded = 1;
@@ -1195,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if (wouldbe_size >= ctx->hard_lim) {
hard_limit_exceeded = 1;
@@ -4314,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -4348,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
}
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -4873,7 +4848,7 @@ off:
void
quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
- char **path, struct timeval *cur_time)
+ char **path, time_t *cur_time)
{
xlator_t *this = THIS;
@@ -4892,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
if (!(*path))
*path = uuid_utoa(inode->gfid);
- gettimeofday(cur_time, NULL);
+ *cur_time = gf_time();
}
/* Logs if
@@ -4903,9 +4878,7 @@ void
quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
int64_t delta)
{
- struct timeval cur_time = {
- 0,
- };
+ time_t cur_time = 0;
char *usage_str = NULL;
char *path = NULL;
int64_t cur_size = 0;
@@ -4931,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
/* Usage is above soft limit */
else if (cur_size > ctx->soft_lim &&
- quota_timeout(&ctx->prev_log, priv->log_timeout)) {
+ quota_timeout(ctx->prev_log_time, priv->log_timeout)) {
quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time);
gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT,
@@ -4947,7 +4920,7 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
if (path)
@@ -5184,9 +5157,9 @@ quota_priv_dump(xlator_t *this)
if (ret)
goto out;
else {
- gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout);
- gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout);
- gf_proc_dump_write("alert-time", "%d", priv->log_timeout);
+ gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout);
+ gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout);
+ gf_proc_dump_write("alert-time", "%u", priv->log_timeout);
gf_proc_dump_write("quota-on", "%d", priv->is_quota_on);
gf_proc_dump_write("statfs", "%d", priv->consider_statfs);
gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid);
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index 8a3dc7a77f5..0395d78c9ef 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -153,8 +153,8 @@ struct quota_inode_ctx {
int64_t object_soft_lim;
struct iatt buf;
struct list_head parents;
- struct timeval tv;
- struct timeval prev_log;
+ time_t validate_time;
+ time_t prev_log_time;
gf_boolean_t ancestry_built;
gf_lock_t lock;
};
@@ -199,6 +199,7 @@ struct quota_local {
typedef struct quota_local quota_local_t;
struct quota_priv {
+ /* FIXME: consider time_t for timeouts. */
uint32_t soft_timeout;
uint32_t hard_timeout;
uint32_t log_timeout;
diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c
index 25fbd4aa748..df45f2a940b 100644
--- a/xlators/features/read-only/src/worm-helper.c
+++ b/xlators/features/read-only/src/worm-helper.c
@@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
GF_VALIDATE_OR_GOTO("worm", this, out);
GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
- start_time = time(NULL);
+ start_time = gf_time();
dict = dict_new();
if (!dict) {
gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
@@ -94,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
if (ret)
goto out;
stbuf->ia_mtime = stpre.ia_mtime;
- stbuf->ia_atime = time(NULL) + retention_state->ret_period;
+ stbuf->ia_atime = gf_time() + retention_state->ret_period;
if (fop_with_fd)
ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
@@ -286,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
{
int op_errno = EROFS;
int ret = -1;
+ time_t now = 0;
uint64_t com_period = 0;
uint64_t start_time = 0;
dict_t *dict = NULL;
@@ -337,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
goto out;
}
- if (ret == -1 && (time(NULL) - start_time) >= com_period) {
- if ((time(NULL) - stbuf.ia_mtime) >= com_period) {
+ now = gf_time();
+
+ if (ret == -1 && (now - start_time) >= com_period) {
+ if ((now - stbuf.ia_mtime) >= com_period) {
ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state,
&stbuf);
if (ret) {
@@ -352,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
op_errno = 0;
goto out;
}
- } else if (ret == -1 && (time(NULL) - start_time) < com_period) {
+ } else if (ret == -1 && (now - start_time) < com_period) {
op_errno = 0;
goto out;
- } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) {
+ } else if (reten_state.retain && ((now >= stbuf.ia_atime))) {
gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf);
}
if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable &&
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index e4042117427..e5f93063943 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -513,6 +513,9 @@ shard_local_wipe(shard_local_t *local)
loc_wipe(&local->int_entrylk.loc);
loc_wipe(&local->newloc);
+ if (local->name)
+ GF_FREE(local->name);
+
if (local->int_entrylk.basename)
GF_FREE(local->int_entrylk.basename);
if (local->fd)
@@ -1001,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
}
int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+ xlator_t *this);
+
+int
shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
shard_post_resolve_fop_handler_t post_res_handler)
{
@@ -1017,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
shard_local_t *local = NULL;
+ uint64_t resolve_count = 0;
priv = this->private;
local = frame->local;
local->call_count = 0;
shard_idx_iter = local->first_block;
res_inode = local->resolver_base_inode;
+
+ if ((local->op_ret < 0) || (local->resolve_not))
+ goto out;
+
+ /* If this prealloc FOP is for fresh file creation, then the size of the
+ * file will be 0. Then there will be no shards associated with this file.
+ * So we can skip the lookup process for the shards which do not exists
+ * and directly issue mknod to crete shards.
+ *
+ * In case the prealloc fop is to extend the preallocated file to bigger
+ * size then just lookup and populate inodes of existing shards and
+ * update the create count
+ */
+ if (local->fop == GF_FOP_FALLOCATE) {
+ if (!local->prebuf.ia_size) {
+ local->inode_list[0] = inode_ref(res_inode);
+ local->create_count = local->last_block;
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
+ return 0;
+ }
+ if (local->prebuf.ia_size < local->total_size)
+ local->create_count = local->last_block -
+ ((local->prebuf.ia_size - 1) /
+ local->block_size);
+ }
+
+ resolve_count = local->last_block - local->create_count;
+
if (res_inode)
gf_uuid_copy(gfid, res_inode->gfid);
else
gf_uuid_copy(gfid, local->base_gfid);
- if ((local->op_ret < 0) || (local->resolve_not))
- goto out;
-
- while (shard_idx_iter <= local->last_block) {
+ while (shard_idx_iter <= resolve_count) {
i++;
if (shard_idx_iter == 0) {
local->inode_list[i] = inode_ref(res_inode);
@@ -1659,26 +1692,24 @@ err:
}
int
-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
+shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
int ret = -1;
int32_t mask = SHARD_INODE_WRITE_MASK;
- shard_local_t *local = NULL;
+ shard_local_t *local = frame->local;
shard_inode_ctx_t ctx = {
0,
};
- local = frame->local;
-
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_ERROR, op_errno,
SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
"Lookup on base file"
" failed : %s",
- loc_gfid_utoa(&(local->loc)));
+ uuid_utoa(inode->gfid));
local->op_ret = op_ret;
local->op_errno = op_errno;
goto unwind;
@@ -1712,18 +1743,57 @@ unwind:
}
int
-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
- shard_post_fop_handler_t handler)
+shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ shard_local_t *local = frame->local;
+
+ shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret,
+ op_errno, buf, xdata);
+ return 0;
+}
+
+int
+shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ /* In case of op_ret < 0, inode passed to this function will be NULL
+ ex: in case of op_errno = ENOENT. So refer prefilled inode data
+ which is part of local.
+ Note: Reassigning/overriding the inode passed to this cbk with inode
+ which is part of *struct shard_local_t* won't cause any issue as
+ both inodes have same reference/address as of the inode passed */
+ inode = ((shard_local_t *)frame->local)->loc.inode;
+
+ shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno,
+ buf, xdata);
+ return 0;
+}
+
+/* This function decides whether to make file based lookup or
+ * fd based lookup (fstat) depending on the 3rd and 4th arg.
+ * If fd != NULL and loc == NULL then call is for fstat
+ * If fd == NULL and loc != NULL then call is for file based
+ * lookup. Please pass args based on the requirement.
+ */
+int
+shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, shard_post_fop_handler_t handler)
{
int ret = -1;
+ inode_t *inode = NULL;
shard_local_t *local = NULL;
dict_t *xattr_req = NULL;
gf_boolean_t need_refresh = _gf_false;
local = frame->local;
local->handler = handler;
+ inode = fd ? fd->inode : loc->inode;
- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
+ ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf,
&need_refresh);
/* By this time, inode ctx should have been created either in create,
* mknod, readdirp or lookup. If not it is a bug!
@@ -1732,7 +1802,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_msg_debug(this->name, 0,
"Skipping lookup on base file: %s"
"Serving prebuf off the inode ctx cache",
- uuid_utoa(loc->gfid));
+ uuid_utoa(inode->gfid));
goto out;
}
@@ -1743,10 +1813,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out);
- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ if (fd)
+ STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xattr_req);
+ else
+ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
dict_unref(xattr_req);
return 0;
@@ -2015,8 +2089,8 @@ shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode)
*/
if (!inode) {
gf_msg_debug(this->name, 0,
- "Last shard to be truncated absent in backend: %d of "
- "gfid %s. Directly proceeding to update file size",
+ "Last shard to be truncated absent in backend: %" PRIu64
+ " of gfid %s. Directly proceeding to update file size",
local->first_block, uuid_utoa(local->loc.inode->gfid));
shard_update_file_size(frame, this, NULL, &local->loc,
shard_post_update_size_truncate_handler);
@@ -2399,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
int count = 0;
int call_count = 0;
int32_t shard_idx_iter = 0;
- int last_block = 0;
+ int lookup_count = 0;
char path[PATH_MAX] = {
0,
};
@@ -2419,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
local = frame->local;
count = call_count = local->call_count;
shard_idx_iter = local->first_block;
- last_block = local->last_block;
+ lookup_count = local->last_block - local->create_count;
local->pls_fop_handler = handler;
if (local->lookup_shards_barriered)
local->barrier.waitfor = local->call_count;
@@ -2429,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
else
gf_uuid_copy(gfid, local->base_gfid);
- while (shard_idx_iter <= last_block) {
+ while (shard_idx_iter <= lookup_count) {
if (local->inode_list[i]) {
i++;
shard_idx_iter++;
@@ -2574,6 +2648,7 @@ shard_truncate_begin(call_frame_t *frame, xlator_t *this)
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE)
? local->loc.inode
: local->fd->inode;
@@ -2723,8 +2798,8 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
local->resolver_base_inode = loc->inode;
GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_truncate_handler);
return 0;
err:
@@ -2779,8 +2854,8 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->resolver_base_inode = fd->inode;
GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_truncate_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
@@ -2924,8 +2999,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!local->xattr_req)
goto err;
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_link_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_link_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
@@ -4254,8 +4329,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
switch (local->fop) {
case GF_FOP_UNLINK:
case GF_FOP_RENAME:
- shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
- shard_post_lookup_base_shard_rm_handler);
+ shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL,
+ shard_post_lookup_base_shard_rm_handler);
break;
default:
gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
@@ -4510,8 +4585,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->block_size) {
local->tmp_loc.inode = inode_new(this->itable);
gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
- shard_lookup_base_file(frame, this, &local->tmp_loc,
- shard_post_rename_lookup_handler);
+ shard_refresh_base_file(frame, this, &local->tmp_loc, NULL,
+ shard_post_rename_lookup_handler);
} else {
shard_rename_cbk(frame, this);
}
@@ -5150,6 +5225,7 @@ shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this)
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->resolver_base_inode = local->loc.inode;
local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
@@ -5246,8 +5322,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_readv_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_readv_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
@@ -5610,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
shard_common_lookup_shards(
frame, this, local->resolver_base_inode,
shard_common_inode_write_post_lookup_shards_handler);
+ } else if (local->create_count) {
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
} else {
shard_common_inode_write_do(frame, this);
}
@@ -5640,6 +5718,7 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
local->last_block = get_highest_block(local->offset, local->total_size,
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
gf_shard_mt_inode_list);
if (!local->inode_list) {
@@ -5648,9 +5727,9 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
}
gf_msg_trace(this->name, 0,
- "%s: gfid=%s first_block=%" PRIu32
+ "%s: gfid=%s first_block=%" PRIu64
" "
- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64
+ "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64
" total_size=%zu flags=%" PRId32 "",
gf_fop_list[local->fop],
uuid_utoa(local->resolver_base_inode->gfid),
@@ -6045,8 +6124,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_fsync_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_fsync_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
@@ -6238,48 +6317,210 @@ shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
int32_t
-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local,
+ char *key)
{
- int op_errno = EINVAL;
+ int ret = 0;
+ struct iatt *tmpbuf = NULL;
+ struct iatt *stbuf = NULL;
+ data_t *data = NULL;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, key);
+ if (!data)
+ return 0;
+
+ tmpbuf = data_to_iatt(data, key);
+ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+ if (stbuf == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
}
+ *stbuf = *tmpbuf;
+ stbuf->ia_size = local->prebuf.ia_size;
+ stbuf->ia_blocks = local->prebuf.ia_blocks;
+ ret = dict_set_iatt(xdata, key, stbuf, false);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ return 0;
- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+err:
+ GF_FREE(stbuf);
+ return -1;
+}
+
+int32_t
+shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ xdata);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
return 0;
}
int32_t
-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this)
{
- int op_errno = EINVAL;
+ shard_local_t *local = NULL;
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->fd)
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, local->fd,
+ local->name, local->xattr_req);
+ else
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, &local->loc,
+ local->name, local->xattr_req);
+ return 0;
+}
+
+int32_t
+shard_common_remove_xattr(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop, loc_t *loc, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ /* If shard's special xattrs are attempted to be removed,
+ * fail the fop with EPERM (except if the client is gsyncd).
+ */
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err);
}
+ /* Repeat the same check for bulk-removexattr */
if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+
+ if (name) {
+ local->name = gf_strdup(name);
+ if (!local->name)
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_remove_xattr_handler);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name,
+ xdata);
return 0;
}
@@ -6360,38 +6601,164 @@ out:
}
int32_t
-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
+shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int op_errno = EINVAL;
+ int ret = -1;
+ shard_local_t *local = NULL;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
return 0;
}
int32_t
-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this)
{
- int op_errno = EINVAL;
+ shard_local_t *local = NULL;
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->fd)
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, local->fd,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ else
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &local->loc,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ return 0;
+}
+
+int32_t
+shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
+ }
+
+ /* Sharded or not, if shard's special xattrs are attempted to be set,
+ * fail the fop with EPERM (except if the client is gsyncd.
+ */
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err);
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+ local->flags = flags;
+ /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict
+ * and the xdata dict
+ */
+ if (dict)
+ local->xattr_req = dict_ref(dict);
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_set_xattr_handler);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags,
+ xdata);
return 0;
}
@@ -6654,8 +7021,8 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_common_inode_write_post_lookup_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_common_inode_write_post_lookup_handler);
return 0;
out:
shard_common_failure_unwind(fop, frame, -1, ENOMEM);
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 04abd62c21c..4fe181b64d5 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame,
typedef struct shard_local {
int op_ret;
int op_errno;
- int first_block;
- int last_block;
- int num_blocks;
+ uint64_t first_block;
+ uint64_t last_block;
+ uint64_t num_blocks;
int call_count;
int eexist_count;
int create_count;
@@ -318,6 +318,7 @@ typedef struct shard_local {
uint32_t deletion_rate;
gf_boolean_t cleanup_required;
uuid_t base_gfid;
+ char *name;
} shard_local_t;
typedef struct shard_inode_ctx {
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index f44b11c6872..7d09cba3e9c 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -212,11 +212,11 @@ void
append_time_stamp(char *name, size_t name_size)
{
int i;
- char timestr[64] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
- gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS);
+ gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS);
/* removing white spaces in timestamp */
for (i = 0; i < strlen(timestr); i++) {
diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c
index 978825f6b56..c641bd6f432 100644
--- a/xlators/features/upcall/src/upcall-internal.c
+++ b/xlators/features/upcall/src/upcall-internal.c
@@ -316,7 +316,7 @@ upcall_reaper_thread(void *data)
priv = this->private;
GF_ASSERT(priv);
- time_now = time(NULL);
+ time_now = gf_time();
while (!priv->fini) {
list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list,
inode_ctx_list)
@@ -344,7 +344,7 @@ upcall_reaper_thread(void *data)
/* don't do a very busy loop */
timeout = get_cache_invalidation_timeout(this);
sleep(timeout / 2);
- time_now = time(NULL);
+ time_now = gf_time();
}
return NULL;
@@ -533,7 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
goto out;
}
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
@@ -670,13 +670,13 @@ upcall_cache_forget(xlator_t *this, inode_t *inode,
return;
}
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
&up_inode_ctx->client_list, client_list)
{
- /* Set the access time to time(NULL)
+ /* Set the access time to gf_time()
* to send notify */
up_client_entry->access_time = time_now;