summaryrefslogtreecommitdiffstats
path: root/xlators/storage/posix/src
diff options
context:
space:
mode:
author: Mohit Agrawal <moagrawa@redhat.com> 2017-11-28 11:26:10 +0530
committer: Krutika Dhananjay <kdhananj@redhat.com> 2017-12-01 10:17:44 +0000
commit: 185917c7a621ce0971e883b35221895ef235ca18 (patch)
tree: 2b6f8fd79dff79969e096e606c7027cb83942dd8 /xlators/storage/posix/src
parent: 48e3ae7482a7c70dc130dc7f8198636a87649d54 (diff)
posix: Convert posix_fs_health_check to save the timestamp asynchronously
Problem: Sometimes the posix_fs_health_check thread is blocked on a write/read call when the backend device is deleted abruptly.
Solution: To resolve this, convert the code to update the timestamp asynchronously.
BUG: 1501132
Change-Id: Id68ea6a572bf68fbf437e1d9be5221b63d47ff9c
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Diffstat (limited to 'xlators/storage/posix/src')
-rw-r--r--xlators/storage/posix/src/posix-helpers.c98
-rw-r--r--xlators/storage/posix/src/posix.c15
-rw-r--r--xlators/storage/posix/src/posix.h2
3 files changed, 99 insertions, 16 deletions
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 77affc45ae0..5a3f4b129fb 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -18,6 +18,7 @@
#include <ftw.h>
#include <sys/stat.h>
#include <signal.h>
+#include <aio.h>
#ifdef HAVE_SYS_ACL_H
#ifdef HAVE_ACL_LIBACL_H /* for acl_to_any_text() */
@@ -1768,44 +1769,108 @@ posix_fs_health_check (xlator_t *this)
char timestamp[256] = {0,};
int fd = -1;
int timelen = -1;
- int nofbytes = 0;
time_t time_sec = {0,};
- char buff[64] = {0};
+ char buff[256] = {0};
char file_path[PATH_MAX] = {0};
char *op = NULL;
int op_errno = 0;
+ int cnt = 0;
+ int timeout = 0;
+ struct aiocb aiocb;
GF_VALIDATE_OR_GOTO (this->name, this, out);
priv = this->private;
GF_VALIDATE_OR_GOTO ("posix-helpers", priv, out);
subvol_path = priv->base_path;
- snprintf (file_path, sizeof (file_path), "%s/%s/health_check",
+ timeout = priv->health_check_timeout;
+ snprintf (file_path, sizeof (file_path)-1, "%s/%s/health_check",
subvol_path, GF_HIDDEN_PATH);
time_sec = time (NULL);
gf_time_fmt (timestamp, sizeof timestamp, time_sec, gf_timefmt_FT);
timelen = strlen (timestamp);
- fd = open (file_path, O_CREAT|O_RDWR, 0644);
+ fd = open (file_path, O_CREAT|O_WRONLY|O_TRUNC, 0644);
if (fd == -1) {
op_errno = errno;
- op = "open";
+ op = "open_for_write";
goto out;
}
- nofbytes = sys_write (fd, timestamp, timelen);
- if (nofbytes < 0) {
+ memset(&aiocb, 0, sizeof(struct aiocb));
+ aiocb.aio_fildes = fd;
+ aiocb.aio_buf = timestamp;
+ aiocb.aio_nbytes = timelen;
+ aiocb.aio_sigevent.sigev_notify = SIGEV_NONE;
+ if (aio_write(&aiocb) == -1) {
op_errno = errno;
- op = "write";
+ op = "aio_write";
goto out;
}
- /* Seek the offset to the beginning of the file, so that the offset for
- read is from beginning of file */
- sys_lseek(fd, 0, SEEK_SET);
- nofbytes = sys_read (fd, buff, timelen);
- if (nofbytes == -1) {
+
+ /* Wait until write completion */
+ while ((aio_error (&aiocb) == EINPROGRESS) && (++cnt <= timeout))
+ sleep (1);
+
+ ret = aio_error (&aiocb);
+ if (ret != 0) {
+ op_errno = errno;
+ op = "aio_write_error";
+ ret = -1;
+ goto out;
+ }
+
+ ret = aio_return (&aiocb);
+ if (ret != timelen) {
+ op_errno = errno;
+ op = "aio_write_buf";
+ ret = -1;
+ goto out;
+ }
+
+ sys_close (fd);
+
+ fd = open (file_path, O_RDONLY);
+ if (fd == -1) {
+ op_errno = errno;
+ op = "open_for_read";
+ goto out;
+ }
+
+ memset(&aiocb, 0, sizeof(struct aiocb));
+ aiocb.aio_fildes = fd;
+ aiocb.aio_buf = buff;
+ aiocb.aio_nbytes = sizeof(buff);
+ if (aio_read(&aiocb) == -1) {
op_errno = errno;
- op = "read";
+ op = "aio_read";
+ goto out;
+ }
+ cnt = 0;
+ /* Wait until read completion */
+ while ((aio_error (&aiocb) == EINPROGRESS) && (++cnt <= timeout))
+ sleep (1);
+
+ ret = aio_error (&aiocb);
+ if (ret != 0) {
+ op_errno = errno;
+ op = "aio_read_error";
+ ret = -1;
+ goto out;
+ }
+
+ ret = aio_return (&aiocb);
+ if (ret != timelen) {
+ op_errno = errno;
+ op = "aio_read_buf";
+ ret = -1;
+ goto out;
+ }
+
+ if (memcmp (timestamp, buff, ret)) {
+ op_errno = EUCLEAN;
+ op = "aio_read_cmp_buf";
+ ret = -1;
goto out;
}
ret = 0;
@@ -1818,8 +1883,9 @@ out:
P_MSG_HEALTHCHECK_FAILED,
"%s() on %s returned", op, file_path);
gf_event (EVENT_POSIX_HEALTH_CHECK_FAILED,
- "op=%s;path=%s;error=%s;brick=%s:%s", op, file_path,
- strerror (op_errno), priv->hostname, priv->base_path);
+ "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d",
+ op, file_path, strerror (op_errno), priv->hostname,
+ priv->base_path, timeout);
}
return ret;
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 9ba2edf9fdf..0435d7a677f 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -7252,6 +7252,8 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
options, uint32, out);
+ GF_OPTION_RECONF ("health-check-timeout", priv->health_check_timeout,
+ options, uint32, out);
posix_spawn_health_check_thread (this);
GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count,
@@ -7858,6 +7860,8 @@ init (xlator_t *this)
_private->health_check_active = _gf_false;
GF_OPTION_INIT ("health-check-interval",
_private->health_check_interval, uint32, out);
+ GF_OPTION_INIT ("health-check-timeout",
+ _private->health_check_timeout, uint32, out);
if (_private->health_check_interval)
posix_spawn_health_check_thread (this);
@@ -8090,6 +8094,17 @@ struct volume_options options[] = {
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC
},
{
+ .key = {"health-check-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "10",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Interval in seconds to wait aio_write finish for health check, "
+ "set to 0 to disable",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+ },
+ {
.key = {"reserve"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index dc8ac0106ab..8db87f1de9f 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -210,6 +210,8 @@ struct posix_private {
/* seconds to sleep between health checks */
uint32_t health_check_interval;
+ /* seconds to sleep to wait for aio write finish for health checks */
+ uint32_t health_check_timeout;
pthread_t health_check;
gf_boolean_t health_check_active;