summaryrefslogtreecommitdiffstats
path: root/xlators/storage
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage')
-rw-r--r--xlators/storage/posix/src/posix-aio.c5
-rw-r--r--xlators/storage/posix/src/posix-helpers.c30
-rw-r--r--xlators/storage/posix/src/posix.c130
-rw-r--r--xlators/storage/posix/src/posix.h12
4 files changed, 168 insertions, 9 deletions
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
index d8ef5f7b73f..636108affbb 100644
--- a/xlators/storage/posix/src/posix-aio.c
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -331,6 +331,11 @@ posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (!posix_write_ok (this, priv)) {
+ op_errno = ENOSPC;
+ goto err;
+ }
+
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 10e91370440..1ebff47879d 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -805,6 +805,7 @@ posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
int ret = 0;
ssize_t size = 0;
struct stat stat = {0, };
+ char *new_uuid = NULL;
if (!xattr_req)
@@ -813,12 +814,6 @@ posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
if (sys_lstat (path, &stat) != 0)
goto out;
- size = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
- if (size == 16) {
- ret = 0;
- goto verify_handle;
- }
-
ret = dict_get_ptr (xattr_req, "gfid-req", &uuid_req);
if (ret) {
gf_msg_debug (this->name, 0,
@@ -827,7 +822,28 @@ posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
goto out;
}
- ret = sys_lsetxattr (path, GFID_XATTR_KEY, uuid_req, 16, XATTR_CREATE);
+ size = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
+ if (size == 16) {
+ if (!gf_uuid_compare (uuid_curr, uuid_req)) {
+ ret = 0;
+ goto verify_handle;
+ }
+
+ /* File has an existing GFID which differs from
+ * the requested one. This can occur when a subvolume
+ * has been offline while a file is deleted, and then
+ * comes back up but has not yet healed. Get rid of
+ * the old GFID link (handle_unset) and fall through
+ * to the set case below.
+ */
+ new_uuid = strdupa (uuid_utoa (uuid_req));
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: existing gfid %s overwritten with %s.",
+ path, uuid_utoa (uuid_curr), new_uuid);
+ posix_handle_unset (this, uuid_curr, NULL);
+ }
+
+ ret = sys_lsetxattr (path, GFID_XATTR_KEY, uuid_req, 16, 0);
if (ret == -1) {
gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_GFID_FAILED,
"setting GFID on %s failed ", path);
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index aa5a526423f..49665884a7e 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -672,6 +672,81 @@ out:
return 0;
}
+/*
+ * Decide whether a statvfs() sample satisfies the min-free-disk limit.
+ *
+ * @this:          xlator whose name tags the log messages.
+ * @stats:         filesystem statistics to evaluate.
+ * @min_free_disk: threshold. A value below 100.0 is compared against the
+ *                 percentage of available blocks; any other value is
+ *                 compared against the number of available bytes.
+ * @previously_ok: verdict of the previous evaluation. Only used so that
+ *                 messages are logged on a state change instead of on
+ *                 every call.
+ *
+ * Returns _gf_true when the free space is at or above the threshold,
+ * _gf_false otherwise.
+ */
+static gf_boolean_t
+freespace_ok (xlator_t *this, const struct statvfs *stats,
+              double min_free_disk, gf_boolean_t previously_ok)
+{
+        gf_boolean_t currently_ok = _gf_false;
+
+        if (min_free_disk < 100.0) {
+                /* A filesystem that reports zero total blocks would turn
+                 * the division into 0/0 (NaN). Treat it as 0% free so the
+                 * comparison and the log message stay well-defined. */
+                double free_percent = 0.0;
+
+                if (stats->f_blocks != 0) {
+                        free_percent = 100.0 * stats->f_bavail /
+                                       stats->f_blocks;
+                }
+
+                currently_ok =
+                        free_percent >= min_free_disk ? _gf_true : _gf_false;
+                if (previously_ok && !currently_ok) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "min-free-disk limit exceeded: free percent "
+                                "%f%% < %f%%. Writes disabled.",
+                                free_percent, min_free_disk);
+                }
+        } else {
+                /* Convert before multiplying: the product of two integer
+                 * statvfs fields can wrap on platforms where those types
+                 * are only 32 bits wide. */
+                double free_bytes = (double) stats->f_bavail *
+                                    (double) stats->f_frsize;
+
+                currently_ok =
+                        free_bytes >= min_free_disk ? _gf_true : _gf_false;
+                if (previously_ok && !currently_ok) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "min-free-disk limit exceeded: free bytes %f "
+                                "< %f. Writes disabled.",
+                                free_bytes, min_free_disk);
+                }
+        }
+
+        if (currently_ok && !previously_ok) {
+                gf_log (this->name, GF_LOG_INFO, "Free space has risen above "
+                                                 "min-free-disk limit, writes "
+                                                 "re-enabled.");
+        }
+
+        return currently_ok;
+}
+
+/*
+ * Report whether writes may proceed under the min-free-disk limit.
+ *
+ * @this: xlator, used for logging.
+ * @priv: posix private state holding the free-space bookkeeping.
+ *
+ * Returns _gf_true when the check is disabled (interval of 0) or the most
+ * recent free-space sample passed, _gf_false otherwise.
+ */
+gf_boolean_t
+posix_write_ok (xlator_t *this, struct posix_private *priv)
+{
+        /* This is called in the write path, so performance matters. Free
+         * space is sampled with statvfs() at most once per
+         * freespace_check_interval seconds. freespace_check_lock ensures
+         * only one thread at a time takes the sample; if the lock is
+         * contended, the previous status (freespace_check_passed) is
+         * used while the thread holding the mutex refreshes it.
+         */
+        if (!priv->freespace_check_interval) {
+                return _gf_true;
+        }
+
+        if (!pthread_mutex_trylock (&priv->freespace_check_lock)) {
+                struct timespec now;
+
+                clock_gettime (CLOCK_MONOTONIC, &now);
+                if (now.tv_sec >= priv->freespace_check_last.tv_sec +
+                                  priv->freespace_check_interval) {
+                        /* Record the attempt even if sampling fails, so a
+                         * persistent failure is retried once per interval
+                         * rather than on every write. */
+                        priv->freespace_check_last.tv_sec = now.tv_sec;
+
+                        /* NOTE(review): assumes sys_statvfs follows the
+                         * statvfs() convention of returning 0 on success;
+                         * confirm against the syscall wrapper. */
+                        if (sys_statvfs (priv->base_path,
+                                         &priv->freespace_stats) != 0) {
+                                /* Do not evaluate a buffer that was not
+                                 * filled in; keep the last known verdict. */
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "statvfs on %s failed; keeping "
+                                        "previous free-space status.",
+                                        priv->base_path);
+                        } else {
+                                priv->freespace_check_passed = freespace_ok (
+                                        this, &priv->freespace_stats,
+                                        priv->min_free_disk,
+                                        priv->freespace_check_passed);
+                        }
+                }
+
+                pthread_mutex_unlock (&priv->freespace_check_lock);
+        }
+
+        /* Read without holding the lock: callers that lost the trylock
+         * race deliberately use whatever status was last published. */
+        return priv->freespace_check_passed;
+}
+
static int32_t
posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t flags, off_t offset, size_t len,
@@ -681,6 +756,7 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t op_errno = 0;
struct posix_fd *pfd = NULL;
gf_boolean_t locked = _gf_false;
+ struct posix_private *priv = this->private;
DECLARE_OLD_FS_ID_VAR;
@@ -689,6 +765,12 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
+ VALIDATE_OR_GOTO (priv, out);
+
+ if (!posix_write_ok (this, priv)) {
+ ret = -ENOSPC;
+ goto out;
+ }
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
@@ -3321,6 +3403,12 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (priv, out);
+ if (!posix_write_ok (this, priv)) {
+ op_errno = ENOSPC;
+ op_ret = -1;
+ goto out;
+ }
+
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
gf_msg (this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
@@ -6685,6 +6773,16 @@ struct posix_private *priv = NULL;
options, uint32, out);
posix_spawn_health_check_thread (this);
+ pthread_mutex_lock (&priv->freespace_check_lock);
+ {
+ GF_OPTION_RECONF ("freespace-check-interval",
+ priv->freespace_check_interval,
+ options, uint32, out);
+ GF_OPTION_RECONF ("min-free-disk", priv->min_free_disk, options,
+ percent_or_size, out);
+ }
+ pthread_mutex_unlock (&priv->freespace_check_lock);
+
ret = 0;
out:
return ret;
@@ -7299,6 +7397,19 @@ init (xlator_t *this)
GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec,
uint32, out);
+
+ GF_OPTION_INIT ("freespace-check-interval",
+ _private->freespace_check_interval, uint32, out);
+
+ GF_OPTION_INIT ("min-free-disk", _private->min_free_disk,
+ percent_or_size, out);
+
+ pthread_mutex_init (&_private->freespace_check_lock, NULL);
+ sys_statvfs (_private->base_path, &_private->freespace_stats);
+ clock_gettime (CLOCK_MONOTONIC, &_private->freespace_check_last);
+ _private->freespace_check_passed = freespace_ok (
+ this, &_private->freespace_stats, _private->min_free_disk,
+ _gf_true);
out:
return ret;
}
@@ -7463,7 +7574,7 @@ struct volume_options options[] = {
},
{ .key = {"update-link-count-parent"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
+ .default_value = "on",
.description = "Enable placeholders for gfid to path conversion"
},
#if GF_DARWIN_HOST_OS
@@ -7476,5 +7587,22 @@ struct volume_options options[] = {
"\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n"
},
#endif
+        /* Free-space threshold below which writes are refused. */
+        { .key = {"min-free-disk"},
+          .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+          .default_value = "2%",
+          .description = "Minimum percentage/size of disk space, after which we "
+                         "start failing writes with ENOSPC."
+        },
+ {
+ .key = {"freespace-check-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "5",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Interval in seconds between freespace measurements "
+ "used for the min-free-disk determination. "
+ "Set to 0 to disable."
+ },
+
{ .key = {NULL} }
};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 87f91e57747..ef4bc66ecbc 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -174,7 +174,14 @@ struct posix_private {
XATTR_BOTH,
} xattr_user_namespace;
#endif
-
+ /* freespace_check_lock protects access to following three fields. */
+ pthread_mutex_t freespace_check_lock;
+ struct timespec freespace_check_last;
+ struct statvfs freespace_stats;
+ double min_free_disk;
+ /* mutex protection ends. */
+ uint32_t freespace_check_interval;
+ gf_boolean_t freespace_check_passed;
};
typedef struct {
@@ -263,6 +270,9 @@ posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
void
posix_gfid_unset (xlator_t *this, dict_t *xdata);
+gf_boolean_t
+posix_write_ok (xlator_t *this, struct posix_private *priv);
+
int
posix_pacl_set (const char *path, const char *key, const char *acl_s);