diff options
Diffstat (limited to 'xlators/storage/posix/src/posix-helpers.c')
| -rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 614 |
1 files changed, 540 insertions, 74 deletions
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 58708a347..5725cad7d 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -22,6 +22,7 @@ #include <pthread.h> #include <ftw.h> #include <sys/stat.h> +#include <signal.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -44,25 +45,22 @@ #include "timer.h" #include "glusterfs3-xdr.h" #include "hashfn.h" +#include "glusterfs-acl.h" #include <fnmatch.h> -typedef struct { - xlator_t *this; - const char *real_path; - dict_t *xattr; - struct iatt *stbuf; - loc_t *loc; -} posix_xattr_filler_t; - char *marker_xattrs[] = {"trusted.glusterfs.quota.*", "trusted.glusterfs.*.xtime", NULL}; +char *marker_contri_key = "trusted.*.*.contri"; + static char* posix_ignore_xattrs[] = { "gfid-req", GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT, GLUSTERFS_POSIXLK_COUNT, + GLUSTERFS_PARENT_ENTRYLK, + GF_GFIDLESS_LOOKUP, NULL }; @@ -108,14 +106,142 @@ out: } static int +_posix_xattr_get_set_from_backend (posix_xattr_filler_t *filler, char *key) +{ + ssize_t xattr_size = -1; + int ret = 0; + char *value = NULL; + + xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0); + + if (xattr_size > 0) { + value = GF_CALLOC (1, xattr_size + 1, + gf_posix_mt_char); + if (!value) + goto out; + + xattr_size = sys_lgetxattr (filler->real_path, key, value, + xattr_size); + if (xattr_size <= 0) { + gf_log (filler->this->name, GF_LOG_WARNING, + "getxattr failed. path: %s, key: %s", + filler->real_path, key); + GF_FREE (value); + goto out; + } + + value[xattr_size] = '\0'; + ret = dict_set_bin (filler->xattr, key, + value, xattr_size); + if (ret < 0) { + gf_log (filler->this->name, GF_LOG_DEBUG, + "dict set failed. 
path: %s, key: %s", + filler->real_path, key); + GF_FREE (value); + goto out; + } + } + ret = 0; +out: + return ret; +} + +static int gf_posix_xattr_enotsup_log; + +static int +_posix_get_marker_all_contributions (posix_xattr_filler_t *filler) +{ + ssize_t size = -1, remaining_size = -1, list_offset = 0; + int ret = -1; + char *list = NULL, key[4096] = {0, }; + + size = sys_llistxattr (filler->real_path, NULL, 0); + if (size == -1) { + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + THIS->name, GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting brick" + " with 'user_xattr' flag)"); + + } else { + gf_log (THIS->name, GF_LOG_WARNING, + "listxattr failed on %s: %s", + filler->real_path, strerror (errno)); + + } + + goto out; + } + + if (size == 0) { + ret = 0; + goto out; + } + + list = alloca (size + 1); + if (!list) { + goto out; + } + + size = sys_llistxattr (filler->real_path, list, size); + if (size <= 0) { + ret = size; + goto out; + } + + remaining_size = size; + list_offset = 0; + + while (remaining_size > 0) { + if (*(list + list_offset) == '\0') + break; + strcpy (key, list + list_offset); + if (fnmatch (marker_contri_key, key, 0) == 0) { + ret = _posix_xattr_get_set_from_backend (filler, key); + } + + remaining_size -= strlen (key) + 1; + list_offset += strlen (key) + 1; + } + + ret = 0; + +out: + return ret; +} + +static int +_posix_get_marker_quota_contributions (posix_xattr_filler_t *filler, char *key) +{ + char *saveptr = NULL, *token = NULL, *tmp_key = NULL; + char *ptr = NULL; + int i = 0, ret = 0; + + tmp_key = ptr = gf_strdup (key); + for (i = 0; i < 4; i++) { + token = strtok_r (tmp_key, ".", &saveptr); + tmp_key = NULL; + } + + if (strncmp (token, "contri", strlen ("contri")) == 0) { + ret = _posix_get_marker_all_contributions (filler); + } else { + ret = _posix_xattr_get_set_from_backend (filler, key); + } + + GF_FREE (ptr); + + return ret; +} + +static int 
_posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data, void *xattrargs) { posix_xattr_filler_t *filler = xattrargs; - char *value = NULL; - ssize_t xattr_size = -1; int ret = -1; char *databuf = NULL; int _fd = -1; @@ -140,6 +266,16 @@ _posix_xattr_get_set (dict_t *xattr_req, goto err; } + /* + * There could be a situation where the ia_size is + * zero. GF_CALLOC will return a pointer to the + * memory initialized by gf_mem_set_acct_info. + * This function adds a header and a footer to + * the allocated memory. The returned pointer + * points to the memory just after the header, but + * when size is zero, there is no space for user + * data. The memory can be freed by calling GF_FREE. + */ databuf = GF_CALLOC (1, filler->stbuf->ia_size, gf_posix_mt_char); if (!databuf) { @@ -181,48 +317,34 @@ _posix_xattr_get_set (dict_t *xattr_req, } } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) { loc = filler->loc; - if (loc && !list_empty (&loc->inode->fd_list)) { - ret = dict_set_uint32 (filler->xattr, key, 1); - if (ret < 0) - gf_log (filler->this->name, GF_LOG_WARNING, - "Failed to set dictionary value for %s", - key); - } else { - ret = dict_set_uint32 (filler->xattr, key, 0); + if (loc) { + ret = dict_set_uint32 (filler->xattr, key, + loc->inode->fd_count); if (ret < 0) gf_log (filler->this->name, GF_LOG_WARNING, "Failed to set dictionary value for %s", key); } - } else { - xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0); - - if (xattr_size > 0) { - value = GF_CALLOC (1, xattr_size + 1, - gf_posix_mt_char); - if (!value) - return -1; - - xattr_size = sys_lgetxattr (filler->real_path, key, value, - xattr_size); - if (xattr_size <= 0) { - gf_log (filler->this->name, GF_LOG_WARNING, - "getxattr failed. 
path: %s, key: %s", - filler->real_path, key); - GF_FREE (value); - return -1; - } + } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) { + char *path = NULL; + ret = posix_get_ancestry (filler->this, filler->loc->inode, + NULL, &path, POSIX_ANCESTRY_PATH, + &filler->op_errno, xattr_req); + if (ret < 0) { + goto out; + } - value[xattr_size] = '\0'; - ret = dict_set_bin (filler->xattr, key, - value, xattr_size); - if (ret < 0) { - gf_log (filler->this->name, GF_LOG_DEBUG, - "dict set failed. path: %s, key: %s", - filler->real_path, key); - GF_FREE (value); - } + ret = dict_set_dynstr (filler->xattr, GET_ANCESTRY_PATH_KEY, + path); + if (ret < 0) { + GF_FREE (path); + goto out; } + + } else if (fnmatch (marker_contri_key, key, 0) == 0) { + ret = _posix_get_marker_quota_contributions (filler, key); + } else { + ret = _posix_xattr_get_set_from_backend (filler, key); } out: return 0; @@ -281,7 +403,7 @@ posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf) goto out; } for (i = 15; i > (15 - 8); i--) { - temp_ino += (uint64_t)(buf->ia_gfid[i]) << j; + temp_ino += (uint64_t)(buf->ia_gfid[i]) << j; j += 8; } buf->ia_ino = temp_ino; @@ -671,7 +793,10 @@ posix_handle_pair (xlator_t *this, const char *real_path, int sys_ret = -1; int ret = 0; - if (ZR_FILE_CONTENT_REQUEST(key)) { + if (XATTR_IS_PATHINFO (key)) { + ret = -EACCES; + goto out; + } else if (ZR_FILE_CONTENT_REQUEST(key)) { ret = posix_set_file_contents (this, real_path, key, value, flags); } else { @@ -679,6 +804,7 @@ posix_handle_pair (xlator_t *this, const char *real_path, value->len, flags); if (sys_ret < 0) { + ret = -errno; if (errno == ENOTSUP) { GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log, this->name,GF_LOG_WARNING, @@ -710,7 +836,6 @@ posix_handle_pair (xlator_t *this, const char *real_path, #endif /* DARWIN */ } - ret = -errno; goto out; } } @@ -725,10 +850,16 @@ posix_fhandle_pair (xlator_t *this, int fd, int sys_ret = -1; int ret = 0; + if (XATTR_IS_PATHINFO (key)) { + ret = -EACCES; + goto out; + } + 
sys_ret = sys_fsetxattr (fd, key, value->data, value->len, flags); if (sys_ret < 0) { + ret = -errno; if (errno == ENOTSUP) { GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log, this->name,GF_LOG_WARNING, @@ -755,7 +886,6 @@ posix_fhandle_pair (xlator_t *this, int fd, #endif /* DARWIN */ } - ret = -errno; goto out; } @@ -896,8 +1026,8 @@ posix_spawn_janitor_thread (xlator_t *this) LOCK (&priv->lock); { if (!priv->janitor_present) { - ret = pthread_create (&priv->janitor, NULL, - posix_janitor_thread_proc, this); + ret = gf_thread_create (&priv->janitor, NULL, + posix_janitor_thread_proc, this); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, @@ -913,6 +1043,74 @@ unlock: UNLOCK (&priv->lock); } +static int +is_fresh_file (struct stat *stat) +{ + struct timeval tv; + + gettimeofday (&tv, NULL); + + if ((stat->st_ctime >= (tv.tv_sec - 1)) + && (stat->st_ctime <= tv.tv_sec)) + return 1; + + return 0; +} + + +int +posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) +{ + /* The purpose of this function is to prevent a race + where an inode creation FOP (like mkdir/mknod/create etc) + races with lookup in the following way: + + {create thread} | {lookup thread} + | + t0 + mkdir ("name") | + t1 + | posix_gfid_set ("name", 2); + t2 + posix_gfid_set ("name", 1); | + t3 + lstat ("name"); | lstat ("name"); + + In the above case mkdir FOP would have resulted with GFID 2 while + it should have been GFID 1. It matters in the case where GFID would + have gotten set to 1 on other subvolumes of replicate/distribute + + The "solution" here is that, if we detect lookup is attempting to + set a GFID on a file which is created very recently, but does not + yet have a GFID (i.e., between t1 and t2), then "fake" it as though + posix_gfid_heal was called at t0 instead. 
+ */ + + uuid_t uuid_curr; + int ret = 0; + struct stat stat = {0, }; + + if (!xattr_req) + goto out; + + if (sys_lstat (path, &stat) != 0) + goto out; + + ret = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16); + if (ret != 16) { + if (is_fresh_file (&stat)) { + ret = -1; + errno = ENOENT; + goto out; + } + } + + ret = posix_gfid_set (this, path, loc, xattr_req); +out: + return ret; +} + + int posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req) { @@ -926,17 +1124,17 @@ posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req) if (sys_lstat (path, &stat) != 0) goto out; - data = dict_get (xattr_req, "system.posix_acl_access"); + data = dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR); if (data) { - ret = sys_lsetxattr (path, "system.posix_acl_access", + ret = sys_lsetxattr (path, POSIX_ACL_ACCESS_XATTR, data->data, data->len, 0); if (ret != 0) goto out; } - data = dict_get (xattr_req, "system.posix_acl_default"); + data = dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR); if (data) { - ret = sys_lsetxattr (path, "system.posix_acl_default", + ret = sys_lsetxattr (path, POSIX_ACL_DEFAULT_XATTR, data->data, data->len, 0); if (ret != 0) goto out; @@ -946,35 +1144,47 @@ out: return ret; } +static int +_handle_entry_create_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + int ret = -1; + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + if (!strcmp (GFID_XATTR_KEY, k) || + !strcmp ("gfid-req", k) || + !strcmp (POSIX_ACL_DEFAULT_XATTR, k) || + !strcmp (POSIX_ACL_ACCESS_XATTR, k) || + ZR_FILE_CONTENT_REQUEST(k)) { + return 0; + } + + ret = posix_handle_pair (filler->this, filler->real_path, k, v, + XATTR_CREATE); + if (ret < 0) { + errno = -ret; + return -1; + } + return 0; +} + int posix_entry_create_xattr_set (xlator_t *this, const char *path, dict_t *dict) { int ret = -1; + posix_xattr_filler_t filler = {0,}; + if (!dict) goto out; - int _handle_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) - { - if 
(!strcmp (GFID_XATTR_KEY, k) || - !strcmp ("gfid-req", k) || - !strcmp ("system.posix_acl_default", k) || - !strcmp ("system.posix_acl_access", k) || - ZR_FILE_CONTENT_REQUEST(k)) { - return 0; - } + filler.this = this; + filler.real_path = path; - ret = posix_handle_pair (this, path, k, v, XATTR_CREATE); - if (ret < 0) { - errno = -ret; - return -1; - } - return 0; - } - - ret = dict_foreach (dict, _handle_keyvalue_pair, NULL); + ret = dict_foreach (dict, _handle_entry_create_keyvalue_pair, &filler); out: return ret; @@ -1064,3 +1274,259 @@ posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd) return ret; } + +static void * +posix_health_check_thread_proc (void *data) +{ + xlator_t *this = NULL; + struct posix_private *priv = NULL; + uint32_t interval = 0; + int ret = -1; + struct stat sb = {0, }; + + this = data; + priv = this->private; + + /* prevent races when the interval is updated */ + interval = priv->health_check_interval; + if (interval == 0) + goto out; + + gf_log (this->name, GF_LOG_DEBUG, "health-check thread started, " + "interval = %d seconds", interval); + + while (1) { + /* aborting sleep() is a request to exit this thread, sleep() + * will normally not return when cancelled */ + ret = sleep (interval); + if (ret > 0) + break; + + /* prevent thread errors while doing the health-check(s) */ + pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); + + /* Do the health-check, it should be moved to its own function + * in case it gets more complex. 
*/ + ret = stat (priv->base_path, &sb); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "stat() on %s returned: %s", priv->base_path, + strerror (errno)); + goto abort; + } + + pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); + } + +out: + gf_log (this->name, GF_LOG_DEBUG, "health-check thread exiting"); + + LOCK (&priv->lock); + { + priv->health_check_active = _gf_false; + } + UNLOCK (&priv->lock); + + return NULL; + +abort: + /* health-check failed */ + gf_log (this->name, GF_LOG_EMERG, "health-check failed, going down"); + xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this); + + ret = sleep (30); + if (ret == 0) { + gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGTERM"); + kill (getpid(), SIGTERM); + } + + ret = sleep (30); + if (ret == 0) { + gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGKILL"); + kill (getpid(), SIGKILL); + } + + return NULL; +} + +void +posix_spawn_health_check_thread (xlator_t *xl) +{ + struct posix_private *priv = NULL; + int ret = -1; + + priv = xl->private; + + LOCK (&priv->lock); + { + /* cancel the running thread */ + if (priv->health_check_active == _gf_true) { + pthread_cancel (priv->health_check); + priv->health_check_active = _gf_false; + } + + /* prevent scheduling a check in a tight loop */ + if (priv->health_check_interval == 0) + goto unlock; + + ret = gf_thread_create (&priv->health_check, NULL, + posix_health_check_thread_proc, xl); + if (ret < 0) { + priv->health_check_interval = 0; + priv->health_check_active = _gf_false; + gf_log (xl->name, GF_LOG_ERROR, + "unable to setup health-check thread: %s", + strerror (errno)); + goto unlock; + } + + /* run the thread detached, resources will be freed on exit */ + pthread_detach (priv->health_check); + priv->health_check_active = _gf_true; + } +unlock: + UNLOCK (&priv->lock); +} + +int +posix_fsyncer_pick (xlator_t *this, struct list_head *head) +{ + struct posix_private *priv = NULL; + int count = 0; + + priv = this->private; + 
pthread_mutex_lock (&priv->fsync_mutex); + { + while (list_empty (&priv->fsyncs)) + pthread_cond_wait (&priv->fsync_cond, + &priv->fsync_mutex); + + count = priv->fsync_queue_count; + priv->fsync_queue_count = 0; + list_splice_init (&priv->fsyncs, head); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return count; +} + + +void +posix_fsyncer_process (xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync) +{ + struct posix_fd *pfd = NULL; + int ret = -1; + struct posix_private *priv = NULL; + + priv = this->private; + + ret = posix_fd_ctx_get (stub->args.fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not get fdctx for fd(%s)", + uuid_utoa (stub->args.fd->inode->gfid)); + call_unwind_error (stub, -1, EINVAL); + return; + } + + if (do_fsync) { +#ifdef HAVE_FDATASYNC + if (stub->args.datasync) + ret = fdatasync (pfd->fd); + else +#endif + ret = fsync (pfd->fd); + } else { + ret = 0; + } + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "could not fstat fd(%s)", + uuid_utoa (stub->args.fd->inode->gfid)); + call_unwind_error (stub, -1, errno); + return; + } + + call_unwind_error (stub, 0, 0); +} + + +static void +posix_fsyncer_syncfs (xlator_t *this, struct list_head *head) +{ + call_stub_t *stub = NULL; + struct posix_fd *pfd = NULL; + int ret = -1; + + stub = list_entry (head->prev, call_stub_t, list); + ret = posix_fd_ctx_get (stub->args.fd, this, &pfd); + if (ret) + return; + +#ifdef GF_LINUX_HOST_OS + /* syncfs() is not "declared" in RHEL's glibc even though + the kernel has support. 
+ */ +#include <sys/syscall.h> +#include <unistd.h> +#ifdef SYS_syncfs + syscall (SYS_syncfs, pfd->fd); +#else + sync(); +#endif +#else + sync(); +#endif +} + + +void * +posix_fsyncer (void *d) +{ + xlator_t *this = d; + struct posix_private *priv = NULL; + call_stub_t *stub = NULL; + call_stub_t *tmp = NULL; + struct list_head list; + int count = 0; + gf_boolean_t do_fsync = _gf_true; + + priv = this->private; + + for (;;) { + INIT_LIST_HEAD (&list); + + count = posix_fsyncer_pick (this, &list); + + usleep (priv->batch_fsync_delay_usec); + + gf_log (this->name, GF_LOG_DEBUG, + "picked %d fsyncs", count); + + switch (priv->batch_fsync_mode) { + case BATCH_NONE: + case BATCH_REVERSE_FSYNC: + break; + case BATCH_SYNCFS: + case BATCH_SYNCFS_SINGLE_FSYNC: + case BATCH_SYNCFS_REVERSE_FSYNC: + posix_fsyncer_syncfs (this, &list); + break; + } + + if (priv->batch_fsync_mode == BATCH_SYNCFS) + do_fsync = _gf_false; + else + do_fsync = _gf_true; + + list_for_each_entry_safe_reverse (stub, tmp, &list, list) { + list_del_init (&stub->list); + + posix_fsyncer_process (this, stub, do_fsync); + + if (priv->batch_fsync_mode == BATCH_SYNCFS_SINGLE_FSYNC) + do_fsync = _gf_false; + } + } +} |
