/* Copyright (c) 2006-2017 Red Hat, Inc. This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser General Public License, version 3 or any later version (LGPLv3 or later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ #define __XOPEN_SOURCE 500 /* for SEEK_HOLE and SEEK_DATA */ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef GF_BSD_HOST_OS #include #endif /* GF_BSD_HOST_OS */ #ifdef HAVE_LINKAT #include #endif /* HAVE_LINKAT */ #include #include #include #include "posix-handle.h" #include #include #include #include #include #include #include #include "glusterfs3-xdr.h" #include #include "posix-messages.h" #include "posix-metadata.h" #include #include "posix-gfid-path.h" #include extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 #undef HAVE_SET_FSID #ifdef HAVE_SET_FSID #define DECLARE_OLD_FS_ID_VAR \ uid_t old_fsuid; \ gid_t old_fsgid; #define SET_FS_ID(uid, gid) \ do { \ old_fsuid = setfsuid(uid); \ old_fsgid = setfsgid(gid); \ } while (0) #define SET_TO_OLD_FS_ID() \ do { \ setfsuid(old_fsuid); \ setfsgid(old_fsgid); \ } while (0) #else #define DECLARE_OLD_FS_ID_VAR #define SET_FS_ID(uid, gid) #define SET_TO_OLD_FS_ID() #endif /* Setting microseconds or nanoseconds depending on what's supported: The passed in `tv` can be struct timespec if supported (better, because it supports nanosecond resolution) or struct timeval otherwise. */ #if HAVE_UTIMENSAT #define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) tv.tv_nsec = nanosecs #else #define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ tv.tv_usec = nanosecs / 1000 #endif static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, NULL}; void posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd, char *loc) { int ret = 0; uuid_t uuid; if (!dict_get_sizen(req, GF_CS_OBJECT_STATUS)) return; if (!(*rsp)) { *rsp = dict_new(); if (!(*rsp)) { return; } } if (fd != -1) { if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); if (ret > 0) { ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, true); if (ret) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "%s: Failed to set " "dictionary value for %s for fd %d", uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd); } } else { gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d", GF_CS_XATTR_ARCHIVE_UUID, fd); } } } else { if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); if (ret > 0) { ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, true); if (ret) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "%s: Failed to set " "dictionary value for %s for loc %s", uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, loc); } } else { gf_msg_debug(this->name, 0, "getxattr failed on %s for %s", GF_CS_XATTR_ARCHIVE_UUID, loc); } } } return; } int32_t posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { struct iatt buf = { 0, }; int32_t op_ret = -1; int32_t op_errno = 0; struct posix_private *priv = NULL; char *real_path = NULL; dict_t *xattr_rsp = NULL; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(loc, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); SET_FS_ID(frame->root->uid, frame->root->gid); MAKE_INODE_HANDLE(real_path, this, loc, &buf); if (op_ret == -1) { op_errno = errno; if (op_errno == ENOENT) { gf_msg_debug(this->name, 0, "lstat on gfid-handle %s (path: %s)" "failed: %s", real_path ? real_path : "", loc->path, strerror(op_errno)); } else { gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_LSTAT_FAILED, "lstat on gfid-handle %s (path: %s) failed", real_path ? real_path : "", loc->path); } goto out; } if (xdata) { xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, &buf); posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, &xattr_rsp, _gf_true); posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path); } posix_update_iatt_buf(&buf, -1, real_path, xdata); op_ret = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, &buf, xattr_rsp); if (xattr_rsp) dict_unref(xattr_rsp); return 0; } static int posix_do_chmod(xlator_t *this, const char *path, struct iatt *stbuf) { int32_t ret = -1; mode_t mode = 0; mode_t mode_bit = 0; struct posix_private *priv = NULL; struct stat stat; int is_symlink = 0; priv = this->private; VALIDATE_OR_GOTO(priv, out); ret = sys_lstat(path, &stat); if (ret != 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_LSTAT_FAILED, "lstat failed: %s", path); goto out; } if (S_ISLNK(stat.st_mode)) is_symlink = 1; if (S_ISDIR(stat.st_mode)) { mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); mode_bit = (mode & priv->create_directory_mask) | priv->force_directory_mode; mode = posix_override_umask(mode, mode_bit); } else { mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); mode_bit = (mode & priv->create_mask) | priv->force_create_mode; mode = posix_override_umask(mode, mode_bit); } ret = lchmod(path, mode); if ((ret == -1) && (errno == ENOSYS)) { /* in Linux symlinks are always in mode 0777 and no such call as lchmod exists. */ gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno)); if (is_symlink) { ret = 0; goto out; } ret = sys_chmod(path, mode); } out: return ret; } static int posix_do_chown(xlator_t *this, const char *path, struct iatt *stbuf, int32_t valid) { int32_t ret = -1; uid_t uid = -1; gid_t gid = -1; if (valid & GF_SET_ATTR_UID) uid = stbuf->ia_uid; if (valid & GF_SET_ATTR_GID) gid = stbuf->ia_gid; ret = sys_lchown(path, uid, gid); return ret; } static int posix_do_utimes(xlator_t *this, const char *path, struct iatt *stbuf, int valid) { int32_t ret = -1; #if defined(HAVE_UTIMENSAT) struct timespec tv[2] = {{ 0, }, { 0, }}; #else struct timeval tv[2] = {{ 0, }, { 0, }}; #endif struct stat stat; int is_symlink = 0; ret = sys_lstat(path, &stat); if (ret != 0) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%s", path); goto out; } if (S_ISLNK(stat.st_mode)) is_symlink = 1; if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { tv[0].tv_sec = stbuf->ia_atime; SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], stbuf->ia_atime_nsec); } else { /* atime is not given, use current values */ tv[0].tv_sec = ST_ATIM_SEC(&stat); SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], ST_ATIM_NSEC(&stat)); } if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { tv[1].tv_sec = stbuf->ia_mtime; SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], stbuf->ia_mtime_nsec); } else { /* mtime is not given, use current values */ tv[1].tv_sec = ST_MTIM_SEC(&stat); SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], ST_MTIM_NSEC(&stat)); } ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); if ((ret == -1) && (errno == ENOSYS)) { gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno)); if (is_symlink) { ret = 0; goto out; } ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); } out: return ret; } int posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = 0; struct iatt statpre = { 0, }; struct iatt statpost = { 0, }; dict_t *xattr_rsp = NULL; struct posix_private *priv = NULL; priv = this->private; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(loc, out); SET_FS_ID(frame->root->uid, frame->root->gid); MAKE_INODE_HANDLE(real_path, this, loc, &statpre); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "setattr (lstat) on gfid-handle %s (path: %s) failed", real_path ? real_path : "", loc->path); goto out; } if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { op_ret = posix_do_chown(this, real_path, stbuf, valid); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, "setattr (chown) on %s " "failed", real_path); goto out; } } if (valid & GF_SET_ATTR_MODE) { op_ret = posix_do_chmod(this, real_path, stbuf); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHMOD_FAILED, "setattr (chmod) on gfid-handle %s (path: %s) " "failed", real_path, loc->path); goto out; } } if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { op_ret = posix_do_utimes(this, real_path, stbuf, valid); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED, "setattr (utimes) on gfid-handle %s (path: %s) " "failed", real_path, loc->path); goto out; } posix_update_utime_in_mdata(this, real_path, -1, loc->inode, &frame->root->ctime, stbuf, valid); } if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { posix_update_ctime_in_mdata(this, real_path, -1, loc->inode, &frame->root->ctime, stbuf, valid); } if (!valid) { op_ret = sys_lchown(real_path, -1, -1); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, "lchown (gfid-handle: %s, path: %s, -1, -1) " "failed", real_path, loc->path); goto out; } } op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &statpost, _gf_false); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "setattr (lstat) on gfid-handle %s (path: %s) failed", real_path, loc->path); goto out; } posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost); if (xdata) xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, &statpost); posix_update_iatt_buf(&statpre, -1, real_path, xdata); posix_update_iatt_buf(&statpost, -1, real_path, xdata); op_ret = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, &statpre, &statpost, xattr_rsp); if (xattr_rsp) dict_unref(xattr_rsp); return 0; } int32_t posix_do_fchown(xlator_t *this, int fd, struct iatt *stbuf, int32_t valid) { int ret = -1; uid_t uid = -1; gid_t gid = -1; if (valid & GF_SET_ATTR_UID) uid = stbuf->ia_uid; if (valid & GF_SET_ATTR_GID) gid = stbuf->ia_gid; ret = sys_fchown(fd, uid, gid); return ret; } int32_t posix_do_fchmod(xlator_t *this, int fd, struct iatt *stbuf) { int32_t ret = -1; mode_t mode = 0; mode_t mode_bit = 0; struct posix_private *priv = NULL; priv = this->private; VALIDATE_OR_GOTO(priv, out); mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); mode_bit = (mode & priv->create_mask) | priv->force_create_mode; mode = posix_override_umask(mode, mode_bit); ret = sys_fchmod(fd, mode); out: return ret; } static int posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid) { int32_t ret = -1; struct timeval tv[2] = {{ 0, }, { 0, }}; struct stat stat = { 0, }; ret = sys_fstat(fd, &stat); if (ret != 0) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%d", fd); goto out; } if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { tv[0].tv_sec = stbuf->ia_atime; tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; } else { /* atime is not given, use current values */ tv[0].tv_sec = ST_ATIM_SEC(&stat); tv[0].tv_usec = ST_ATIM_NSEC(&stat) / 1000; } if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { tv[1].tv_sec = stbuf->ia_mtime; tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; } else { /* mtime is not given, use current values */ tv[1].tv_sec = ST_MTIM_SEC(&stat); tv[1].tv_usec = ST_MTIM_NSEC(&stat) / 1000; } ret = sys_futimes(fd, tv); if (ret == -1) gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, "%d", fd); out: return ret; } int posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; struct iatt statpre = { 0, }; struct iatt statpost = { 0, }; struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; dict_t *xattr_rsp = NULL; int32_t ret = -1; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); goto out; } op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpre); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fsetattr (fstat) failed on fd=%p", fd); goto out; } if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { op_ret = posix_do_fchown(this, pfd->fd, stbuf, valid); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED, "fsetattr (fchown) failed" " on fd=%p", fd); goto out; } } if (valid & GF_SET_ATTR_MODE) { op_ret = posix_do_fchmod(this, pfd->fd, stbuf); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHMOD_FAILED, "fsetattr (fchmod) failed" " on fd=%p", fd); goto out; } } if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { op_ret = posix_do_futimes(this, pfd->fd, stbuf, valid); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, "fsetattr (futimes) on " "failed fd=%p", fd); goto out; } posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, &frame->root->ctime, stbuf, valid); } if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode, &frame->root->ctime, stbuf, valid); } if (!valid) { op_ret = sys_fchown(pfd->fd, -1, -1); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED, "fchown (%d, -1, -1) failed", pfd->fd); goto out; } } op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpost); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fsetattr (fstat) failed on fd=%p", fd); goto out; } posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &statpost); if (xdata) xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata, &statpost); op_ret = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, &statpre, &statpost, xattr_rsp); if (xattr_rsp) dict_unref(xattr_rsp); return 0; } static int32_t posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, off_t offset, size_t len, struct iatt *statpre, struct iatt *statpost, dict_t *xdata, dict_t **rsp_xdata) { int32_t ret = -1; int32_t op_errno = 0; struct posix_fd *pfd = NULL; gf_boolean_t locked = _gf_false; posix_inode_ctx_t *ctx = NULL; struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); priv = this->private; /* fallocate case is special so call posix_disk_space_check separately for every fallocate fop instead of calling posix_disk_space with thread after every 5 sec sleep to working correctly storage.reserve option behaviour */ if (priv->disk_reserve) posix_disk_space_check(this); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); goto out; } ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); if (ret < 0) { ret = -ENOMEM; goto out; } if (xdata && dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { locked = _gf_true; pthread_mutex_lock(&ctx->write_atomic_lock); } ret = posix_fdstat(this, fd->inode, pfd->fd, statpre); if (ret == -1) { ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fallocate (fstat) failed on fd=%p", fd); goto out; } if (xdata) { ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL, xdata, rsp_xdata, _gf_false); if (ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); ret = -EIO; goto out; } } ret = sys_fallocate(pfd->fd, flags, offset, len); if (ret == -1) { ret = -errno; gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_FALLOCATE_FAILED, "fallocate failed on %s offset: %jd, " "len:%zu, flags: %d", uuid_utoa(fd->inode->gfid), offset, len, flags); goto out; } ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); if (ret == -1) { ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fallocate (fstat) failed on fd=%p", fd); goto out; } posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); out: if (locked) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } SET_TO_OLD_FS_ID(); if (ret == ENOSPC) ret = -ENOSPC; return ret; } char * _page_aligned_alloc(size_t size, char **aligned_buf) { char *alloc_buf = NULL; char *buf = NULL; alloc_buf = GF_CALLOC(1, (size + ALIGN_SIZE), gf_posix_mt_char); if (!alloc_buf) goto out; /* page aligned buffer */ buf = GF_ALIGN_BUF(alloc_buf, ALIGN_SIZE); *aligned_buf = buf; out: return alloc_buf; } static int32_t _posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct) { off_t num_vect = 0; off_t num_loop = 1; off_t idx = 0; int32_t op_ret = -1; int32_t vect_size = VECTOR_SIZE; off_t remain = 0; off_t extra = 0; struct iovec *vector = NULL; char *iov_base = NULL; char *alloc_buf = NULL; if (len == 0) return 0; if (len < VECTOR_SIZE) vect_size = len; num_vect = len / (vect_size); remain = len % vect_size; if (num_vect > MAX_NO_VECT) { extra = num_vect % MAX_NO_VECT; num_loop = num_vect / MAX_NO_VECT; num_vect = MAX_NO_VECT; } vector = GF_CALLOC(num_vect, sizeof(struct iovec), gf_common_mt_iovec); if (!vector) return -1; if (o_direct) { alloc_buf = _page_aligned_alloc(vect_size, &iov_base); if (!alloc_buf) { GF_FREE(vector); return -1; } } else { iov_base = GF_CALLOC(vect_size, sizeof(char), gf_common_mt_char); if (!iov_base) { GF_FREE(vector); return -1; } } for (idx = 0; idx < num_vect; idx++) { vector[idx].iov_base = iov_base; vector[idx].iov_len = vect_size; } if (sys_lseek(fd, offset, SEEK_SET) < 0) { op_ret = -1; goto err; } for (idx = 0; idx < num_loop; idx++) { op_ret = sys_writev(fd, vector, num_vect); if (op_ret < 0) goto err; if (op_ret != (vect_size * num_vect)) { op_ret = -1; errno = ENOSPC; goto err; } } if (extra) { op_ret = sys_writev(fd, vector, extra); if (op_ret < 0) goto err; if (op_ret != (vect_size * extra)) { op_ret = -1; errno = ENOSPC; goto err; } } if (remain) { vector[0].iov_len = remain; op_ret = sys_writev(fd, vector, 1); if (op_ret < 0) goto err; if (op_ret != remain) { op_ret = -1; errno = ENOSPC; goto err; } } err: if (o_direct) GF_FREE(alloc_buf); else GF_FREE(iov_base); GF_FREE(vector); return op_ret; } static int32_t posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, struct iatt *statpre, struct iatt *statpost, dict_t *xdata, dict_t **rsp_xdata) { int32_t ret = -1; int32_t op_errno = 0; int32_t flags = 0; struct posix_fd *pfd = NULL; gf_boolean_t locked = _gf_false; posix_inode_ctx_t *ctx = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); goto out; } ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); if (ret < 0) { ret = -ENOMEM; goto out; } if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { locked = _gf_true; pthread_mutex_lock(&ctx->write_atomic_lock); } ret = posix_fdstat(this, fd->inode, pfd->fd, statpre); if (ret == -1) { ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd = %p", fd); goto out; } if (xdata) { ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL, xdata, rsp_xdata, _gf_false); if (ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state " "check failed, fd %p", fd); ret = -EIO; goto out; } } posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata); /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. * If it fails, fall back to _posix_do_zerofill() and an optional fsync. */ flags = FALLOC_FL_ZERO_RANGE; ret = sys_fallocate(pfd->fd, flags, offset, len); if (ret == 0) { goto fsync; } else { ret = -errno; if ((ret != -ENOSYS) && (ret != -EOPNOTSUPP)) { goto out; } } ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); if (ret < 0) { ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ZEROFILL_FAILED, "zerofill failed on fd %d length %" PRId64, pfd->fd, len); goto out; } fsync: if (pfd->flags & (O_SYNC | O_DSYNC)) { ret = sys_fsync(pfd->fd); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_WRITEV_FAILED, "fsync() in writev on fd" "%d failed", pfd->fd); ret = -errno; goto out; } } ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); if (ret == -1) { ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "post operation fstat failed on fd=%p", fd); goto out; } posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); out: if (locked) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } SET_TO_OLD_FS_ID(); return ret; } int32_t posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, off_t offset, size_t len, dict_t *xdata) { int32_t ret; int32_t flags = 0; struct iatt statpre = { 0, }; struct iatt statpost = { 0, }; dict_t *rsp_xdata = NULL; #ifdef FALLOC_FL_KEEP_SIZE if (keep_size) flags = FALLOC_FL_KEEP_SIZE; #endif /* FALLOC_FL_KEEP_SIZE */ ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, &statpost, xdata, &rsp_xdata); if (ret < 0) goto err; STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, rsp_xdata); return 0; err: STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata); return 0; } int32_t posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { int32_t ret; dict_t *rsp_xdata = NULL; #ifndef FALLOC_FL_KEEP_SIZE ret = EOPNOTSUPP; #else /* FALLOC_FL_KEEP_SIZE */ int32_t flags = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; struct iatt statpre = { 0, }; struct iatt statpost = { 0, }; ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, &statpost, xdata, &rsp_xdata); if (ret < 0) goto err; STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, rsp_xdata); return 0; err: #endif /* FALLOC_FL_KEEP_SIZE */ STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, rsp_xdata); return 0; } int32_t posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata) { int32_t ret = 0; struct iatt statpre = { 0, }; struct iatt statpost = { 0, }; struct posix_private *priv = NULL; int op_ret = -1; int op_errno = EINVAL; dict_t *rsp_xdata = NULL; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); priv = this->private; DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost, xdata, &rsp_xdata); if (ret < 0) { op_ret = -1; op_errno = -ret; goto out; } STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata); return 0; out: STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, rsp_xdata); return 0; } int32_t posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { /* * IPC is for inter-translator communication. If one gets here, it * means somebody sent one that nobody else recognized, which is an * error much like an uncaught exception. */ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE, "GF_LOG_IPC(%d) not handled", op); STACK_UNWIND_STRICT(ipc, frame, -1, EOPNOTSUPP, NULL); return 0; } int32_t posix_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, gf_seek_what_t what, dict_t *xdata) { #ifdef HAVE_SEEK_HOLE struct posix_fd *pfd = NULL; off_t ret = -1; int err = 0; int whence = 0; struct iatt preop = { 0, }; dict_t *rsp_xdata = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); switch (what) { case GF_SEEK_DATA: whence = SEEK_DATA; break; case GF_SEEK_HOLE: whence = SEEK_HOLE; break; default: err = ENOTSUP; gf_msg(this->name, GF_LOG_ERROR, ENOTSUP, P_MSG_SEEK_UNKOWN, "don't know what to seek"); goto out; } ret = posix_fd_ctx_get(fd, this, &pfd, &err); if (ret < 0) { gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); goto out; } if (xdata) { ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); if (ret == -1) { ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd=%p", fd); goto out; } ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata, &rsp_xdata, _gf_false); if (ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); ret = -EIO; goto out; } } ret = sys_lseek(pfd->fd, offset, whence); if (ret == -1) { err = errno; gf_msg(this->name, fop_log_level(GF_FOP_SEEK, err), err, P_MSG_SEEK_FAILED, "seek failed on fd %d length %" PRId64, pfd->fd, offset); goto out; } out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(seek, frame, (ret == -1 ? -1 : 0), err, (ret == -1 ? -1 : ret), rsp_xdata); #else STACK_UNWIND_STRICT(seek, frame, -1, EINVAL, 0, NULL); #endif return 0; } int32_t posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { char *real_path = NULL; int32_t op_ret = -1; int32_t op_errno = EINVAL; DIR *dir = NULL; struct posix_fd *pfd = NULL; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(loc, out); VALIDATE_OR_GOTO(fd, out); SET_FS_ID(frame->root->uid, frame->root->gid); MAKE_INODE_HANDLE(real_path, this, loc, NULL); if (!real_path) { op_errno = ESTALE; goto out; } op_ret = -1; dir = sys_opendir(real_path); if (dir == NULL) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED, "opendir failed on gfid-handle: %s (path: %s)", real_path, loc->path); goto out; } op_ret = dirfd(dir); if (op_ret < 0) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED, "dirfd() failed (path: %s, gfid-handle: %s", loc->path, real_path); goto out; } pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd); if (!pfd) { op_errno = errno; goto out; } pfd->dir = dir; pfd->dir_eof = -1; pfd->fd = op_ret; op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); if (op_ret) gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd" "context path=%s " "gfid-handle= %s,fd=%p", loc->path, real_path, fd); posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, NULL); op_ret = 0; out: if (op_ret == -1) { if (dir) { (void)sys_closedir(dir); dir = NULL; } if (pfd) { GF_FREE(pfd); pfd = NULL; } } SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL); return 0; } int32_t posix_releasedir(xlator_t *this, fd_t *fd) { struct posix_fd *pfd = NULL; uint64_t tmp_pfd = 0; int ret = 0; glusterfs_ctx_t *ctx = NULL; VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); ret = fd_ctx_del(fd, this, &tmp_pfd); if (ret < 0) { gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd); goto out; } pfd = (struct posix_fd *)(long)tmp_pfd; if (!pfd->dir) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, "pfd->dir is NULL for fd=%p", fd); goto out; } ctx = THIS->ctx; pthread_mutex_lock(&ctx->janitor_lock); { INIT_LIST_HEAD(&pfd->list); list_add_tail(&pfd->list, &ctx->janitor_fds); pthread_cond_signal(&ctx->janitor_cond); } pthread_mutex_unlock(&ctx->janitor_lock); /*gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); sys_closedir(pfd->dir); GF_FREE(pfd); */ out: return 0; } int32_t posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata) { char *dest = NULL; int32_t op_ret = -1; int32_t op_errno = EINVAL; char *real_path = NULL; struct iatt stbuf = { 0, }; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(loc, out); SET_FS_ID(frame->root->uid, frame->root->gid); dest = alloca(size + 1); MAKE_INODE_HANDLE(real_path, this, loc, &stbuf); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "lstat on %s failed", loc->path ? loc->path : ""); goto out; } op_ret = sys_readlink(real_path, dest, size); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED, "readlink on gfid-handle: %s (path: %s) failed", real_path, loc->path); goto out; } dest[op_ret] = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); return 0; } int32_t posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = 0; struct posix_private *priv = NULL; struct iatt prebuf = { 0, }; struct iatt postbuf = { 0, }; dict_t *rsp_xdata = NULL; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(loc, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); SET_FS_ID(frame->root->uid, frame->root->gid); MAKE_INODE_HANDLE(real_path, this, loc, &prebuf); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "pre-operation lstat on (path: %s gfid-handle: %s) " "failed", loc->path, real_path ? real_path : ""); goto out; } if (xdata) { op_ret = posix_cs_maintenance(this, NULL, loc, NULL, &prebuf, real_path, xdata, &rsp_xdata, _gf_false); if (op_ret == -1) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, path %s", loc->path); op_errno = EIO; goto out; } } posix_update_iatt_buf(&prebuf, -1, real_path, xdata); op_ret = sys_truncate(real_path, offset); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, "truncate on gfid-handle: %s (path: %s) failed", real_path, loc->path); goto out; } op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postbuf, _gf_false); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "lstat on gfid-handle %s (path: %s) failed", real_path, loc->path); goto out; } posix_set_ctime(frame, this, real_path, -1, loc->inode, &postbuf); op_ret = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, &prebuf, &postbuf, NULL); return 0; } int32_t posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = NULL; int32_t _fd = -1; struct posix_fd *pfd = NULL; struct posix_private *priv = NULL; struct iatt preop = { 0, }; dict_t *rsp_xdata = NULL; struct iatt stbuf = { 0, }; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(this->private, out); VALIDATE_OR_GOTO(loc, out); VALIDATE_OR_GOTO(fd, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); if (loc->inode && ((loc->inode->ia_type == IA_IFBLK) || (loc->inode->ia_type == IA_IFCHR))) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, "open received on a block/char file (%s)", uuid_utoa(loc->inode->gfid)); op_errno = EINVAL; goto out; } if (flags & O_CREAT) DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); MAKE_INODE_HANDLE(real_path, this, loc, &stbuf); if (!real_path) { op_ret = -1; op_errno = ESTALE; goto out; } if (IA_ISLNK(stbuf.ia_type)) { op_ret = -1; op_errno = ELOOP; goto out; } op_ret = -1; SET_FS_ID(frame->root->uid, frame->root->gid); if (priv->o_direct) flags |= O_DIRECT; _fd = sys_open(real_path, flags, priv->force_create_mode); if (_fd == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED, "open on gfid-handle %s (path: %s), flags: %d", real_path, loc->path, flags); goto out; } posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf); pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd); if (!pfd) { op_errno = errno; goto out; } pfd->flags = flags; pfd->fd = _fd; if (xdata) { op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); if (op_ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd=%p", fd); GF_FREE(pfd); goto out; } posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata, &rsp_xdata, _gf_true); } op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); if (op_ret) gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd context gfid-handle=%s path=%s fd=%p", real_path, loc->path, fd); op_ret = 0; out: if (op_ret == -1) { if (_fd != -1) { sys_close(_fd); } } SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata); return 0; } int posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct posix_private *priv = NULL; struct iobuf *iobuf = NULL; struct iobref *iobref = NULL; struct iovec vec = { 0, }; struct posix_fd *pfd = NULL; struct iatt stbuf = { 0, }; struct iatt preop = { 0, }; int ret = -1; dict_t *rsp_xdata = NULL; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); VALIDATE_OR_GOTO(fd->inode, out); VALIDATE_OR_GOTO(this->private, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, "readv received on a block/char file (%s)", uuid_utoa(fd->inode->gfid)); op_errno = EINVAL; goto out; } ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); goto out; } if (!size) { op_errno = EINVAL; gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_INVALID_ARGUMENT, "size=%" GF_PRI_SIZET, size); goto out; } iobuf = iobuf_get_page_aligned(this->ctx->iobuf_pool, size, ALIGN_SIZE); if (!iobuf) { op_errno = ENOMEM; goto out; } _fd = pfd->fd; if (xdata) { op_ret = posix_fdstat(this, fd->inode, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd=%p", fd); goto out; } op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, &rsp_xdata, _gf_false); if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); op_errno = EIO; goto out; } } posix_update_iatt_buf(&preop, _fd, NULL, xdata); op_ret = sys_pread(_fd, iobuf->ptr, size, offset); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READ_FAILED, "read failed on gfid=%s, " "fd=%p, offset=%" PRIu64 " size=%" GF_PRI_SIZET ", " "buf=%p", uuid_utoa(fd->inode->gfid), fd, offset, size, iobuf->ptr); goto out; } GF_ATOMIC_ADD(priv->read_value, op_ret); vec.iov_base = iobuf->ptr; vec.iov_len = op_ret; iobref = iobref_new(); iobref_add(iobref, iobuf); /* * readv successful, and we need to get the stat of the file * we read from */ op_ret = posix_fdstat(this, fd->inode, _fd, &stbuf); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fstat failed on fd=%p", fd); goto out; } posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &stbuf); /* Hack to notify higher layers of EOF. */ if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) op_errno = ENOENT; op_ret = vec.iov_len; out: STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &vec, 1, &stbuf, iobref, rsp_xdata); if (iobref) iobref_unref(iobref); if (iobuf) iobuf_unref(iobuf); return 0; } int32_t __posix_pwritev(int fd, struct iovec *vector, int count, off_t offset) { int32_t op_ret = 0; int idx = 0; int retval = 0; off_t internal_off = 0; if (!vector) return -EFAULT; internal_off = offset; for (idx = 0; idx < count; idx++) { retval = sys_pwrite(fd, vector[idx].iov_base, vector[idx].iov_len, internal_off); if (retval == -1) { op_ret = -errno; goto err; } op_ret += retval; internal_off += retval; } err: return op_ret; } int32_t __posix_writev(int fd, struct iovec *vector, int count, off_t startoff, int odirect) { int32_t op_ret = 0; int idx = 0; int max_buf_size = 0; int retval = 0; char *buf = NULL; char *alloc_buf = NULL; off_t internal_off = 0; /* Check for the O_DIRECT flag during open() */ if (!odirect) return __posix_pwritev(fd, vector, count, startoff); for (idx = 0; idx < count; idx++) { if (max_buf_size < vector[idx].iov_len) max_buf_size = vector[idx].iov_len; } alloc_buf = _page_aligned_alloc(max_buf_size, &buf); if (!alloc_buf) { op_ret = -errno; goto err; } internal_off = startoff; for (idx = 0; idx < count; idx++) { memcpy(buf, vector[idx].iov_base, vector[idx].iov_len); /* not sure whether writev works on O_DIRECT'd fd */ retval = sys_pwrite(fd, buf, vector[idx].iov_len, internal_off); if (retval == -1) { op_ret = -errno; goto err; } op_ret += retval; internal_off += retval; } err: GF_FREE(alloc_buf); return op_ret; } dict_t * _fill_writev_xdata(fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) { dict_t *rsp_xdata = NULL; int32_t ret = 0; inode_t *inode = NULL; if (fd) inode = fd->inode; if (!fd || !fd->inode || gf_uuid_is_null(fd->inode->gfid)) { gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, P_MSG_XATTR_FAILED, "fd: %p inode: %p" "gfid:%s", fd, inode ? inode : 0, inode ? uuid_utoa(inode->gfid) : "N/A"); goto out; } if (!xdata) goto out; rsp_xdata = dict_new(); if (!rsp_xdata) goto out; if (dict_get(xdata, GLUSTERFS_OPEN_FD_COUNT)) { ret = dict_set_uint32(rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, fd->inode->fd_count); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "%s: Failed to set " "dictionary value for %s", uuid_utoa(fd->inode->gfid), GLUSTERFS_OPEN_FD_COUNT); } } if (dict_get(xdata, GLUSTERFS_ACTIVE_FD_COUNT)) { ret = dict_set_uint32(rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT, fd->inode->active_fd_count); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "%s: Failed to set " "dictionary value for %s", uuid_utoa(fd->inode->gfid), GLUSTERFS_ACTIVE_FD_COUNT); } } if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) { ret = dict_set_uint32(rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, is_append); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "%s: Failed to set " "dictionary value for %s", uuid_utoa(fd->inode->gfid), GLUSTERFS_WRITE_IS_APPEND); } } out: return rsp_xdata; } int32_t posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; struct iatt preop = { 0, }; struct iatt postop = { 0, }; int ret = -1; dict_t *rsp_xdata = NULL; int is_append = 0; gf_boolean_t locked = _gf_false; gf_boolean_t write_append = _gf_false; gf_boolean_t update_atomic = _gf_false; posix_inode_ctx_t *ctx = NULL; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); VALIDATE_OR_GOTO(fd->inode, out); VALIDATE_OR_GOTO(vector, out); VALIDATE_OR_GOTO(this->private, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, "writev received on a block/char file (%s)", uuid_utoa(fd->inode->gfid)); op_errno = EINVAL; goto out; } ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); goto out; } _fd = pfd->fd; ret = posix_check_internal_writes(this, fd, _fd, xdata); if (ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "possible overwrite from internal client, fd=%p", fd); op_ret = -1; op_errno = EBUSY; goto out; } if (xdata) { if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) write_append = _gf_true; if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) update_atomic = _gf_true; } /* The write_is_append check and write must happen atomically. Else another write can overtake this write after the check and get written earlier. So lock before preop-stat and unlock after write. */ /* * The update_atomic option is to instruct posix to do prestat, * write and poststat atomically. This is to prevent any modification to * ia_size and ia_blocks until poststat and the diff in their values * between pre and poststat could be of use for some translators (shard * as of today). */ op_ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); if (op_ret < 0) { op_errno = ENOMEM; goto out; } if (write_append || update_atomic) { locked = _gf_true; pthread_mutex_lock(&ctx->write_atomic_lock); } op_ret = posix_fdstat(this, fd->inode, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd=%p", fd); goto out; } if (xdata) { op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, &rsp_xdata, _gf_false); if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); op_errno = EIO; goto out; } } posix_update_iatt_buf(&preop, _fd, NULL, xdata); if (locked && write_append) { if (preop.ia_size == offset || (fd->flags & O_APPEND)) is_append = 1; } op_ret = __posix_writev(_fd, vector, count, offset, (pfd->flags & O_DIRECT)); if (locked && (!update_atomic)) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } if (op_ret < 0) { op_errno = -op_ret; op_ret = -1; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED, "write failed: offset %" PRIu64 ",", offset); goto out; } rsp_xdata = _fill_writev_xdata(fd, xdata, this, is_append); /* writev successful, we also need to get the stat of * the file we wrote to */ ret = posix_fdstat(this, fd->inode, _fd, &postop); if (ret == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "post-operation fstat failed on fd=%p", fd); goto out; } posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop); if (locked) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } if (flags & (O_SYNC | O_DSYNC)) { ret = sys_fsync(_fd); if (ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_WRITEV_FAILED, "fsync() in writev on fd %d failed", _fd); op_ret = -1; op_errno = errno; goto out; } } GF_ATOMIC_ADD(priv->write_value, op_ret); out: if (locked) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop, rsp_xdata); if (rsp_xdata) dict_unref(rsp_xdata); return 0; } int32_t posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, uint32_t flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd_in = -1; int _fd_out = -1; struct posix_private *priv = NULL; struct posix_fd *pfd_in = NULL; struct posix_fd *pfd_out = NULL; struct iatt preop_dst = { 0, }; struct iatt postop_dst = { 0, }; struct iatt stbuf = { 0, }; int ret = -1; dict_t *rsp_xdata = NULL; int is_append = 0; gf_boolean_t locked = _gf_false; gf_boolean_t update_atomic = _gf_false; posix_inode_ctx_t *ctx = NULL; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd_in, out); VALIDATE_OR_GOTO(fd_in->inode, out); VALIDATE_OR_GOTO(fd_out, out); VALIDATE_OR_GOTO(fd_out->inode, out); VALIDATE_OR_GOTO(this->private, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno)) goto out; if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno)) goto out; ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd_in); goto out; } _fd_in = pfd_in->fd; ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd_out); goto out; } _fd_out = pfd_out->fd; /* * Currently, the internal write is checked via xdata which * is set by some xlator above. It could be due to several of * the reasons such as healing or a snapshot operation happening * using copy_file_range. As of now (i.e. writing the patch with * this change) none of the xlators above posix are using the * internal write with copy_file_range. In future it might * change. Atleast as of now the hope is that, when that happens * this functon or fop does not require additional changes for * handling internal writes. */ ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata); if (ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "possible overwrite from internal client, fd=%p", fd_out); op_ret = -1; op_errno = EBUSY; goto out; } if (xdata) { if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) update_atomic = _gf_true; } /* * The update_atomic option is to instruct posix to do prestat, * write and poststat atomically. This is to prevent any modification to * ia_size and ia_blocks until poststat and the diff in their values * between pre and poststat could be of use for some translators. * This is similar to the atomic write operation. atmoic write is * (i.e. prestat + write + poststat) used by shard as of now. In case, * some xlator needs copy_file_range to be atomic from prestat and postat * prespective (i.e. prestat + copy_file_range + poststat) then it has * to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata. */ op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx); if (op_ret < 0) { op_errno = ENOMEM; goto out; } if (update_atomic) { ret = pthread_mutex_lock(&ctx->write_atomic_lock); if (!ret) locked = _gf_true; else { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MUTEX_FAILED, "failed to hold write atomic lock on %s", uuid_utoa(fd_out->inode->gfid)); goto out; } } op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd=%p", fd_out); goto out; } /* * Since, only the destination file (fd_out) is undergoing * modification, the write related tests are done on that. * i.e. this is treater similar to as if the destination file * undergoing write fop from maintenance perspective. */ if (xdata) { op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst, NULL, xdata, &rsp_xdata, _gf_false); if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd_out); op_errno = EIO; goto out; } } /* * NOTE: This is just doing a single execution of copy_file_range * system call. If the returned value of this system call is less * than len, then should we keep doing it in a for loop until the * copy_file_range of all the len bytes is done? * Check the example program provided in the man page of * copy_file_range. * If so, then a separate variables for both off_in and off_out * should be used which are initialized to off_in and off_out * that this function call receives, but then advanced by the * value returned by sys_copy_file_range and then use that as * off_in and off_out for next instance of copy_file_range execution. */ op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len, flags); if (op_ret < 0) { op_errno = -op_ret; op_ret = -1; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, "copy_file_range failed: fd_in: %p (gfid: %s) ," " fd_out %p (gfid:%s)", fd_in, uuid_utoa(fd_in->inode->gfid), fd_out, uuid_utoa(fd_out->inode->gfid)); goto out; } /* * Let this be as it is for now. This function collects * infomration such as open fd count etc. So, even though * is_append does not apply to copy_file_range, for now, * allowing it to be recorded in the dict as _gf_false. */ rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append); /* copy_file_range successful, we also need to get the stat of * the file we wrote to (i.e. destination file or fd_out). */ ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst); if (ret == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "post-operation fstat failed on fd=%p", fd_out); goto out; } /* * Also perform the stat on the source fd (i.e. fd_in). For now, * allowing it to be done within the locked region if the request * is for atomic operation (and update) of copy_file_range. */ ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf); if (ret == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "post-operation fstat failed on fd=%p", fd_in); goto out; } /* * The core logic of what time attributes are to be updated * on a fop is decided at client side xlator utime. * All the remaining fops call posix_set_ctime function * to update the {a,m,c}time. But, for all the other fops, * the operation is happening on only one file (or inode). * But here, there are 2 fds (source and destination). Hence * the new function below to update the appropriate times for * both the source and the destination file. * For the source file, if at all anything has to be updated, * it would be atime (as that file is only read, not updated). * For the destination file, the attributes that require the * modification would be mtime and ctime. * What times have to be changed is actually determined by * utime xlator. But, all of them would be in frame->root->flags. * So, currently posix assumes that, the atime flag is for * the source file and the other 2 flags are for the destination * file. Since, the assumption is rigid (i.e. atime for source * and {m,c}time for destination), the below function is called * posix_set_ctime_cfr (cfr standing for copy_file_range). * FUTURE TODO: * In future, some other functionality or fop might operate * simultaneously on 2 files. Then, depending upon what that new * fop does or what are its requirements, the below function might * require changes to become generic for consumption in case of * simultaneous operations on 2 files. */ posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf, NULL, pfd_out->fd, fd_out->inode, &postop_dst); if (locked) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } /* * Record copy_file_range in priv->write_value for now. * If not needed, remove below section of code along with * this comment (or add comment to explain why it is not * needed). */ GF_ATOMIC_ADD(priv->write_value, op_ret); out: if (locked) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf, &preop_dst, &postop_dst, rsp_xdata); if (rsp_xdata) dict_unref(rsp_xdata); return 0; } int32_t posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { char *real_path = NULL; int32_t op_ret = -1; int32_t op_errno = 0; struct statvfs buf = { 0, }; struct posix_private *priv = NULL; int shared_by = 1; double percent = 0; uint64_t reserved_blocks = 0; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(loc, out); VALIDATE_OR_GOTO(this->private, out); MAKE_INODE_HANDLE(real_path, this, loc, NULL); if (!real_path) { op_ret = -1; op_errno = ESTALE; goto out; } priv = this->private; op_ret = sys_statvfs(real_path, &buf); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, "statvfs failed on gfid-handle %s (path: %s)", real_path, loc->path); goto out; } if (priv->disk_unit == 'p') { percent = priv->disk_reserve; reserved_blocks = (((buf.f_blocks * percent) / 100) + 0.5); } else { if (buf.f_bsize) { reserved_blocks = (priv->disk_reserve + buf.f_bsize - 1) / buf.f_bsize; } } if (buf.f_bfree > reserved_blocks) { buf.f_bfree = (buf.f_bfree - reserved_blocks); if (buf.f_bavail > buf.f_bfree) { buf.f_bavail = buf.f_bfree; } } else { buf.f_bfree = 0; buf.f_bavail = 0; } shared_by = priv->shared_brick_count; if (shared_by > 1) { buf.f_blocks /= shared_by; buf.f_bfree /= shared_by; buf.f_bavail /= shared_by; buf.f_files /= shared_by; buf.f_ffree /= shared_by; buf.f_favail /= shared_by; } if (!priv->export_statfs) { buf.f_blocks = 0; buf.f_bfree = 0; buf.f_bavail = 0; buf.f_files = 0; buf.f_ffree = 0; buf.f_favail = 0; } op_ret = 0; out: STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, &buf, NULL); return 0; } int32_t posix_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int ret = -1; struct posix_fd *pfd = NULL; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL on fd=%p", fd); goto out; } op_ret = 0; out: STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_release(xlator_t *this, fd_t *fd) { struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; int ret = -1; uint64_t tmp_pfd = 0; glusterfs_ctx_t *ctx = NULL; VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); priv = this->private; ctx = THIS->ctx; ret = fd_ctx_del(fd, this, &tmp_pfd); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); goto out; } pfd = (struct posix_fd *)(long)tmp_pfd; if (pfd->dir) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL, "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); } pthread_mutex_lock(&ctx->janitor_lock); { INIT_LIST_HEAD(&pfd->list); list_add_tail(&pfd->list, &ctx->janitor_fds); pthread_cond_signal(&ctx->janitor_cond); } pthread_mutex_unlock(&ctx->janitor_lock); if (!priv) goto out; out: return 0; } int posix_batch_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, dict_t *xdata) { call_stub_t *stub = NULL; struct posix_private *priv = NULL; priv = this->private; stub = fop_fsync_stub(frame, default_fsync, fd, datasync, xdata); if (!stub) { STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); return 0; } pthread_mutex_lock(&priv->fsync_mutex); { list_add_tail(&stub->list, &priv->fsyncs); priv->fsync_queue_count++; pthread_cond_signal(&priv->fsync_cond); } pthread_mutex_unlock(&priv->fsync_mutex); return 0; } int32_t posix_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct posix_fd *pfd = NULL; int ret = -1; struct iatt preop = { 0, }; struct iatt postop = { 0, }; struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); SET_FS_ID(frame->root->uid, frame->root->gid); #ifdef GF_DARWIN_HOST_OS /* Always return success in case of fsync in MAC OS X */ op_ret = 0; goto out; #endif priv = this->private; if (priv->batch_fsync_mode && xdata && dict_get(xdata, "batch-fsync")) { posix_batch_fsync(frame, this, fd, datasync, xdata); return 0; } ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd not found in fd's ctx"); goto out; } _fd = pfd->fd; op_ret = posix_fdstat(this, fd->inode, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd=%p", fd); goto out; } if (datasync) { op_ret = sys_fdatasync(_fd); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED, "fdatasync on fd=%p" "failed:", fd); goto out; } } else { op_ret = sys_fsync(_fd); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED, "fsync on fd=%p " "failed", fd); goto out; } } op_ret = posix_fdstat(this, fd->inode, _fd, &postop); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, "post-operation fstat failed on fd=%p", fd); goto out; } op_ret = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, &preop, &postop, NULL); return 0; } static int gf_posix_xattr_enotsup_log; static int _handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) { posix_xattr_filler_t *filler = NULL; filler = tmp; return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, filler->flags, filler->stbuf); } #ifdef GF_DARWIN_HOST_OS static int map_xattr_flags(int flags) { /* DARWIN has different defines on XATTR_ flags. There do not seem to be a POSIX standard Parse any other flags over. */ int darwinflags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); if (GF_XATTR_CREATE & flags) darwinflags |= XATTR_CREATE; if (GF_XATTR_REPLACE & flags) darwinflags |= XATTR_REPLACE; return darwinflags; } #endif int32_t posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = NULL; char *acl_xattr = NULL; struct iatt preop = {0}; struct iatt postop = {0}; int32_t ret = 0; ssize_t acl_size = 0; dict_t *xattr = NULL; posix_xattr_filler_t filler = { 0, }; struct posix_private *priv = NULL; struct iatt tmp_stbuf = { 0, }; data_t *tdata = NULL; char *cs_var = NULL; gf_cs_obj_state state = -1; int i = 0; int len; struct mdata_iatt mdata_iatt = { 0, }; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(this->private, out); VALIDATE_OR_GOTO(loc, out); VALIDATE_OR_GOTO(dict, out); priv = this->private; DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); MAKE_INODE_HANDLE(real_path, this, loc, NULL); if (!real_path) { op_ret = -1; op_errno = ESTALE; goto out; } ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt); if (ret == 0) { /* This is initiated by lookup when ctime feature is enabled to create * "trusted.glusterfs.mdata" xattr if not present. These are the files * which were created when ctime feature is disabled. */ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, &mdata_iatt, &op_errno); if (ret != 0) { op_ret = -1; } goto out; } posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false); op_ret = -1; dict_del(dict, GFID_XATTR_KEY); dict_del(dict, GF_XATTR_VOL_ID_KEY); /* the io-stats-dump key should not reach disk */ dict_del(dict, GF_XATTR_IOSTATS_DUMP_KEY); tdata = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE); if (tdata) { /*TODO: move the following to a different function */ LOCK(&loc->inode->lock); { state = posix_cs_check_status(this, real_path, NULL, &preop); if (state != GF_CS_LOCAL) { op_errno = EINVAL; ret = posix_cs_set_state(this, &xattr, state, real_path, NULL); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed"); } goto unlock; } ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &tmp_stbuf, _gf_true); if (ret) { op_errno = EINVAL; goto unlock; } cs_var = alloca(4096); sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime); /*TODO: may be should consider nano-second also */ if (strncmp(cs_var, tdata->data, tdata->len) > 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "mtime " "passed is different from seen by file now." " Will skip truncating the file"); ret = -1; op_errno = EINVAL; goto unlock; } len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size); ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len, flags); if (ret) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, 0, "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE, ret); goto unlock; } len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks); ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len, flags); if (ret) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, 0, "setxattr failed. key %s err %d", GF_CS_NUM_BLOCKS, ret); goto unlock; } len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize); ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len, flags); if (ret) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, 0, "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret); goto unlock; } memset(cs_var, 0, 4096); if (loc->path[0] == '/') { for (i = 1; i < strlen(loc->path); i++) { cs_var[i - 1] = loc->path[i]; } cs_var[i] = '\0'; gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var); } ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var, strlen(cs_var), flags); if (ret) { op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "setxattr failed - %s" " %d", GF_CS_OBJECT_SIZE, ret); goto unlock; } ret = sys_truncate(real_path, 0); if (ret) { op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "truncate failed - %s" " %d", GF_CS_OBJECT_SIZE, ret); ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE); if (ret) { op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "removexattr " "failed post processing- %s" " %d", GF_CS_OBJECT_SIZE, ret); } goto unlock; } else { state = GF_CS_REMOTE; ret = posix_cs_set_state(this, &xattr, state, real_path, NULL); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed"); } } } unlock: UNLOCK(&loc->inode->lock); op_ret = ret; goto out; } filler.real_path = real_path; filler.this = this; filler.stbuf = &preop; filler.loc = loc; #ifdef GF_DARWIN_HOST_OS filler.flags = map_xattr_flags(flags); #else filler.flags = flags; #endif op_ret = dict_foreach(dict, _handle_setxattr_keyvalue_pair, &filler); if (op_ret < 0) { op_errno = -op_ret; op_ret = -1; goto out; } xattr = dict_new(); if (!xattr) goto out; /* * FIXFIX: Send the stbuf info in the xdata for now * This is used by DHT to redirect FOPs if the file is being migrated * Ignore errors for now */ ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postop, _gf_false); if (ret) goto out; ret = posix_set_iatt_in_dict(xattr, &preop, &postop); /* * ACL can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs which * won't aware of access-control xlator. To update its context correctly, * POSIX_ACL_*_XATTR stored in xdata which is send in the call_back path. */ if (dict_get(dict, GF_POSIX_ACL_ACCESS)) { /* * The size of buffer will be know after calling sys_lgetxattr, * so first we allocate buffer with large size(~4k), then we * reduced into required size using GF_REALLO(). */ acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char); if (!acl_xattr) goto out; acl_size = sys_lgetxattr(real_path, POSIX_ACL_ACCESS_XATTR, acl_xattr, ACL_BUFFER_MAX); if (acl_size < 0) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, "Posix acl is not set " "properly at the backend"); goto out; } /* If acl_size is more than max buffer size, just ignore it */ if (acl_size >= ACL_BUFFER_MAX) { gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW, "size of acl is more" "than the buffer"); goto out; } acl_xattr = GF_REALLOC(acl_xattr, acl_size); if (!acl_xattr) goto out; ret = dict_set_bin(xattr, POSIX_ACL_ACCESS_XATTR, acl_xattr, acl_size); if (ret) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, "failed to set" "xdata for acl"); GF_FREE(acl_xattr); goto out; } } if (dict_get(dict, GF_POSIX_ACL_DEFAULT)) { acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char); if (!acl_xattr) goto out; acl_size = sys_lgetxattr(real_path, POSIX_ACL_DEFAULT_XATTR, acl_xattr, ACL_BUFFER_MAX); if (acl_size < 0) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, "Posix acl is not set " "properly at the backend"); goto out; } if (acl_size >= ACL_BUFFER_MAX) { gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW, "size of acl is more" "than the buffer"); goto out; } acl_xattr = GF_REALLOC(acl_xattr, acl_size); if (!acl_xattr) goto out; ret = dict_set_bin(xattr, POSIX_ACL_DEFAULT_XATTR, acl_xattr, acl_size); if (ret) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, "failed to set" "xdata for acl"); GF_FREE(acl_xattr); goto out; } } out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xattr); if (xattr) dict_unref(xattr); return 0; } int posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, dict_t *dict, dict_t *xdata) { int ret = -1; int op_ret = -1; const char *fname = NULL; char *real_path = NULL; char *found = NULL; DIR *fd = NULL; struct dirent *entry = NULL; struct dirent scratch[2] = { { 0, }, }; MAKE_INODE_HANDLE(real_path, this, loc, NULL); if (!real_path) { return -ESTALE; } if (op_ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "posix_xattr_get_real_filename (lstat) on " "gfid-handle %s (path: %s) failed", real_path, loc->path); return -errno; } fd = sys_opendir(real_path); if (!fd) return -errno; fname = key + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY); for (;;) { errno = 0; entry = sys_readdir(fd, scratch); if (!entry || errno != 0) break; if (strcasecmp(entry->d_name, fname) == 0) { found = gf_strdup(entry->d_name); if (!found) { (void)sys_closedir(fd); return -ENOMEM; } break; } } (void)sys_closedir(fd); if (!found) return -ENOATTR; ret = dict_set_dynstr(dict, (char *)key, found); if (ret) { GF_FREE(found); return -ENOMEM; } ret = strlen(found) + 1; return ret; } int posix_get_ancestry_directory(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head, char **path, int type, int32_t *op_errno, dict_t *xdata) { ssize_t handle_size = 0; struct posix_private *priv = NULL; inode_t *inode = NULL; int ret = -1; char dirpath[PATH_MAX] = { 0, }; priv = this->private; handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); ret = posix_make_ancestryfromgfid( this, dirpath, PATH_MAX + 1, head, type | POSIX_ANCESTRY_PATH, leaf_inode->gfid, handle_size, priv->base_path, leaf_inode->table, &inode, xdata, op_errno); if (ret < 0) goto out; /* there is already a reference in loc->inode */ inode_unref(inode); if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) { if (strcmp(dirpath, "/")) dirpath[strlen(dirpath) - 1] = '\0'; *path = gf_strdup(dirpath); } out: return ret; } int32_t posix_links_in_same_directory(char *dirpath, int count, inode_t *leaf_inode, inode_t *parent, struct stat *stbuf, gf_dirent_t *head, char **path, int type, dict_t *xdata, int32_t *op_errno) { int op_ret = -1; gf_dirent_t *gf_entry = NULL; xlator_t *this = NULL; struct posix_private *priv = NULL; DIR *dirp = NULL; struct dirent *entry = NULL; struct dirent scratch[2] = { { 0, }, }; char temppath[PATH_MAX] = { 0, }; char scr[PATH_MAX * 4] = { 0, }; this = THIS; priv = this->private; dirp = sys_opendir(dirpath); if (!dirp) { *op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED, "could not opendir %s", dirpath); goto out; } while (count > 0) { errno = 0; entry = sys_readdir(dirp, scratch); if (!entry || errno != 0) break; if (entry->d_ino != stbuf->st_ino) continue; /* Linking an inode here, can cause a race in posix_acl. Parent inode gets linked here, but before it reaches posix_acl_readdirp_cbk, create/lookup can come on a leaf-inode, as parent-inode-ctx not yet updated in posix_acl_readdirp_cbk, create and lookup can fail with EACCESS. So do the inode linking in the quota xlator linked_inode = inode_link (leaf_inode, parent, entry->d_name, NULL); GF_ASSERT (linked_inode == leaf_inode); inode_unref (linked_inode);*/ if (type & POSIX_ANCESTRY_DENTRY) { loc_t loc = { 0, }; loc.inode = inode_ref(leaf_inode); gf_uuid_copy(loc.gfid, leaf_inode->gfid); (void)snprintf(temppath, sizeof(temppath), "%s/%s", dirpath, entry->d_name); gf_entry = gf_dirent_for_name(entry->d_name); if (!gf_entry) { gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "gf_entry is NULL"); op_ret = -1; *op_errno = ENOMEM; inode_unref(loc.inode); goto out; } gf_entry->inode = inode_ref(leaf_inode); gf_entry->dict = posix_xattr_fill(this, temppath, &loc, NULL, -1, xdata, NULL); iatt_from_stat(&(gf_entry->d_stat), stbuf); list_add_tail(&gf_entry->list, &head->list); loc_wipe(&loc); } if (type & POSIX_ANCESTRY_PATH) { (void)snprintf(temppath, sizeof(temppath), "%s/%s", &dirpath[priv->base_path_length], entry->d_name); if (!*path) { *path = gf_strdup(temppath); } else { /* creating a colon separated */ /* list of hard links */ (void)snprintf(scr, sizeof(scr), "%s:%s", *path, temppath); GF_FREE(*path); *path = gf_strdup(scr); } if (!*path) { op_ret = -1; *op_errno = ENOMEM; goto out; } } count--; } op_ret = 0; out: if (dirp) { op_ret = sys_closedir(dirp); if (op_ret == -1) { *op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_CLOSE_FAILED, "closedir failed"); } } return op_ret; } int posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head, char **path, int type, int32_t *op_errno, dict_t *xdata) { size_t remaining_size = 0; int op_ret = -1, pathlen = -1; ssize_t handle_size = 0; uuid_t pgfid = { 0, }; int nlink_samepgfid = 0; struct stat stbuf = { 0, }; char *list = NULL; int32_t list_offset = 0; struct posix_private *priv = NULL; ssize_t size = 0; inode_t *parent = NULL; loc_t *loc = NULL; char *leaf_path = NULL; char key[4096] = { 0, }; char dirpath[PATH_MAX] = { 0, }; char pgfidstr[UUID_CANONICAL_FORM_LEN + 1] = { 0, }; int len; priv = this->private; loc = GF_CALLOC(1, sizeof(*loc), gf_posix_mt_char); if (loc == NULL) { op_ret = -1; *op_errno = ENOMEM; goto out; } gf_uuid_copy(loc->gfid, leaf_inode->gfid); MAKE_INODE_HANDLE(leaf_path, this, loc, NULL); if (!leaf_path) { GF_FREE(loc); *op_errno = ESTALE; goto out; } GF_FREE(loc); size = sys_llistxattr(leaf_path, NULL, 0); if (size == -1) { *op_errno = errno; if ((errno == ENOTSUP) || (errno == ENOSYS)) { GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " "supported (try remounting brick" " with 'user_xattr' flag)"); } else { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, "listxattr failed on" "%s", leaf_path); } goto out; } if (size == 0) { op_ret = 0; goto out; } list = alloca(size); if (!list) { *op_errno = errno; goto out; } size = sys_llistxattr(leaf_path, list, size); if (size < 0) { op_ret = -1; *op_errno = errno; goto out; } remaining_size = size; list_offset = 0; op_ret = sys_lstat(leaf_path, &stbuf); if (op_ret == -1) { *op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, "lstat failed on %s", leaf_path); goto out; } while (remaining_size > 0) { len = snprintf(key, sizeof(key), "%s", list + list_offset); if (strncmp(key, PGFID_XATTR_KEY_PREFIX, SLEN(PGFID_XATTR_KEY_PREFIX)) != 0) goto next; op_ret = sys_lgetxattr(leaf_path, key, &nlink_samepgfid, sizeof(nlink_samepgfid)); if (op_ret == -1) { *op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "getxattr failed on " "%s: key = %s ", leaf_path, key); goto out; } nlink_samepgfid = ntoh32(nlink_samepgfid); snprintf(pgfidstr, sizeof(pgfidstr), "%s", key + SLEN(PGFID_XATTR_KEY_PREFIX)); gf_uuid_parse(pgfidstr, pgfid); handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); /* constructing the absolute real path of parent dir */ snprintf(dirpath, sizeof(dirpath), "%s", priv->base_path); pathlen = PATH_MAX + 1 - priv->base_path_length; op_ret = posix_make_ancestryfromgfid( this, dirpath + priv->base_path_length, pathlen, head, type | POSIX_ANCESTRY_PATH, pgfid, handle_size, priv->base_path, leaf_inode->table, &parent, xdata, op_errno); if (op_ret < 0) { goto next; } dirpath[strlen(dirpath) - 1] = '\0'; posix_links_in_same_directory(dirpath, nlink_samepgfid, leaf_inode, parent, &stbuf, head, path, type, xdata, op_errno); if (parent != NULL) { inode_unref(parent); parent = NULL; } next: remaining_size -= (len + 1); list_offset += (len + 1); } /* while (remaining_size > 0) */ op_ret = 0; out: return op_ret; } int posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head, char **path, int type, int32_t *op_errno, dict_t *xdata) { int ret = -1; struct posix_private *priv = NULL; priv = this->private; if (IA_ISDIR(leaf_inode->ia_type)) { ret = posix_get_ancestry_directory(this, leaf_inode, head, path, type, op_errno, xdata); } else { if (!priv->update_pgfid_nlinks) goto out; ret = posix_get_ancestry_non_directory(this, leaf_inode, head, path, type, op_errno, xdata); } out: if (ret && path && *path) { GF_FREE(*path); *path = NULL; } return ret; } /** * posix_getxattr - this function returns a dictionary with all the * key:value pair present as xattr. used for * both 'listxattr' and 'getxattr'. */ int32_t posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { struct posix_private *priv = NULL; int32_t op_ret = -1; int32_t op_errno = 0; char *value = NULL; char *real_path = NULL; dict_t *dict = NULL; int ret = -1; char *path = NULL; char *rpath = NULL; ssize_t size = 0; char *list = NULL; int32_t list_offset = 0; size_t remaining_size = 0; char *host_buf = NULL; char *keybuffer = NULL; int keybuff_len; char *value_buf = NULL; gf_boolean_t have_val = _gf_false; struct iatt buf = { 0, }; dict_t *xattr_rsp = NULL; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(loc, out); VALIDATE_OR_GOTO(this->private, out); SET_FS_ID(frame->root->uid, frame->root->gid); MAKE_INODE_HANDLE(real_path, this, loc, NULL); op_ret = -1; priv = this->private; ret = posix_handle_georep_xattrs(frame, name, &op_errno, _gf_true); if (ret == -1) { op_ret = -1; /* errno should be set from the above function*/ goto out; } ret = posix_handle_mdata_xattr(frame, name, &op_errno); if (ret == -1) { op_ret = -1; /* errno should be set from the above function*/ goto out; } if (name && posix_is_gfid2path_xattr(name)) { op_ret = -1; op_errno = ENOATTR; goto out; } dict = dict_new(); if (!dict) { op_errno = ENOMEM; goto out; } if (loc->inode && name && GF_POSIX_ACL_REQUEST(name)) { ret = posix_pacl_get(real_path, -1, name, &value); if (ret || !value) { op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, "could not get acl (%s) for" "gfid-handle %s (path: %s)", name, real_path, loc->path); op_ret = -1; goto out; } ret = dict_set_dynstr(dict, (char *)name, value); if (ret < 0) { GF_FREE(value); gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, "could not set acl (%s) for %s " "(gfid-handle: %s) in dictionary", name, loc->path, real_path); op_ret = -1; op_errno = ENOMEM; goto out; } size = ret; goto done; } if (loc->inode && name && (strncmp(name, GF_XATTR_GET_REAL_FILENAME_KEY, SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { ret = posix_xattr_get_real_filename(frame, this, loc, name, dict, xdata); if (ret < 0) { op_ret = -1; op_errno = -ret; if (op_errno == ENOATTR) { gf_msg_debug(this->name, 0, "Failed to get " "real filename (%s, %s)", loc->path, name); } else { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_GETTING_FILENAME_FAILED, "Failed to get real filename (%s, %s):", loc->path, name); } goto out; } size = ret; goto done; } if (loc->inode && name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) { if (!fd_list_empty(loc->inode)) { ret = dict_set_uint32(dict, (char *)name, 1); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "Failed to set " "dictionary value for %s", name); op_errno = ENOMEM; goto out; } } else { ret = dict_set_uint32(dict, (char *)name, 0); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "Failed to set " "dictionary value for %s", name); op_errno = ENOMEM; goto out; } } goto done; } if (loc->inode && name && (XATTR_IS_PATHINFO(name))) { VALIDATE_OR_GOTO(this->private, out); if (LOC_HAS_ABSPATH(loc)) { MAKE_REAL_PATH(rpath, this, loc->path); } else { rpath = real_path; } size = gf_asprintf( &host_buf, "", priv->base_path, ((priv->node_uuid_pathinfo && !gf_uuid_is_null(priv->glusterd_uuid)) ? uuid_utoa(priv->glusterd_uuid) : priv->hostname), rpath); if (size < 0) { op_errno = ENOMEM; goto out; } ret = dict_set_dynstr(dict, (char *)name, host_buf); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "could not set value" " (%s) in dictionary", host_buf); GF_FREE(host_buf); op_errno = ENOMEM; goto out; } goto done; } if (loc->inode && name && (strcmp(name, GF_XATTR_NODE_UUID_KEY) == 0) && !gf_uuid_is_null(priv->glusterd_uuid)) { size = gf_asprintf(&host_buf, "%s", uuid_utoa(priv->glusterd_uuid)); if (size == -1) { op_errno = ENOMEM; goto out; } ret = dict_set_dynstr(dict, GF_XATTR_NODE_UUID_KEY, host_buf); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, "could not set value" "(%s) in dictionary", host_buf); GF_FREE(host_buf); op_errno = -ret; goto out; } goto done; } if (loc->inode && name && (strcmp(name, GFID_TO_PATH_KEY) == 0)) { ret = inode_path(loc->inode, NULL, &path); if (ret < 0) { op_errno = -ret; gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_INODE_PATH_GET_FAILED, "%s: could not get " "inode path", uuid_utoa(loc->inode->gfid)); goto out; } size = ret; ret = dict_set_dynstr(dict, GFID_TO_PATH_KEY, path); if (ret < 0) { op_errno = ENOMEM; GF_FREE(path); goto out; } goto done; } if (loc->inode && name && (strcmp(name, GFID2PATH_VIRT_XATTR_KEY) == 0)) { if (!priv->gfid2path) { op_errno = ENOATTR; op_ret = -1; goto out; } ret = posix_get_gfid2path(this, loc->inode, real_path, &op_errno, dict); if (ret < 0) { op_ret = -1; goto out; } size = ret; goto done; } if (loc->inode && name && (strcmp(name, GET_ANCESTRY_PATH_KEY) == 0)) { int type = POSIX_ANCESTRY_PATH; op_ret = posix_get_ancestry(this, loc->inode, NULL, &path, type, &op_errno, xdata); if (op_ret < 0) { op_ret = -1; op_errno = ENODATA; goto out; } size = op_ret; op_ret = dict_set_dynstr(dict, GET_ANCESTRY_PATH_KEY, path); if (op_ret < 0) { gf_msg(this->name, GF_LOG_WARNING, -op_ret, P_MSG_GET_KEY_VALUE_FAILED, "could not get " "value for key (%s)", GET_ANCESTRY_PATH_KEY); GF_FREE(path); op_errno = ENOMEM; goto out; } goto done; } if (loc->inode && name && (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE, SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { op_ret = posix_get_objectsignature(real_path, dict); if (op_ret < 0) { op_errno = -op_ret; goto out; } goto done; } /* here allocate value_buf of 8192 bytes to avoid one extra getxattr call,If buffer size is small to hold the xattr result then it will allocate a new buffer value of required size and call getxattr again */ value_buf = alloca(XATTR_VAL_BUF_SIZE); if (name) { char *key = (char *)name; keybuffer = key; #if defined(GF_DARWIN_HOST_OS_DISABLED) if (priv->xattr_user_namespace == XATTR_STRIP) { if (strncmp(key, "user.", 5) == 0) { key += 5; gf_msg_debug(this->name, 0, "getxattr for file %s (gfid-handle: %s)" " stripping user key: %s -> %s", loc->path, real_path, keybuffer, key); } } #endif size = sys_lgetxattr(real_path, key, value_buf, XATTR_VAL_BUF_SIZE - 1); if (size >= 0) { have_val = _gf_true; } else { if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "getxattr failed due to overflow of buffer" " on gfid-handle %s (path: %s) : %s ", real_path, loc->path, key); size = sys_lgetxattr(real_path, key, NULL, 0); } if (size == -1) { op_errno = errno; if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) { GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " "supported (try remounting" " brick with 'user_xattr' " "flag)"); } if ((op_errno == ENOATTR) || (op_errno == ENODATA)) { gf_msg_debug(this->name, 0, "No such attribute:%s for file %s (path: %s)", key, real_path, loc->path); } else { gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, "getxattr failed on " "%s (path: %s): %s ", real_path, loc->path, key); } goto out; } } value = GF_MALLOC(size + 1, gf_posix_mt_char); if (!value) { op_ret = -1; op_errno = ENOMEM; goto out; } if (have_val) { memcpy(value, value_buf, size); } else { bzero(value, size + 1); size = sys_lgetxattr(real_path, key, value, size); if (size == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "getxattr failed on %s (path: %s): key = %s", real_path, loc->path, key); GF_FREE(value); goto out; } } value[size] = '\0'; op_ret = dict_set_dynptr(dict, key, value, size); if (op_ret < 0) { op_errno = -op_ret; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, "dict set operation " "on %s (gfid-handle: %s) for the key %s failed.", loc->path, real_path, key); GF_FREE(value); goto out; } goto done; } have_val = _gf_false; size = sys_llistxattr(real_path, value_buf, XATTR_VAL_BUF_SIZE - 1); if (size > 0) { have_val = _gf_true; } else { if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "listxattr failed due to overflow of buffer" " on %s (path: %s) ", real_path, loc->path); size = sys_llistxattr(real_path, NULL, 0); } if (size == -1) { op_errno = errno; if ((errno == ENOTSUP) || (errno == ENOSYS)) { GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " "supported (try remounting" " brick with 'user_xattr' " "flag)"); } else { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "listxattr failed on %s (path: %s)", real_path, loc->path); } goto out; } if (size == 0) goto done; } list = alloca(size); if (!list) { op_errno = errno; goto out; } if (have_val) { memcpy(list, value_buf, size); } else { size = sys_llistxattr(real_path, list, size); if (size < 0) { op_ret = -1; op_errno = errno; goto out; } } remaining_size = size; list_offset = 0; keybuffer = alloca(XATTR_KEY_BUF_SIZE); while (remaining_size > 0) { keybuff_len = snprintf(keybuffer, XATTR_KEY_BUF_SIZE, "%s", list + list_offset); ret = posix_handle_georep_xattrs(frame, keybuffer, NULL, _gf_false); if (ret == -1) goto ignore; ret = posix_handle_mdata_xattr(frame, keybuffer, &op_errno); if (ret == -1) { goto ignore; } if (posix_is_gfid2path_xattr(keybuffer)) { goto ignore; } have_val = _gf_false; size = sys_lgetxattr(real_path, keybuffer, value_buf, XATTR_VAL_BUF_SIZE - 1); if (size >= 0) { have_val = _gf_true; } else { if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, op_errno, P_MSG_XATTR_FAILED, "getxattr failed due to overflow of" " buffer on %s (path: %s): %s ", real_path, loc->path, keybuffer); size = sys_lgetxattr(real_path, keybuffer, NULL, 0); } if (size == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "getxattr failed on" " %s (path: %s): key = %s ", real_path, loc->path, keybuffer); goto out; } } value = GF_MALLOC(size + 1, gf_posix_mt_char); if (!value) { op_errno = errno; goto out; } if (have_val) { memcpy(value, value_buf, size); } else { bzero(value, size + 1); size = sys_lgetxattr(real_path, keybuffer, value, size); if (size == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "getxattr failed on" " %s (path: %s): key = %s ", real_path, loc->path, keybuffer); GF_FREE(value); goto out; } } value[size] = '\0'; #ifdef GF_DARWIN_HOST_OS /* The protocol expect namespace for now */ char *newkey = NULL; gf_add_prefix(XATTR_USER_PREFIX, keybuffer, &newkey); keybuff_len = snprintf(keybuffer, sizeof(keybuffer), "%s", newkey); GF_FREE(newkey); #endif op_ret = dict_set_dynptr(dict, keybuffer, value, size); if (op_ret < 0) { op_errno = -op_ret; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, "dict set operation " "on %s (gfid-handle: %s) for the key %s failed.", loc->path, real_path, keybuffer); GF_FREE(value); goto out; } ignore: remaining_size -= keybuff_len + 1; list_offset += keybuff_len + 1; } /* while (remaining_size > 0) */ done: op_ret = size; if (xdata && (op_ret >= 0)) { xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, &buf); } if (dict) { dict_del(dict, GFID_XATTR_KEY); dict_del(dict, GF_XATTR_VOL_ID_KEY); } out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xattr_rsp); if (xattr_rsp) dict_unref(xattr_rsp); if (dict) { dict_unref(dict); } return 0; } int32_t posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = EINVAL; struct posix_fd *pfd = NULL; int _fd = -1; int32_t list_offset = 0; ssize_t size = 0; size_t remaining_size = 0; char *value = NULL; char *list = NULL; dict_t *dict = NULL; int ret = -1; char key[4096] = { 0, }; int key_len; char *value_buf = NULL; gf_boolean_t have_val = _gf_false; struct iatt buf = { 0, }; dict_t *xattr_rsp = NULL; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); SET_FS_ID(frame->root->uid, frame->root->gid); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { op_ret = -1; gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); goto out; } _fd = pfd->fd; /* Get the total size */ dict = dict_new(); if (!dict) { op_ret = -1; op_errno = ENOMEM; goto out; } if (name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) { ret = dict_set_uint32(dict, (char *)name, 1); if (ret < 0) { op_ret = -1; size = -1; op_errno = ENOMEM; gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "Failed to set " "dictionary value for %s", name); goto out; } goto done; } if (name && strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE, SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) { op_ret = posix_fdget_objectsignature(_fd, dict); if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "posix_fdget_objectsignature failed"); op_errno = -op_ret; op_ret = -1; size = -1; goto out; } goto done; } /* here allocate value_buf of 8192 bytes to avoid one extra getxattr call,If buffer size is small to hold the xattr result then it will allocate a new buffer value of required size and call getxattr again */ value_buf = alloca(XATTR_VAL_BUF_SIZE); if (name) { key_len = snprintf(key, sizeof(key), "%s", name); #ifdef GF_DARWIN_HOST_OS struct posix_private *priv = NULL; priv = this->private; if (priv->xattr_user_namespace == XATTR_STRIP) { char *newkey = NULL; gf_add_prefix(XATTR_USER_PREFIX, key, &newkey); key_len = snprintf(key, sizeof(key), "%s", newkey); GF_FREE(newkey); } #endif size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1); if (size >= 0) { have_val = _gf_true; } else { if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "fgetxattr failed due to overflow of" "buffer on %s ", key); size = sys_fgetxattr(_fd, key, NULL, 0); } if (size == -1) { op_errno = errno; if (errno == ENODATA || errno == ENOATTR) { gf_msg_debug(this->name, 0, "fgetxattr" " failed on key %s (%s)", key, strerror(op_errno)); } else { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "fgetxattr" " failed on key %s", key); } goto done; } } value = GF_MALLOC(size + 1, gf_posix_mt_char); if (!value) { op_ret = -1; op_errno = ENOMEM; goto out; } if (have_val) { memcpy(value, value_buf, size); } else { bzero(value, size + 1); size = sys_fgetxattr(_fd, key, value, size); if (size == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "fgetxattr" " failed on fd %p for the key %s ", fd, key); GF_FREE(value); goto out; } } value[size] = '\0'; op_ret = dict_set_dynptr(dict, key, value, size); if (op_ret < 0) { op_errno = -op_ret; op_ret = -1; gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED, "dict set operation " "on key %s failed", key); GF_FREE(value); goto out; } goto done; } size = sys_flistxattr(_fd, value_buf, XATTR_VAL_BUF_SIZE - 1); if (size > 0) { have_val = _gf_true; } else { if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "listxattr failed due to overflow of buffer" " on %p ", fd); size = sys_flistxattr(_fd, NULL, 0); } if (size == -1) { op_ret = -1; op_errno = errno; if ((errno == ENOTSUP) || (errno == ENOSYS)) { GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " "supported (try remounting " "brick with 'user_xattr' flag)"); } else { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "listxattr failed " "on %p:", fd); } goto out; } if (size == 0) goto done; } list = alloca(size + 1); if (!list) { op_ret = -1; op_errno = ENOMEM; goto out; } if (have_val) memcpy(list, value_buf, size); else size = sys_flistxattr(_fd, list, size); remaining_size = size; list_offset = 0; while (remaining_size > 0) { if (*(list + list_offset) == '\0') break; key_len = snprintf(key, sizeof(key), "%s", list + list_offset); have_val = _gf_false; size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1); if (size >= 0) { have_val = _gf_true; } else { if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "fgetxattr failed due to overflow of buffer" " on fd %p: for the key %s ", fd, key); size = sys_fgetxattr(_fd, key, NULL, 0); } if (size == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "fgetxattr failed " "on fd %p for the key %s ", fd, key); break; } } value = GF_MALLOC(size + 1, gf_posix_mt_char); if (!value) { op_ret = -1; op_errno = errno; goto out; } if (have_val) { memcpy(value, value_buf, size); } else { bzero(value, size + 1); size = sys_fgetxattr(_fd, key, value, size); if (size == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "fgetxattr failed o" "n the fd %p for the key %s ", fd, key); GF_FREE(value); break; } } value[size] = '\0'; op_ret = dict_set_dynptr(dict, key, value, size); if (op_ret) { op_errno = -op_ret; op_ret = -1; gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED, "dict set operation " "failed on key %s", key); GF_FREE(value); goto out; } remaining_size -= key_len + 1; list_offset += key_len + 1; } /* while (remaining_size > 0) */ done: op_ret = size; if (xdata && (op_ret >= 0)) { xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata, &buf); } if (dict) { dict_del(dict, GFID_XATTR_KEY); dict_del(dict, GF_XATTR_VOL_ID_KEY); } out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xattr_rsp); if (xattr_rsp) dict_unref(xattr_rsp); if (dict) dict_unref(dict); return 0; } static int _handle_fsetxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) { posix_xattr_filler_t *filler = NULL; filler = tmp; return posix_fhandle_pair(filler->frame, filler->this, filler->fdnum, k, v, filler->flags, filler->stbuf, filler->fd); } int32_t posix_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; struct posix_fd *pfd = NULL; int _fd = -1; int ret = -1; struct iatt preop = { 0, }; struct iatt postop = { 0, }; dict_t *xattr = NULL; posix_xattr_filler_t filler = { 0, }; struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); VALIDATE_OR_GOTO(dict, out); priv = this->private; DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); goto out; } _fd = pfd->fd; ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); if (ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED, "fsetxattr (fstat)" "failed on fd=%p", fd); goto out; } dict_del(dict, GFID_XATTR_KEY); dict_del(dict, GF_XATTR_VOL_ID_KEY); filler.fdnum = _fd; filler.this = this; filler.frame = frame; filler.stbuf = &preop; filler.fd = fd; #ifdef GF_DARWIN_HOST_OS filler.flags = map_xattr_flags(flags); #else filler.flags = flags; #endif op_ret = dict_foreach(dict, _handle_fsetxattr_keyvalue_pair, &filler); if (op_ret < 0) { op_errno = -op_ret; op_ret = -1; } if (!ret && xdata && dict_get(xdata, GLUSTERFS_DURABLE_OP)) { op_ret = sys_fsync(_fd); if (op_ret < 0) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_DURABILITY_REQ_NOT_SATISFIED, "could not satisfy durability request: " "reason "); } } ret = posix_fdstat(this, fd->inode, pfd->fd, &postop); if (ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, "fsetxattr (fstat)" "failed on fd=%p", fd); goto out; } xattr = dict_new(); if (!xattr) goto out; ret = posix_set_iatt_in_dict(xattr, &preop, &postop); out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xattr); if (xattr) dict_unref(xattr); return 0; } int _posix_remove_xattr(dict_t *dict, char *key, data_t *value, void *data) { int32_t op_ret = 0; xlator_t *this = NULL; posix_xattr_filler_t *filler = NULL; filler = (posix_xattr_filler_t *)data; this = filler->this; #ifdef GF_DARWIN_HOST_OS struct posix_private *priv = NULL; priv = (struct posix_private *)this->private; char *newkey = NULL; if (priv->xattr_user_namespace == XATTR_STRIP) { gf_remove_prefix(XATTR_USER_PREFIX, key, &newkey); gf_msg_debug("remove_xattr", 0, "key %s => %s", key, newkey); key = newkey; } #endif /* Bulk remove xattr is internal fop in gluster. Some of the xattrs may * have special behavior. Ex: removexattr("posix.system_acl_access"), * removes more than one xattr on the file that could be present in the * bulk-removal request. Removexattr of these deleted xattrs will fail * with either ENODATA/ENOATTR. Since all this fop cares is removal of the * xattrs in bulk-remove request and if they are already deleted, it can be * treated as success. */ if (filler->real_path) op_ret = sys_lremovexattr(filler->real_path, key); else op_ret = sys_fremovexattr(filler->fdnum, key); if (op_ret == -1) { if (errno == ENODATA || errno == ENOATTR) op_ret = 0; } if (op_ret == -1) { filler->op_errno = errno; if (errno != ENOATTR && errno != ENODATA && errno != EPERM) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "removexattr failed on " "file/dir %s with gfid: %s (for %s)", filler->real_path ? filler->real_path : "", uuid_utoa(filler->inode->gfid), key); } } #ifdef GF_DARWIN_HOST_OS GF_FREE(newkey); #endif return op_ret; } int posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd, const char *name, dict_t *xdata, int *op_errno, dict_t **xdata_rsp) { gf_boolean_t bulk_removexattr = _gf_false; gf_boolean_t disallow = _gf_false; char *real_path = NULL; struct posix_fd *pfd = NULL; int op_ret = 0; struct iatt preop = { 0, }; struct iatt postop = { 0, }; int ret = 0; int _fd = -1; xlator_t *this = frame->this; inode_t *inode = NULL; posix_xattr_filler_t filler = {0}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); if (loc) { MAKE_INODE_HANDLE(real_path, this, loc, NULL); if (!real_path) { op_ret = -1; *op_errno = ESTALE; goto out; } inode = loc->inode; } else { op_ret = posix_fd_ctx_get(fd, this, &pfd, op_errno); if (op_ret < 0) { gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); goto out; } _fd = pfd->fd; inode = fd->inode; } if (posix_is_gfid2path_xattr(name)) { op_ret = -1; *op_errno = ENOATTR; goto out; } if (loc) { ret = posix_pstat(this, inode, loc->gfid, real_path, &preop, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, "pstat operaton failed on %s", real_path); } } else { ret = posix_fdstat(this, inode, _fd, &preop); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, "fdstat operaton failed on %s", real_path); } } if (gf_get_index_by_elem(disallow_removexattrs, (char *)name) >= 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, "Remove xattr called on %s for file/dir %s with gfid: " "%s", name, real_path ? real_path : "", uuid_utoa(inode->gfid)); op_ret = -1; *op_errno = EPERM; goto out; } else if (posix_is_bulk_removexattr((char *)name, xdata)) { bulk_removexattr = _gf_true; (void)dict_has_key_from_array(xdata, disallow_removexattrs, &disallow); if (disallow) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, "Bulk removexattr has keys that shouldn't be " "removed for file/dir %s with gfid: %s", real_path ? real_path : "", uuid_utoa(inode->gfid)); op_ret = -1; *op_errno = EPERM; goto out; } } if (bulk_removexattr) { filler.real_path = real_path; filler.this = this; filler.fdnum = _fd; filler.inode = inode; op_ret = dict_foreach(xdata, _posix_remove_xattr, &filler); if (op_ret) { *op_errno = filler.op_errno; goto out; } } else { if (loc) op_ret = sys_lremovexattr(real_path, name); else op_ret = sys_fremovexattr(_fd, name); if (op_ret == -1) { *op_errno = errno; if (*op_errno != ENOATTR && *op_errno != ENODATA && *op_errno != EPERM) { gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, "removexattr on %s with gfid %s " "(for %s)", real_path, uuid_utoa(inode->gfid), name); } goto out; } } if (loc) { posix_set_ctime(frame, this, real_path, -1, inode, NULL); ret = posix_pstat(this, inode, loc->gfid, real_path, &postop, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, "pstat operaton failed on %s", real_path); } } else { posix_set_ctime(frame, this, NULL, _fd, inode, NULL); ret = posix_fdstat(this, inode, _fd, &postop); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, "fdstat operaton failed on %s", real_path); } } if (ret) goto out; *xdata_rsp = dict_new(); if (!*xdata_rsp) goto out; ret = posix_set_iatt_in_dict(*xdata_rsp, &preop, &postop); op_ret = 0; out: SET_TO_OLD_FS_ID(); return op_ret; } int32_t posix_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { int op_ret = -1; int op_errno = EINVAL; dict_t *xdata_rsp = NULL; VALIDATE_OR_GOTO(loc, out); op_ret = posix_common_removexattr(frame, loc, NULL, name, xdata, &op_errno, &xdata_rsp); out: STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata_rsp); if (xdata_rsp) dict_unref(xdata_rsp); return 0; } int32_t posix_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = EINVAL; dict_t *xdata_rsp = NULL; VALIDATE_OR_GOTO(fd, out); op_ret = posix_common_removexattr(frame, NULL, fd, name, xdata, &op_errno, &xdata_rsp); out: STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata_rsp); if (xdata_rsp) dict_unref(xdata_rsp); return 0; } int32_t posix_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int ret = -1; struct posix_fd *pfd = NULL; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL, fd=%p", fd); goto out; } op_ret = 0; out: STACK_UNWIND_STRICT(fsyncdir, frame, op_ret, op_errno, NULL); return 0; } void posix_print_xattr(dict_t *this, char *key, data_t *value, void *data) { gf_msg_debug("posix", 0, "(key/val) = (%s/%d)", key, data_to_int32(value)); } /** * add_array - add two arrays of 32-bit numbers (stored in network byte order) * dest = dest + src * @count: number of 32-bit numbers * FIXME: handle overflow */ static void __add_array(int32_t *dest, int32_t *src, int count) { int i = 0; int32_t destval = 0; for (i = 0; i < count; i++) { destval = ntoh32(dest[i]); dest[i] = hton32(destval + ntoh32(src[i])); } } static void __add_long_array(int64_t *dest, int64_t *src, int count) { int i = 0; for (i = 0; i < count; i++) { dest[i] = hton64(ntoh64(dest[i]) + ntoh64(src[i])); } } /* functions: __add_array_with_default __add_long_array_with_default xattrop type: GF_XATTROP_ADD_ARRAY_WITH_DEFAULT GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT These operations are similar to 'GF_XATTROP_ADD_ARRAY', except that it adds a default value if xattr is missing or its value is zero on disk. One use-case of this operation is in inode-quota. When a new directory is created, its default dir_count should be set to 1. So when a xattrop performed setting inode-xattrs, it should account initial dir_count 1 if the xattrs are not present Here is the usage of this operation value required in xdata for each key struct array { int32_t newvalue_1; int32_t newvalue_2; ... int32_t newvalue_n; int32_t default_1; int32_t default_2; ... int32_t default_n; }; or struct array { int32_t value_1; int32_t value_2; ... int32_t value_n; } data[2]; fill data[0] with new value to add fill data[1] with default value xattrop GF_XATTROP_ADD_ARRAY_WITH_DEFAULT for i from 1 to n { if (xattr (dest_i) is zero or not set in the disk) dest_i = newvalue_i + default_i else dest_i = dest_i + newvalue_i } value in xdata after xattrop is successful struct array { int32_t dest_1; int32_t dest_2; ... int32_t dest_n; }; */ static void __add_array_with_default(int32_t *dest, int32_t *src, int count) { int i = 0; int32_t destval = 0; for (i = 0; i < count; i++) { destval = ntoh32(dest[i]); if (destval == 0) dest[i] = hton32(ntoh32(src[i]) + ntoh32(src[count + i])); else dest[i] = hton32(destval + ntoh32(src[i])); } } static void __add_long_array_with_default(int64_t *dest, int64_t *src, int count) { int i = 0; int64_t destval = 0; for (i = 0; i < count; i++) { destval = ntoh64(dest[i]); if (destval == 0) dest[i] = hton64(ntoh64(src[i]) + ntoh64(src[i + count])); else dest[i] = hton64(destval + ntoh64(src[i])); } } static int _posix_handle_xattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) { int size = 0; int count = 0; int op_ret = 0; int op_errno = 0; gf_xattrop_flags_t optype = 0; char *array = NULL; char *dst_data = NULL; inode_t *inode = NULL; xlator_t *this = NULL; posix_xattr_filler_t *filler = NULL; posix_inode_ctx_t *ctx = NULL; filler = tmp; optype = (gf_xattrop_flags_t)(filler->flags); this = filler->this; inode = filler->inode; count = v->len; if (optype == GF_XATTROP_ADD_ARRAY_WITH_DEFAULT || optype == GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT) count = count / 2; array = GF_CALLOC(count, sizeof(char), gf_posix_mt_char); #ifdef GF_DARWIN_HOST_OS struct posix_private *priv = NULL; priv = this->private; if (priv->xattr_user_namespace == XATTR_STRIP) { if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) { k += XATTR_USER_PREFIX_LEN; } } #endif op_ret = posix_inode_ctx_get_all(inode, this, &ctx); if (op_ret < 0) { op_errno = ENOMEM; goto out; } pthread_mutex_lock(&ctx->xattrop_lock); { if (filler->real_path) { size = sys_lgetxattr(filler->real_path, k, (char *)array, count); } else { size = sys_fgetxattr(filler->fdnum, k, (char *)array, count); } op_errno = errno; if ((size == -1) && (op_errno != ENODATA) && (op_errno != ENOATTR)) { if (op_errno == ENOTSUP) { GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " "supported by filesystem"); } else if (op_errno != ENOENT || !posix_special_xattr(marker_xattrs, k)) { if (filler->real_path) gf_msg(this->name, fop_log_level(GF_FOP_XATTROP, op_errno), op_errno, P_MSG_XATTR_FAILED, "getxattr failed on %s while " "doing xattrop: Key:%s ", filler->real_path, k); else gf_msg( this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, "fgetxattr failed on gfid=%s " "while doing xattrop: " "Key:%s (%s)", uuid_utoa(filler->inode->gfid), k, strerror(op_errno)); } op_ret = -1; goto unlock; } if (size == -1 && optype == GF_XATTROP_GET_AND_SET) { GF_FREE(array); array = NULL; } /* We only write back the xattr if it has been really modified * (i.e. v->data is not all 0's). Otherwise we return its value * but we don't update anything. * * If the xattr does not exist, a value of all 0's is returned * without creating it. */ size = count; if (optype != GF_XATTROP_GET_AND_SET && mem_0filled(v->data, v->len) == 0) goto unlock; dst_data = array; switch (optype) { case GF_XATTROP_ADD_ARRAY: __add_array((int32_t *)array, (int32_t *)v->data, count / 4); break; case GF_XATTROP_ADD_ARRAY64: __add_long_array((int64_t *)array, (int64_t *)v->data, count / 8); break; case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT: __add_array_with_default((int32_t *)array, (int32_t *)v->data, count / 4); break; case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT: __add_long_array_with_default((int64_t *)array, (int64_t *)v->data, count / 8); break; case GF_XATTROP_GET_AND_SET: dst_data = v->data; break; default: gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_UNKNOWN_OP, "Unknown xattrop type (%d)" " on %s. Please send a bug report to " "gluster-devel@gluster.org", optype, filler->real_path); op_ret = -1; op_errno = EINVAL; goto unlock; } if (filler->real_path) { size = sys_lsetxattr(filler->real_path, k, dst_data, count, 0); } else { size = sys_fsetxattr(filler->fdnum, k, (char *)dst_data, count, 0); } op_errno = errno; } unlock: pthread_mutex_unlock(&ctx->xattrop_lock); if (op_ret == -1) goto out; if (size == -1) { if (filler->real_path) gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, "setxattr failed on %s " "while doing xattrop: key=%s", filler->real_path, k); else gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, "fsetxattr failed on gfid=%s while doing " "xattrop: key=%s (%s)", uuid_utoa(filler->inode->gfid), k, strerror(op_errno)); op_ret = -1; goto out; } else if (array) { op_ret = dict_set_bin(filler->xattr, k, array, count); if (op_ret) { if (filler->real_path) gf_msg_debug(this->name, 0, "dict_set_bin failed (path=%s): " "key=%s (%s)", filler->real_path, k, strerror(-size)); else gf_msg_debug(this->name, 0, "dict_set_bin failed (gfid=%s): " "key=%s (%s)", uuid_utoa(filler->inode->gfid), k, strerror(-size)); op_ret = -1; op_errno = EINVAL; GF_FREE(array); array = NULL; goto out; } array = NULL; } out: if (op_ret < 0) filler->op_errno = op_errno; if (array) GF_FREE(array); return op_ret; } /** * xattrop - xattr operations - for internal use by GlusterFS * @optype: ADD_ARRAY: * dict should contain: * "key" ==> array of 32-bit numbers */ int do_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { int op_ret = 0; int op_errno = 0; int _fd = -1; char *real_path = NULL; struct posix_fd *pfd = NULL; inode_t *inode = NULL; posix_xattr_filler_t filler = { 0, }; dict_t *xattr_rsp = NULL; dict_t *xdata_rsp = NULL; struct iatt stbuf = {0}; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(xattr, out); VALIDATE_OR_GOTO(this, out); if (fd) { op_ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (op_ret < 0) { gf_msg(this->name, GF_LOG_WARNING, fop_log_level(GF_FOP_FXATTROP, op_errno), P_MSG_PFD_GET_FAILED, "failed to get pfd from" " fd=%p", fd); goto out; } _fd = pfd->fd; } if (loc && !gf_uuid_is_null(loc->gfid)) { MAKE_INODE_HANDLE(real_path, this, loc, NULL); if (!real_path) { op_ret = -1; op_errno = ESTALE; goto out; } } if (real_path) { inode = loc->inode; } else if (fd) { inode = fd->inode; } xattr_rsp = dict_new(); if (xattr_rsp == NULL) { op_ret = -1; op_errno = ENOMEM; goto out; } filler.this = this; filler.fdnum = _fd; filler.real_path = real_path; filler.flags = (int)optype; filler.inode = inode; filler.xattr = xattr_rsp; op_ret = dict_foreach(xattr, _posix_handle_xattr_keyvalue_pair, &filler); op_errno = filler.op_errno; if (op_ret < 0) goto out; if (!xdata) goto out; if (fd) { op_ret = posix_fdstat(this, inode, _fd, &stbuf); } else { op_ret = posix_pstat(this, inode, inode->gfid, real_path, &stbuf, _gf_false); } if (op_ret < 0) { op_errno = errno; goto out; } xdata_rsp = posix_xattr_fill(this, real_path, loc, fd, _fd, xdata, &stbuf); if (!xdata_rsp) { op_ret = -1; op_errno = ENOMEM; } posix_set_mode_in_dict(xdata, xdata_rsp, &stbuf); out: STACK_UNWIND_STRICT(xattrop, frame, op_ret, op_errno, xattr_rsp, xdata_rsp); if (xattr_rsp) dict_unref(xattr_rsp); if (xdata_rsp) dict_unref(xdata_rsp); return 0; } int posix_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { do_xattrop(frame, this, loc, NULL, optype, xattr, xdata); return 0; } int posix_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { do_xattrop(frame, this, NULL, fd, optype, xattr, xdata); return 0; } int posix_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(loc, out); MAKE_INODE_HANDLE(real_path, this, loc, NULL); if (!real_path) { op_ret = -1; op_errno = errno; goto out; } op_ret = sys_access(real_path, mask & 07); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED, "access failed on %s", real_path); goto out; } op_ret = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct iatt preop = { 0, }; struct iatt postop = { 0, }; struct posix_fd *pfd = NULL; int ret = -1; struct posix_private *priv = NULL; dict_t *rsp_xdata = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL, fd=%p", fd); goto out; } _fd = pfd->fd; op_ret = posix_fdstat(this, fd->inode, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd=%p", fd); goto out; } if (xdata) { op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, &rsp_xdata, _gf_false); if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); op_errno = EIO; goto out; } } posix_update_iatt_buf(&preop, _fd, NULL, xdata); op_ret = sys_ftruncate(_fd, offset); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, "ftruncate failed on fd=%p (%" PRId64 "", fd, offset); goto out; } op_ret = posix_fdstat(this, fd->inode, _fd, &postop); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "post-operation fstat failed on fd=%p", fd); goto out; } posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop); op_ret = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, &preop, &postop, NULL); return 0; } int32_t posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { int _fd = -1; int32_t op_ret = -1; int32_t op_errno = 0; struct iatt buf = { 0, }; struct posix_fd *pfd = NULL; dict_t *xattr_rsp = NULL; int ret = -1; struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); if (!xdata) gf_msg_trace(this->name, 0, "null xdata passed, fd %p", fd); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL, fd=%p", fd); goto out; } _fd = pfd->fd; op_ret = posix_fdstat(this, fd->inode, _fd, &buf); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fstat failed on fd=%p", fd); goto out; } if (xdata) { xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, _fd, xdata, &buf); op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &buf, NULL, xdata, &xattr_rsp, _gf_false); if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); } posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL); } posix_update_iatt_buf(&buf, _fd, NULL, xdata); op_ret = 0; out: SET_TO_OLD_FS_ID(); STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, &buf, xattr_rsp); if (xattr_rsp) dict_unref(xattr_rsp); return 0; } int32_t posix_lease(call_frame_t *frame, xlator_t *this, loc_t *loc, struct gf_lease *lease, dict_t *xdata) { struct gf_lease nullease = { 0, }; gf_msg(this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED, "\"features/leases\" translator is not loaded. You need" "to use it for proper functioning of your application"); STACK_UNWIND_STRICT(lease, frame, -1, ENOSYS, &nullease, NULL); return 0; } static int gf_posix_lk_log; int32_t posix_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { struct gf_flock nullock = { 0, }; GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); STACK_UNWIND_STRICT(lk, frame, -1, ENOSYS, &nullock, NULL); return 0; } int32_t posix_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); STACK_UNWIND_STRICT(inodelk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); STACK_UNWIND_STRICT(finodelk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); STACK_UNWIND_STRICT(entrylk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOSYS, NULL); return 0; } int posix_fill_readdir(fd_t *fd, DIR *dir, off_t off, size_t size, gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) { off_t in_case = -1; off_t last_off = 0; size_t filled = 0; int count = 0; int32_t this_size = -1; gf_dirent_t *this_entry = NULL; struct posix_fd *pfd = NULL; struct stat stbuf = { 0, }; char *hpath = NULL; int len = 0; int ret = 0; int op_errno = 0; struct dirent *entry = NULL; struct dirent scratch[2] = { { 0, }, }; ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL, fd=%p", fd); count = -1; errno = op_errno; goto out; } if (skip_dirs) { len = posix_handle_path(this, fd->inode->gfid, NULL, NULL, 0); if (len <= 0) { errno = ESTALE; count = -1; goto out; } hpath = alloca(len + 256); /* NAME_MAX */ if (posix_handle_path(this, fd->inode->gfid, NULL, hpath, len) <= 0) { errno = ESTALE; count = -1; goto out; } len = strlen(hpath); hpath[len] = '/'; } if (!off) { rewinddir(dir); } else { seekdir(dir, off); #ifndef GF_LINUX_HOST_OS if ((u_long)telldir(dir) != off && off != pfd->dir_eof) { gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, P_MSG_DIR_OPERATION_FAILED, "seekdir(0x%llx) failed on dir=%p: " "Invalid argument (offset reused from " "another DIR * structure?)", off, dir); errno = EINVAL; count = -1; goto out; } #endif /* GF_LINUX_HOST_OS */ } while (filled <= size) { in_case = (u_long)telldir(dir); if (in_case == -1) { gf_msg(THIS->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED, "telldir failed on dir=%p", dir); goto out; } errno = 0; entry = sys_readdir(dir, scratch); if (!entry || errno != 0) { if (errno == EBADF) { gf_msg(THIS->name, GF_LOG_WARNING, errno, P_MSG_DIR_OPERATION_FAILED, "readdir failed on dir=%p", dir); goto out; } break; } #ifdef __NetBSD__ /* * NetBSD with UFS1 backend uses backing files for * extended attributes. They can be found in a * .attribute file located at the root of the filesystem * We hide it to glusterfs clients, since chaos will occur * when the cluster/dht xlator decides to distribute * exended attribute backing file across storage servers. */ if (__is_root_gfid(fd->inode->gfid) == 0 && (!strcmp(entry->d_name, ".attribute"))) continue; #endif /* __NetBSD__ */ if (__is_root_gfid(fd->inode->gfid) && (!strcmp(GF_HIDDEN_PATH, entry->d_name))) { continue; } if (skip_dirs) { if (DT_ISDIR(entry->d_type)) { continue; } else if (hpath) { strcpy(&hpath[len + 1], entry->d_name); ret = sys_lstat(hpath, &stbuf); if (!ret && S_ISDIR(stbuf.st_mode)) continue; } } this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + strlen(entry->d_name) + 1; if (this_size + filled > size) { seekdir(dir, in_case); #ifndef GF_LINUX_HOST_OS if ((u_long)telldir(dir) != in_case && in_case != pfd->dir_eof) { gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, P_MSG_DIR_OPERATION_FAILED, "seekdir(0x%llx) failed on dir=%p: " "Invalid argument (offset reused from " "another DIR * structure?)", in_case, dir); errno = EINVAL; count = -1; goto out; } #endif /* GF_LINUX_HOST_OS */ break; } this_entry = gf_dirent_for_name(entry->d_name); if (!this_entry) { gf_msg(THIS->name, GF_LOG_ERROR, errno, P_MSG_GF_DIRENT_CREATE_FAILED, "could not create " "gf_dirent for entry %s", entry->d_name); goto out; } /* * we store the offset of next entry here, which is * probably not intended, but code using syncop_readdir() * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it * for directory read resumption. */ last_off = (u_long)telldir(dir); this_entry->d_off = last_off; this_entry->d_ino = entry->d_ino; this_entry->d_type = entry->d_type; list_add_tail(&this_entry->list, &entries->list); filled += this_size; count++; } if ((!sys_readdir(dir, scratch) && (errno == 0))) { /* Indicate EOF */ errno = ENOENT; /* Remember EOF offset for later detection */ pfd->dir_eof = (u_long)last_off; } out: return count; } dict_t * posix_entry_xattr_fill(xlator_t *this, inode_t *inode, fd_t *fd, char *entry_path, dict_t *dict, struct iatt *stbuf) { loc_t tmp_loc = { 0, }; /* if we don't send the 'loc', open-fd-count be a problem. */ tmp_loc.inode = inode; return posix_xattr_fill(this, entry_path, &tmp_loc, NULL, -1, dict, stbuf); } int posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) { gf_dirent_t *entry = NULL; inode_table_t *itable = NULL; inode_t *inode = NULL; char *hpath = NULL; int len = 0; struct iatt stbuf = { 0, }; uuid_t gfid; int ret = -1; if (list_empty(&entries->list)) return 0; itable = fd->inode->table; len = posix_handle_path(this, fd->inode->gfid, NULL, NULL, 0); if (len <= 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED, "Failed to create handle path, fd=%p, gfid=%s", fd, uuid_utoa(fd->inode->gfid)); return -1; } hpath = alloca(len + 256); /* NAME_MAX */ if (posix_handle_path(this, fd->inode->gfid, NULL, hpath, len) <= 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED, "Failed to create handle path, fd=%p, gfid=%s", fd, uuid_utoa(fd->inode->gfid)); return -1; } len = strlen(hpath); hpath[len] = '/'; list_for_each_entry(entry, &entries->list, list) { inode = inode_grep(fd->inode->table, fd->inode, entry->d_name); if (inode) gf_uuid_copy(gfid, inode->gfid); else bzero(gfid, 16); strcpy(&hpath[len + 1], entry->d_name); ret = posix_pstat(this, inode, gfid, hpath, &stbuf, _gf_false); if (ret == -1) { if (inode) inode_unref(inode); continue; } posix_update_iatt_buf(&stbuf, -1, hpath, dict); if (!inode) inode = inode_find(itable, stbuf.ia_gfid); if (!inode) inode = inode_new(itable); entry->inode = inode; if (dict) { entry->dict = posix_entry_xattr_fill(this, entry->inode, fd, hpath, dict, &stbuf); } entry->d_stat = stbuf; if (stbuf.ia_ino) entry->d_ino = stbuf.ia_ino; if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) { /* The platform supports d_type but the underlying filesystem doesn't. We set d_type to the correct value from ia_type */ entry->d_type = gf_d_type_from_ia_type(stbuf.ia_type); } inode = NULL; } return 0; } int32_t posix_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict) { struct posix_fd *pfd = NULL; DIR *dir = NULL; int ret = -1; int count = 0; int32_t op_ret = -1; int32_t op_errno = 0; gf_dirent_t entries; int32_t skip_dirs = 0; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); INIT_LIST_HEAD(&entries.list); ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, "pfd is NULL, fd=%p", fd); goto out; } dir = pfd->dir; if (!dir) { gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL, "dir is NULL for fd=%p", fd); op_errno = EINVAL; goto out; } /* When READDIR_FILTER option is set to on, we can filter out * directory's entry from the entry->list. */ ret = dict_get_int32(dict, GF_READDIR_SKIP_DIRS, &skip_dirs); LOCK(&fd->lock); { /* posix_fill_readdir performs multiple separate individual readdir() calls to fill up the buffer. In case of NFS where the same anonymous FD is shared between different applications, reading a common directory can result in the anonymous fd getting re-used unsafely between the two readdir requests (in two different io-threads). It would also help, in the future, to replace the loop around readdir() with a single large getdents() call. */ count = posix_fill_readdir(fd, dir, off, size, &entries, this, skip_dirs); } UNLOCK(&fd->lock); /* pick ENOENT to indicate EOF */ op_errno = errno; op_ret = count; if (whichop != GF_FOP_READDIRP) goto out; posix_readdirp_fill(this, fd, &entries, dict); out: if (whichop == GF_FOP_READDIR) STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL); else STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL); gf_dirent_free(&entries); return 0; } int32_t posix_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, dict_t *xdata) { posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIR, xdata); return 0; } int32_t posix_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, dict_t *dict) { gf_dirent_t entries; int32_t op_ret = -1, op_errno = 0; gf_dirent_t *entry = NULL; if ((dict != NULL) && (dict_get(dict, GET_ANCESTRY_DENTRY_KEY))) { INIT_LIST_HEAD(&entries.list); op_ret = posix_get_ancestry(this, fd->inode, &entries, NULL, POSIX_ANCESTRY_DENTRY, &op_errno, dict); if (op_ret >= 0) { op_ret = 0; list_for_each_entry(entry, &entries.list, list) { op_ret++; } } STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL); gf_dirent_free(&entries); return 0; } posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIRP, dict); return 0; } int32_t posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, int32_t len, dict_t *xdata) { char *alloc_buf = NULL; char *buf = NULL; int _fd = -1; struct posix_fd *pfd = NULL; int op_ret = -1; int op_errno = 0; int ret = 0; ssize_t bytes_read = 0; int32_t weak_checksum = 0; int32_t zerofillcheck = 0; /* Protocol version 4 uses 32 bytes i.e SHA256_DIGEST_LENGTH, so this is used. */ unsigned char md5_checksum[SHA256_DIGEST_LENGTH] = {0}; unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0}; unsigned char *checksum = NULL; struct posix_private *priv = NULL; dict_t *rsp_xdata = NULL; gf_boolean_t buf_has_zeroes = _gf_false; struct iatt preop = { 0, }; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); priv = this->private; alloc_buf = _page_aligned_alloc(len, &buf); if (!alloc_buf) { op_errno = ENOMEM; goto out; } rsp_xdata = dict_new(); if (!rsp_xdata) { op_errno = ENOMEM; goto out; } ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL, "pfd is NULL, fd=%p", fd); goto out; } _fd = pfd->fd; if (xdata) { op_ret = posix_fdstat(this, fd->inode, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "pre-operation fstat failed on fd=%p", fd); goto out; } op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, &rsp_xdata, _gf_false); if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); op_errno = EIO; goto out; } } LOCK(&fd->lock); { if (priv->aio_capable && priv->aio_init_done) __posix_fd_set_odirect(fd, pfd, 0, offset, len); bytes_read = sys_pread(_fd, buf, len, offset); if (bytes_read < 0) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PREAD_FAILED, "pread of %d bytes returned %zd", len, bytes_read); op_errno = errno; } } UNLOCK(&fd->lock); if (bytes_read < 0) goto out; if (xdata && dict_get_int32(xdata, "check-zero-filled", &zerofillcheck) == 0) { buf_has_zeroes = (mem_0filled(buf, bytes_read)) ? _gf_false : _gf_true; ret = dict_set_uint32(rsp_xdata, "buf-has-zeroes", buf_has_zeroes); if (ret) { gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, "%s: Failed to set " "dictionary value for key: %s", uuid_utoa(fd->inode->gfid), "buf-has-zeroes"); op_errno = -ret; goto out; } } weak_checksum = gf_rsync_weak_checksum((unsigned char *)buf, (size_t)ret); if (priv->fips_mode_rchecksum) { ret = dict_set_int32(rsp_xdata, "fips-mode-rchecksum", 1); if (ret) { gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, "%s: Failed to set " "dictionary value for key: %s", uuid_utoa(fd->inode->gfid), "fips-mode-rchecksum"); goto out; } checksum = strong_checksum; gf_rsync_strong_checksum((unsigned char *)buf, (size_t)bytes_read, (unsigned char *)checksum); } else { checksum = md5_checksum; gf_rsync_md5_checksum((unsigned char *)buf, (size_t)bytes_read, (unsigned char *)checksum); } op_ret = 0; posix_set_ctime(frame, this, NULL, _fd, fd->inode, NULL); out: STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum, checksum, rsp_xdata); if (rsp_xdata) dict_unref(rsp_xdata); GF_FREE(alloc_buf); return 0; } int posix_forget(xlator_t *this, inode_t *inode) { int ret = 0; char *unlink_path = NULL; uint64_t ctx_uint1 = 0; uint64_t ctx_uint2 = 0; posix_inode_ctx_t *ctx = NULL; posix_mdata_t *mdata = NULL; struct posix_private *priv_posix = NULL; priv_posix = (struct posix_private *)this->private; if (!priv_posix) return 0; ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2); if (!ctx_uint1) goto check_ctx2; ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1; if (ctx->unlink_flag == GF_UNLINK_TRUE) { POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid, unlink_path); if (!unlink_path) { gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, "Failed to remove gfid :%s", uuid_utoa(inode->gfid)); ret = -1; goto ctx_free; } ret = sys_unlink(unlink_path); } ctx_free: pthread_mutex_destroy(&ctx->xattrop_lock); pthread_mutex_destroy(&ctx->write_atomic_lock); pthread_mutex_destroy(&ctx->pgfid_lock); GF_FREE(ctx); check_ctx2: if (ctx_uint2) { mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2; } GF_FREE(mdata); return ret; }