/*
  Copyright (c) 2008-2012 Red Hat, Inc.
  This file is part of GlusterFS.

  This file is licensed to you under your choice of the GNU Lesser
  General Public License, version 3 or any later version (LGPLv3 or
  later), or the GNU General Public License, version 2 (GPLv2), in all
  cases as published by the Free Software Foundation.
*/

/* NOTE(review): the bare #include directives below carry no header name in
 * this copy of the file; restore the missing targets from version control
 * before building. */
#include
#include
#include
#include
#include
#include "io-cache.h"
#include "ioc-mem-types.h"
#include
#include
#include
#include "io-cache-messages.h"

/* Shift count used by ioc_hashfn(). Presumably log2 of the configured page
 * size; it is assigned outside this part of the file -- confirm. */
int ioc_log2_page_size;

/* Forward declaration; defined further down in this file. */
uint32_t
ioc_get_priority(ioc_table_t *table, const char *path);

struct volume_options options[];

/*
 * ioc_hashfn - hash callback passed to rbthash_table_init() by ioc_readv()
 *
 * @data: pointer to an off_t key (it is read as *(off_t *)data)
 * @len: not used
 *
 * Returns the key shifted right by ioc_log2_page_size.
 */
static uint32_t
ioc_hashfn(void *data, int len)
{
    off_t offset;

    offset = *(off_t *)data;

    return (offset >> ioc_log2_page_size);
}

/* TODO: This function is not used, uncomment when we find a
         usage for this function.

static ioc_inode_t *
ioc_inode_reupdate (ioc_inode_t *ioc_inode)
{
        ioc_table_t *table = NULL;

        table = ioc_inode->table;

        list_add_tail (&ioc_inode->inode_lru,
                       &table->inode_lru[ioc_inode->weight]);

        return ioc_inode;
}

static ioc_inode_t *
ioc_get_inode (dict_t *dict, char *name)
{
        ioc_inode_t *ioc_inode      = NULL;
        data_t      *ioc_inode_data = NULL;
        ioc_table_t *table          = NULL;

        ioc_inode_data = dict_get (dict, name);
        if (ioc_inode_data) {
                ioc_inode = data_to_ptr (ioc_inode_data);
                table = ioc_inode->table;

                ioc_table_lock (table);
                {
                        if (list_empty (&ioc_inode->inode_lru)) {
                                ioc_inode = ioc_inode_reupdate (ioc_inode);
                        }
                }
                ioc_table_unlock (table);
        }

        return ioc_inode;
}
*/

/*
 * ioc_update_pages - bring the cached pages of an inode in line with a write
 *
 * @frame: call frame of the write (not referenced by the body)
 * @ioc_inode: io-cache inode whose pages are walked; dereferenced
 *             unconditionally, so it must not be NULL
 * @vector: iovec array describing the written data
 * @count: number of entries in @vector
 * @op_ret: number of bytes the write reported; the region considered is
 *          min(iov_length(vector, count), op_ret) bytes long
 * @offset: file offset at which the write started
 *
 * Every page-sized slot overlapping the written region is looked up under
 * the inode lock. A page that exists and is ready has the overlapping byte
 * range passed to iov_range_copy(); a page that exists, is not ready and has
 * no waitq is destroyed and its size is subtracted from table->cache_used.
 *
 * NOTE(review): cache_used is adjusted here while holding only the inode
 * lock, whereas ioc_inode_flush() adjusts it under the table lock -- confirm
 * this is intended.
 *
 * Always returns 0.
 */
int
ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode,
                 struct iovec *vector, int32_t count, int op_ret, off_t offset)
{
    size_t size = 0;
    off_t rounded_offset = 0, rounded_end = 0, trav_offset = 0,
          write_offset = 0;
    off_t page_offset = 0, page_end = 0;
    ioc_page_t *trav = NULL;

    /* never look past what the write actually reported as written */
    size = iov_length(vector, count);
    size = min(size, op_ret);

    /* page-aligned bounds of the written region */
    rounded_offset = gf_floor(offset, ioc_inode->table->page_size);
    rounded_end = gf_roof(offset + size, ioc_inode->table->page_size);

    trav_offset = rounded_offset;
ioc_inode_lock(ioc_inode); { while (trav_offset < rounded_end) { trav = __ioc_page_get(ioc_inode, trav_offset); if (trav && trav->ready) { if (trav_offset == rounded_offset) page_offset = offset - rounded_offset; else page_offset = 0; if ((trav_offset + ioc_inode->table->page_size) >= rounded_end) { page_end = trav->size - (rounded_end - (offset + size)); } else { page_end = trav->size; } iov_range_copy(trav->vector, trav->count, page_offset, vector, count, write_offset, page_end - page_offset); } else if (trav) { if (!trav->waitq) ioc_inode->table->cache_used -= __ioc_page_destroy(trav); } if (trav_offset == rounded_offset) write_offset += (ioc_inode->table->page_size - (offset - rounded_offset)); else write_offset += ioc_inode->table->page_size; trav_offset += ioc_inode->table->page_size; } } ioc_inode_unlock(ioc_inode); return 0; } int32_t ioc_inode_need_revalidate(ioc_inode_t *ioc_inode) { int8_t need_revalidate = 0; struct timeval tv = { 0, }; ioc_table_t *table = NULL; table = ioc_inode->table; gettimeofday(&tv, NULL); if (time_elapsed(&tv, &ioc_inode->cache.tv) >= table->cache_timeout) need_revalidate = 1; return need_revalidate; } /* * __ioc_inode_flush - flush all the cached pages of the given inode * * @ioc_inode: * * assumes lock is held */ int64_t __ioc_inode_flush(ioc_inode_t *ioc_inode) { ioc_page_t *curr = NULL, *next = NULL; int64_t destroy_size = 0; int64_t ret = 0; list_for_each_entry_safe(curr, next, &ioc_inode->cache.page_lru, page_lru) { ret = __ioc_page_destroy(curr); if (ret != -1) destroy_size += ret; } return destroy_size; } void ioc_inode_flush(ioc_inode_t *ioc_inode) { int64_t destroy_size = 0; ioc_inode_lock(ioc_inode); { destroy_size = __ioc_inode_flush(ioc_inode); } ioc_inode_unlock(ioc_inode); if (destroy_size) { ioc_table_lock(ioc_inode->table); { ioc_inode->table->cache_used -= destroy_size; } ioc_table_unlock(ioc_inode->table); } return; } int32_t ioc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, 
int32_t op_errno, struct iatt *preop, struct iatt *postop, dict_t *xdata) { STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop, postop, xdata); return 0; } int32_t ioc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { uint64_t ioc_inode = 0; inode_ctx_get(loc->inode, this, &ioc_inode); if (ioc_inode && ((valid & GF_SET_ATTR_ATIME) || (valid & GF_SET_ATTR_MTIME))) ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); STACK_WIND(frame, ioc_setattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); return 0; } int32_t ioc_inode_update(xlator_t *this, inode_t *inode, char *path, struct iatt *iabuf) { ioc_table_t *table = NULL; uint64_t tmp_ioc_inode = 0; ioc_inode_t *ioc_inode = NULL; uint32_t weight = 0xffffffff; gf_boolean_t cache_still_valid = _gf_false; if (!this || !inode) goto out; table = this->private; LOCK(&inode->lock); { (void)__inode_ctx_get(inode, this, &tmp_ioc_inode); ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; if (!ioc_inode) { weight = ioc_get_priority(table, path); ioc_inode = ioc_inode_create(table, inode, weight); (void)__inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode); } } UNLOCK(&inode->lock); ioc_inode_lock(ioc_inode); { if (ioc_inode->cache.mtime == 0) { ioc_inode->cache.mtime = iabuf->ia_mtime; ioc_inode->cache.mtime_nsec = iabuf->ia_mtime_nsec; } ioc_inode->ia_size = iabuf->ia_size; } ioc_inode_unlock(ioc_inode); cache_still_valid = ioc_cache_still_valid(ioc_inode, iabuf); if (!cache_still_valid) { ioc_inode_flush(ioc_inode); } ioc_table_lock(ioc_inode->table); { list_move_tail(&ioc_inode->inode_lru, &table->inode_lru[ioc_inode->weight]); } ioc_table_unlock(ioc_inode->table); out: return 0; } int32_t ioc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xdata, struct iatt *postparent) { ioc_local_t *local = NULL; if (op_ret != 0) goto out; local = 
frame->local;
    if (local == NULL) {
        op_ret = -1;
        op_errno = EINVAL;
        goto out;
    }

    if (!this || !this->private) {
        op_ret = -1;
        op_errno = EINVAL;
        goto out;
    }

    /* create or refresh the io-cache context with the looked-up attributes */
    ioc_inode_update(this, inode, (char *)local->file_loc.path, stbuf);

out:
    /* release the loc copied by ioc_lookup(); local itself is left attached
     * to the frame */
    if (frame->local != NULL) {
        local = frame->local;
        loc_wipe(&local->file_loc);
    }

    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
                        postparent);
    return 0;
}

/*
 * ioc_lookup - lookup fop for io cache
 *
 * @frame: call frame
 * @this: this translator
 * @loc: location being looked up; copied into frame->local so the callback
 *       has the path available
 * @xdata: passed through untouched
 *
 * Unwinds with -1/ENOMEM when the local or the loc copy cannot be set up,
 * otherwise winds the lookup to the first child.
 */
int32_t
ioc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
    ioc_local_t *local = NULL;
    int32_t op_errno = -1, ret = -1;

    local = mem_get0(this->local_pool);
    if (local == NULL) {
        op_errno = ENOMEM;
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
        goto unwind;
    }

    ret = loc_copy(&local->file_loc, loc);
    if (ret != 0) {
        op_errno = ENOMEM;
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
        goto unwind;
    }

    /* must be attached before winding: the callback reads frame->local */
    frame->local = local;

    STACK_WIND(frame, ioc_lookup_cbk, FIRST_CHILD(this),
               FIRST_CHILD(this)->fops->lookup, loc, xdata);

    return 0;

unwind:
    /* local was never attached to the frame on this path, free it here */
    if (local != NULL) {
        loc_wipe(&local->file_loc);
        mem_put(local);
    }

    STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);

    return 0;
}

/*
 * ioc_forget - destroy the io-cache context of an inode, if it has one
 *
 * @this: this translator
 * @inode: inode being forgotten
 *
 * Always returns 0.
 */
int32_t
ioc_forget(xlator_t *this, inode_t *inode)
{
    uint64_t ioc_inode = 0;

    inode_ctx_get(inode, this, &ioc_inode);

    if (ioc_inode)
        ioc_inode_destroy((ioc_inode_t *)(long)ioc_inode);

    return 0;
}

/*
 * ioc_invalidate - flush the cached pages of an inode, if it has a context
 *
 * @this: this translator
 * @inode: inode being invalidated
 *
 * Always returns 0.
 */
static int32_t
ioc_invalidate(xlator_t *this, inode_t *inode)
{
    uint64_t ioc_inode = 0;

    inode_ctx_get(inode, this, &ioc_inode);

    if (ioc_inode)
        ioc_inode_flush((ioc_inode_t *)(uintptr_t)ioc_inode);

    return 0;
}

/*
 * ioc_cache_validate_cbk - callback of the fstat sent by ioc_cache_validate()
 *
 * @frame: validate frame; frame->local carries the inode, fd and xattr_req
 * @cookie:
 * @this:
 * @op_ret: result of the fstat
 * @op_errno:
 * @stbuf: attributes returned by the fstat
 * @xdata:
 *
 * Flushes the inode's pages when the fstat failed or the cache is no longer
 * valid, records the validation time, wakes up the waiters of the inode and
 * destroys the validate frame.
 */
int32_t
ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                       int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
                       dict_t *xdata)
{
    ioc_local_t *local = NULL;
    ioc_inode_t *ioc_inode = NULL;
    size_t destroy_size = 0;
    struct iatt *local_stbuf = NULL;
    struct timeval
tv = {
        0,
    };

    local = frame->local;
    ioc_inode = local->inode;
    local_stbuf = stbuf;

    /* a failed fstat is treated the same way as a stale cache */
    if ((op_ret == -1) ||
        ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
        gf_msg_debug(ioc_inode->table->xl->name, 0,
                     "cache for inode(%p) is invalid. flushing all pages",
                     ioc_inode);
        /* NOTE: only pages with no waiting frames are flushed by
         * ioc_inode_flush. page_fault will be generated for all
         * the pages which have waiting frames by ioc_inode_wakeup()
         */
        ioc_inode_lock(ioc_inode);
        {
            destroy_size = __ioc_inode_flush(ioc_inode);
            if (op_ret >= 0) {
                /* remember the mtime the refilled cache will correspond to */
                ioc_inode->cache.mtime = stbuf->ia_mtime;
                ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
            }
        }
        ioc_inode_unlock(ioc_inode);
        local_stbuf = NULL;
    }

    /* account for the flushed pages under the table lock */
    if (destroy_size) {
        ioc_table_lock(ioc_inode->table);
        {
            ioc_inode->table->cache_used -= destroy_size;
        }
        ioc_table_unlock(ioc_inode->table);
    }

    if (op_ret < 0)
        local_stbuf = NULL;

    /* record when this validation completed */
    gettimeofday(&tv, NULL);
    ioc_inode_lock(ioc_inode);
    {
        memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
    }
    ioc_inode_unlock(ioc_inode);

    /* local_stbuf is NULL whenever the cache was flushed or the fstat failed */
    ioc_inode_wakeup(frame, ioc_inode, local_stbuf);

    /* any page-fault initiated by ioc_inode_wakeup() will have its own
     * fd_ref on fd, safe to unref validate frame's private copy
     */
    fd_unref(local->fd);
    /* NOTE(review): xattr_req is only set by ioc_cache_validate() when the
     * read frame had one, so it may be NULL here -- confirm dict_unref()
     * accepts that quietly. */
    dict_unref(local->xattr_req);

    STACK_DESTROY(frame->root);

    return 0;
}

/*
 * ioc_wait_on_inode - queue a page on the inode's waitq
 *
 * @ioc_inode: inode whose waitq is extended
 * @page: page to queue
 *
 * The page is prepended to ioc_inode->waitq unless an entry for it is
 * already present. The body takes no lock; the caller visible in this file
 * (ioc_dispatch_requests) calls it with the inode lock held.
 *
 * Returns 0 on success (including "already queued"), -ENOMEM when the waitq
 * entry cannot be allocated.
 */
int32_t
ioc_wait_on_inode(ioc_inode_t *ioc_inode, ioc_page_t *page)
{
    ioc_waitq_t *waiter = NULL, *trav = NULL;
    uint32_t page_found = 0;
    int32_t ret = 0;

    /* is the page already waiting on this inode? */
    trav = ioc_inode->waitq;

    while (trav) {
        if (trav->data == page) {
            page_found = 1;
            break;
        }
        trav = trav->next;
    }

    if (!page_found) {
        waiter = GF_CALLOC(1, sizeof(ioc_waitq_t), gf_ioc_mt_ioc_waitq_t);
        if (waiter == NULL) {
            gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM,
                    IO_CACHE_MSG_NO_MEMORY, NULL);
            ret = -ENOMEM;
            goto out;
        }

        /* push on the head of the singly linked waitq */
        waiter->data = page;
        waiter->next = ioc_inode->waitq;
        ioc_inode->waitq = waiter;
    }

out:
    return ret;
}

/*
 * ioc_cache_validate - send an fstat to check whether the cache is valid
 *
 * @frame: read frame on whose behalf the validation is done
 * @ioc_inode: inode being validated
 * @fd: fd used for the fstat
 * @page: not referenced by the body
 *
 * Returns 0 when the fstat was wound, -1 when the validate frame or its
 * local could not be allocated.
 */
int32_t
ioc_cache_validate(call_frame_t *frame,
ioc_inode_t *ioc_inode, fd_t *fd, ioc_page_t *page) { call_frame_t *validate_frame = NULL; ioc_local_t *validate_local = NULL; ioc_local_t *local = NULL; int32_t ret = 0; local = frame->local; validate_local = mem_get0(THIS->local_pool); if (validate_local == NULL) { ret = -1; local->op_ret = -1; local->op_errno = ENOMEM; gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); goto out; } validate_frame = copy_frame(frame); if (validate_frame == NULL) { ret = -1; local->op_ret = -1; local->op_errno = ENOMEM; mem_put(validate_local); gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); goto out; } validate_local->fd = fd_ref(fd); validate_local->inode = ioc_inode; if (local && local->xattr_req) validate_local->xattr_req = dict_ref(local->xattr_req); validate_frame->local = validate_local; STACK_WIND(validate_frame, ioc_cache_validate_cbk, FIRST_CHILD(frame->this), FIRST_CHILD(frame->this)->fops->fstat, fd, validate_local->xattr_req); out: return ret; } static uint32_t is_match(const char *path, const char *pattern) { int32_t ret = 0; ret = fnmatch(pattern, path, FNM_NOESCAPE); return (ret == 0); } uint32_t ioc_get_priority(ioc_table_t *table, const char *path) { uint32_t priority = 1; struct ioc_priority *curr = NULL; if (list_empty(&table->priority_list) || !path) return priority; priority = 0; list_for_each_entry(curr, &table->priority_list, list) { if (is_match(path, curr->pattern)) priority = curr->priority; } return priority; } /* * ioc_open_cbk - open callback for io cache * * @frame: call frame * @cookie: * @this: * @op_ret: * @op_errno: * @fd: * */ int32_t ioc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { uint64_t tmp_ioc_inode = 0; ioc_local_t *local = NULL; ioc_table_t *table = NULL; ioc_inode_t *ioc_inode = NULL; local = frame->local; if (!this || !this->private) { op_ret = -1; op_errno = EINVAL; goto out; } table = 
this->private; if (op_ret != -1) { inode_ctx_get(fd->inode, this, &tmp_ioc_inode); ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; // TODO: see why inode context is NULL and handle it. if (!ioc_inode) { gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_ENFORCEMENT_FAILED, "inode-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); goto out; } ioc_table_lock(ioc_inode->table); { list_move_tail(&ioc_inode->inode_lru, &table->inode_lru[ioc_inode->weight]); } ioc_table_unlock(ioc_inode->table); ioc_inode_lock(ioc_inode); { if ((table->min_file_size > ioc_inode->ia_size) || ((table->max_file_size > 0) && (table->max_file_size < ioc_inode->ia_size))) { fd_ctx_set(fd, this, 1); } } ioc_inode_unlock(ioc_inode); /* If O_DIRECT open, we disable caching on it */ if ((local->flags & O_DIRECT)) { /* O_DIRECT is only for one fd, not the inode * as a whole */ fd_ctx_set(fd, this, 1); } } out: mem_put(local); frame->local = NULL; STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); return 0; } /* * ioc_create_cbk - create callback for io cache * * @frame: call frame * @cookie: * @this: * @op_ret: * @op_errno: * @fd: * @inode: * @buf: * */ int32_t ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { ioc_local_t *local = NULL; ioc_table_t *table = NULL; ioc_inode_t *ioc_inode = NULL; uint32_t weight = 0xffffffff; const char *path = NULL; int ret = -1; local = frame->local; if (!this || !this->private) { op_ret = -1; op_errno = EINVAL; goto out; } table = this->private; path = local->file_loc.path; if (op_ret != -1) { /* assign weight */ weight = ioc_get_priority(table, path); ioc_inode = ioc_inode_create(table, inode, weight); ioc_inode_lock(ioc_inode); { ioc_inode->cache.mtime = buf->ia_mtime; ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec; ioc_inode->ia_size = buf->ia_size; if ((table->min_file_size > 
ioc_inode->ia_size) || ((table->max_file_size > 0) && (table->max_file_size < ioc_inode->ia_size))) { ret = fd_ctx_set(fd, this, 1); if (ret) gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, IO_CACHE_MSG_SET_FD_FAILED, "path=%s", local->file_loc.path, NULL); } } ioc_inode_unlock(ioc_inode); inode_ctx_put(fd->inode, this, (uint64_t)(long)ioc_inode); /* If O_DIRECT open, we disable caching on it */ if (local->flags & O_DIRECT) { /* * O_DIRECT is only for one fd, not the inode * as a whole */ ret = fd_ctx_set(fd, this, 1); if (ret) gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, IO_CACHE_MSG_SET_FD_FAILED, "path=%s", local->file_loc.path, NULL); } /* if weight == 0, we disable caching on it */ if (!weight) { /* we allow a pattern-matched cache disable this way */ ret = fd_ctx_set(fd, this, 1); if (ret) gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, IO_CACHE_MSG_SET_FD_FAILED, "path=%s", local->file_loc.path, NULL); } } out: frame->local = NULL; mem_put(local); STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, preparent, postparent, xdata); return 0; } int32_t ioc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { ioc_local_t *local = NULL; ioc_table_t *table = NULL; ioc_inode_t *ioc_inode = NULL; uint32_t weight = 0xffffffff; const char *path = NULL; local = frame->local; if (!this || !this->private) { op_ret = -1; op_errno = EINVAL; goto out; } table = this->private; path = local->file_loc.path; if (op_ret != -1) { /* assign weight */ weight = ioc_get_priority(table, path); ioc_inode = ioc_inode_create(table, inode, weight); ioc_inode_lock(ioc_inode); { ioc_inode->cache.mtime = buf->ia_mtime; ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec; ioc_inode->ia_size = buf->ia_size; } ioc_inode_unlock(ioc_inode); inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode); } out: frame->local = NULL; loc_wipe(&local->file_loc); 
mem_put(local);

    STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
                        postparent, xdata);
    return 0;
}

/*
 * ioc_mknod - mknod fop for io cache
 *
 * @frame: call frame
 * @this: this translator
 * @loc: location of the node; copied into frame->local so the callback has
 *       the path available
 * @mode, @rdev, @umask, @xdata: passed through untouched
 *
 * Unwinds with -1/ENOMEM when the local or the loc copy cannot be set up,
 * otherwise winds the mknod to the first child.
 */
int
ioc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
          dev_t rdev, mode_t umask, dict_t *xdata)
{
    ioc_local_t *local = NULL;
    int32_t op_errno = -1, ret = -1;

    local = mem_get0(this->local_pool);
    if (local == NULL) {
        op_errno = ENOMEM;
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
        goto unwind;
    }

    ret = loc_copy(&local->file_loc, loc);
    if (ret != 0) {
        op_errno = ENOMEM;
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
        goto unwind;
    }

    /* must be attached before winding: the callback reads frame->local */
    frame->local = local;

    STACK_WIND(frame, ioc_mknod_cbk, FIRST_CHILD(this),
               FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
    return 0;

unwind:
    /* local was never attached to the frame on this path, free it here */
    if (local != NULL) {
        loc_wipe(&local->file_loc);
        mem_put(local);
    }

    STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
                        NULL);

    return 0;
}

/*
 * ioc_open - open fop for io cache
 * @frame:
 * @this:
 * @loc:
 * @flags:
 *
 * Saves the open flags in frame->local for ioc_open_cbk() and winds the
 * open to the first child. Unwinds with -1/ENOMEM when the local cannot be
 * allocated.
 */
int32_t
ioc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
         fd_t *fd, dict_t *xdata)
{
    ioc_local_t *local = NULL;

    local = mem_get0(this->local_pool);
    if (local == NULL) {
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
        STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
        return 0;
    }

    local->flags = flags;
    /* the path and inode pointers are stored as-is (no loc_copy, no ref);
     * ioc_open_cbk() accordingly does not loc_wipe() them */
    local->file_loc.path = loc->path;
    local->file_loc.inode = loc->inode;

    frame->local = local;

    STACK_WIND(frame, ioc_open_cbk, FIRST_CHILD(this),
               FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);

    return 0;
}

/*
 * ioc_create - create fop for io cache
 *
 * @frame:
 * @this:
 * @pathname:
 * @flags:
 * @mode:
 *
 * Saves the flags and the path pointer in frame->local for ioc_create_cbk()
 * and winds the create to the first child. Unwinds with -1/ENOMEM when the
 * local cannot be allocated.
 */
int32_t
ioc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
           mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
    ioc_local_t *local = NULL;

    local = mem_get0(this->local_pool);
    if (local == NULL) {
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
                            NULL, NULL);
        return 0;
    }

    local->flags = flags;
    /* the path pointer is stored as-is (no loc_copy) */
    local->file_loc.path = loc->path;
    frame->local = local;

    STACK_WIND(frame, ioc_create_cbk, FIRST_CHILD(this),
               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
               xdata);

    return 0;
}

/*
 * ioc_release - release fop for io cache
 *
 * @frame:
 * @this:
 * @fd:
 *
 * Nothing to clean up; always returns 0.
 */
int32_t
ioc_release(xlator_t *this, fd_t *fd)
{
    return 0;
}

/*
 * ioc_need_prune - is the cache over its configured size?
 *
 * @table: ioc table
 *
 * Reads cache_used and cache_size under the table lock. Returns 1 when
 * cache_used exceeds cache_size, 0 otherwise.
 */
int32_t
ioc_need_prune(ioc_table_t *table)
{
    int64_t cache_difference = 0;

    ioc_table_lock(table);
    {
        cache_difference = table->cache_used - table->cache_size;
    }
    ioc_table_unlock(table);

    if (cache_difference > 0)
        return 1;
    else
        return 0;
}

/*
 * ioc_dispatch_requests - queue a read on every page it touches
 *
 * @frame: read frame; frame->local is the ioc_local_t set up by ioc_readv()
 * @inode: io-cache inode being read (parameter ioc_inode)
 * @fd: fd the read was issued on
 * @offset: start of the requested region
 * @size: length of the requested region
 *
 * For each page overlapping the region the frame is made to wait on the
 * page; missing pages are created and a page fault is issued for them, ready
 * pages are either woken up immediately or queued behind a cache validation.
 * On exit the wait taken at the top is dropped with ioc_frame_return() and
 * the cache is pruned if it is over its size limit.
 */
void
ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
                      off_t offset, size_t size)
{
    ioc_local_t *local = NULL;
    ioc_table_t *table = NULL;
    ioc_page_t *trav = NULL;
    ioc_waitq_t *waitq = NULL;
    off_t rounded_offset = 0;
    off_t rounded_end = 0;
    off_t trav_offset = 0;
    int32_t fault = 0;
    size_t trav_size = 0;
    off_t local_offset = 0;
    int32_t ret = -1;
    int8_t need_validate = 0;
    int8_t might_need_validate = 0; /*
                                     * if a page exists, do we need
                                     * to validate it?
                                     */
    local = frame->local;
    table = ioc_inode->table;

    /* page-aligned bounds of the requested region */
    rounded_offset = gf_floor(offset, table->page_size);
    rounded_end = gf_roof(offset + size, table->page_size);
    trav_offset = rounded_offset;

    /* once a frame does read, it should be waiting on something */
    local->wait_count++;

    /* Requested region can fall in three different pages,
     * 1. Ready - region is already in cache, we just have to serve it.
     * 2. In-transit - page fault has been generated on this page, we need
     *    to wait till the page is ready
     * 3. Fault - page is not in cache, we have to generate a page fault
     */

    might_need_validate = ioc_inode_need_revalidate(ioc_inode);

    while (trav_offset < rounded_end) {
        ioc_inode_lock(ioc_inode);
        {
            /* look for requested region in the cache */
            trav = __ioc_page_get(ioc_inode, trav_offset);

            /* part of the request that falls inside this page */
            local_offset = max(trav_offset, offset);
            trav_size = min(((offset + size) - local_offset), table->page_size);

            if (!trav) {
                /* page not in cache, we need to generate page
                 * fault
                 */
                trav = __ioc_page_create(ioc_inode, trav_offset);
                fault = 1;
                if (!trav) {
                    gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM,
                            IO_CACHE_MSG_NO_MEMORY, NULL);
                    local->op_ret = -1;
                    local->op_errno = ENOMEM;
                    ioc_inode_unlock(ioc_inode);
                    goto out;
                }
            }

            __ioc_wait_on_page(trav, frame, local_offset, trav_size);

            if (trav->ready) {
                /* page found in cache */
                if (!might_need_validate && !ioc_inode->waitq) {
                    /* fresh enough */
                    gf_msg_trace(frame->this->name, 0,
                                 "cache hit for "
                                 "trav_offset=%" PRId64
                                 "/local_"
                                 "offset=%" PRId64 "",
                                 trav_offset, local_offset);
                    waitq = __ioc_page_wakeup(trav, trav->op_errno);
                } else {
                    /* if waitq already exists, fstat
                     * revalidate is
                     * already on the way
                     */
                    if (!ioc_inode->waitq) {
                        need_validate = 1;
                    }

                    ret = ioc_wait_on_inode(ioc_inode, trav);
                    if (ret < 0) {
                        /* could not queue behind the validation: fail the
                         * read and release whoever waits on this page */
                        local->op_ret = -1;
                        local->op_errno = -ret;
                        need_validate = 0;

                        waitq = __ioc_page_wakeup(trav, trav->op_errno);
                        ioc_inode_unlock(ioc_inode);

                        ioc_waitq_return(waitq);
                        waitq = NULL;
                        goto out;
                    }
                }
            }
        }
        ioc_inode_unlock(ioc_inode);

        /* waiters are returned only after the inode lock is dropped */
        ioc_waitq_return(waitq);
        waitq = NULL;

        if (fault) {
            fault = 0;
            /* new page created, increase the table->cache_used */
            ioc_page_fault(ioc_inode, frame, fd, trav_offset);
        }

        if (need_validate) {
            need_validate = 0;
            gf_msg_trace(frame->this->name, 0,
                         "sending validate request for "
                         "inode(%s) at offset=%" PRId64 "",
                         uuid_utoa(fd->inode->gfid), trav_offset);
            ret = ioc_cache_validate(frame, ioc_inode, fd, trav);
            if (ret == -1) {
                /* validation could not be sent: release the page waiters */
                ioc_inode_lock(ioc_inode);
                {
                    waitq = __ioc_page_wakeup(trav, trav->op_errno);
                }
                ioc_inode_unlock(ioc_inode);

                ioc_waitq_return(waitq);
                waitq = NULL;
                goto out;
            }
        }

        trav_offset += table->page_size;
    }

out:
    /* drop the wait taken at the top of the function */
    ioc_frame_return(frame);

    if (ioc_need_prune(ioc_inode->table)) {
        ioc_prune(ioc_inode->table);
    }

    return;
}

/*
 * ioc_readv -
 *
 * @frame:
 * @this:
 * @fd:
 * @size:
 * @offset:
 *
 * Reads are passed straight to the child when the inode has no io-cache
 * context, when O_DIRECT is set in @flags, or when a ctx value is set on the
 * fd (the open/create callbacks set one to disable caching). Otherwise the
 * inode's page table is created on first use, a local is set up and the
 * request is handed to ioc_dispatch_requests(). Unwinds with -1 and
 * EINVAL/ENOMEM on failure.
 */
int32_t
ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
          off_t offset, uint32_t flags, dict_t *xdata)
{
    uint64_t tmp_ioc_inode = 0;
    ioc_inode_t *ioc_inode = NULL;
    ioc_local_t *local = NULL;
    uint32_t weight = 0;
    ioc_table_t *table = NULL;
    int32_t op_errno = EINVAL;

    if (!this) {
        goto out;
    }

    inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
    ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
    if (!ioc_inode) {
        /* caching disabled, go ahead with normal readv */
        STACK_WIND_TAIL(frame, FIRST_CHILD(this),
                        FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
                        xdata);
        return 0;
    }

    if (flags & O_DIRECT) {
        /* disable caching for this fd, if O_DIRECT is used */
        STACK_WIND_TAIL(frame, FIRST_CHILD(this),
                        FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
                        xdata);
        return 0;
    }

    table = this->private;

    if (!table) {
        gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_TABLE_NULL,
                NULL);
        op_errno = EINVAL;
        goto out;
    }

    /* the per-inode page table is created on the first cached read */
    ioc_inode_lock(ioc_inode);
    {
        if (!ioc_inode->cache.page_table) {
            ioc_inode->cache.page_table = rbthash_table_init(
                this->ctx, IOC_PAGE_TABLE_BUCKET_COUNT, ioc_hashfn, NULL, 0,
                table->mem_pool);

            if (ioc_inode->cache.page_table == NULL) {
                op_errno = ENOMEM;
                ioc_inode_unlock(ioc_inode);
                goto out;
            }
        }
    }
    ioc_inode_unlock(ioc_inode);

    /* fd_ctx_get() returning 0 means a ctx value is set on this fd */
    if (!fd_ctx_get(fd, this, NULL)) {
        /* disable caching for this fd, go ahead with normal readv */
        STACK_WIND_TAIL(frame, FIRST_CHILD(this),
                        FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
                        xdata);
        return 0;
    }

    local = mem_get0(this->local_pool);
    if (local == NULL) {
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
        op_errno = ENOMEM;
        goto out;
    }

    INIT_LIST_HEAD(&local->fill_list);

    frame->local = local;
local->pending_offset = offset; local->pending_size = size; local->offset = offset; local->size = size; local->inode = ioc_inode; local->xattr_req = dict_ref(xdata); gf_msg_trace(this->name, 0, "NEW REQ (%p) offset " "= %" PRId64 " && size = %" GF_PRI_SIZET "", frame, offset, size); weight = ioc_inode->weight; ioc_table_lock(ioc_inode->table); { list_move_tail(&ioc_inode->inode_lru, &ioc_inode->table->inode_lru[weight]); } ioc_table_unlock(ioc_inode->table); ioc_dispatch_requests(frame, ioc_inode, fd, offset, size); return 0; out: STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); return 0; } /* * ioc_writev_cbk - * * @frame: * @cookie: * @this: * @op_ret: * @op_errno: * */ int32_t ioc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { ioc_local_t *local = NULL; uint64_t ioc_inode = 0; local = frame->local; frame->local = NULL; inode_ctx_get(local->fd->inode, this, &ioc_inode); if (op_ret >= 0) { ioc_update_pages(frame, (ioc_inode_t *)(long)ioc_inode, local->vector, local->op_ret, op_ret, local->offset); } STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); if (local->iobref) { iobref_unref(local->iobref); GF_FREE(local->vector); } mem_put(local); return 0; } /* * ioc_writev * * @frame: * @this: * @fd: * @vector: * @count: * @offset: * */ int32_t ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { ioc_local_t *local = NULL; uint64_t ioc_inode = 0; local = mem_get0(this->local_pool); if (local == NULL) { gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } /* TODO: why is it not fd_ref'ed */ local->fd = fd; frame->local = local; inode_ctx_get(fd->inode, this, &ioc_inode); if (ioc_inode) { local->iobref = 
iobref_ref(iobref); local->vector = iov_dup(vector, count); local->op_ret = count; local->offset = offset; } STACK_WIND(frame, ioc_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, flags, iobref, xdata); return 0; } /* * ioc_truncate_cbk - * * @frame: * @cookie: * @this: * @op_ret: * @op_errno: * @buf: * */ int32_t ioc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); return 0; } /* * ioc_ftruncate_cbk - * * @frame: * @cookie: * @this: * @op_ret: * @op_errno: * @buf: * */ int32_t ioc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); return 0; } /* * ioc_truncate - * * @frame: * @this: * @loc: * @offset: * */ int32_t ioc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { uint64_t ioc_inode = 0; inode_ctx_get(loc->inode, this, &ioc_inode); if (ioc_inode) ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); STACK_WIND(frame, ioc_truncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); return 0; } /* * ioc_ftruncate - * * @frame: * @this: * @fd: * @offset: * */ int32_t ioc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { uint64_t ioc_inode = 0; inode_ctx_get(fd->inode, this, &ioc_inode); if (ioc_inode) ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); STACK_WIND(frame, ioc_ftruncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); return 0; } int32_t ioc_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { STACK_UNWIND_STRICT(lk, frame, op_ret, 
op_errno, lock, xdata); return 0; } int32_t ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { ioc_inode_t *ioc_inode = NULL; uint64_t tmp_inode = 0; struct timeval tv = { 0, }; inode_ctx_get(fd->inode, this, &tmp_inode); ioc_inode = (ioc_inode_t *)(long)tmp_inode; if (!ioc_inode) { gf_msg_debug(this->name, EBADFD, "inode context is NULL: returning EBADFD"); STACK_UNWIND_STRICT(lk, frame, -1, EBADFD, NULL, NULL); return 0; } gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); } ioc_inode_unlock(ioc_inode); STACK_WIND(frame, ioc_lk_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata); return 0; } int ioc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) { gf_dirent_t *entry = NULL; char *path = NULL; fd_t *fd = NULL; fd = frame->local; frame->local = NULL; if (op_ret <= 0) goto unwind; list_for_each_entry(entry, &entries->list, list) { inode_path(fd->inode, entry->d_name, &path); ioc_inode_update(this, entry->inode, path, &entry->d_stat); GF_FREE(path); path = NULL; } unwind: STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); return 0; } int ioc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *dict) { frame->local = fd; STACK_WIND(frame, ioc_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); return 0; } static int32_t ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *pre, struct iatt *post, dict_t *xdata) { STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata); return 0; } static int32_t ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { uint64_t ioc_inode = 0; inode_ctx_get(fd->inode, this, &ioc_inode); if (ioc_inode) 
ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);

    STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
               FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
    return 0;
}

/*
 * ioc_zerofill_cbk - zerofill callback; hands the result back to the caller
 */
static int32_t
ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                 int32_t op_ret, int32_t op_errno, struct iatt *pre,
                 struct iatt *post, dict_t *xdata)
{
    STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata);
    return 0;
}

/*
 * ioc_zerofill - zerofill fop for io cache
 *
 * Flushes the cached pages of the fd's inode (when it has an io-cache
 * context) and then winds the zerofill to the first child.
 */
static int32_t
ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
             off_t len, dict_t *xdata)
{
    uint64_t ioc_inode = 0;

    inode_ctx_get(fd->inode, this, &ioc_inode);

    if (ioc_inode)
        ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);

    STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
               FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
    return 0;
}

/*
 * ioc_get_priority_list - parse a "pattern:priority,..." option string
 *
 * @opt_str: comma separated list of pattern:priority pairs
 * @first: list head the parsed struct ioc_priority entries are appended to
 *
 * Returns the largest priority seen (never less than 1) on success. On any
 * allocation or parse failure returns -1 after removing and freeing EVERY
 * entry on @first, including entries that were on the list before the call.
 */
int32_t
ioc_get_priority_list(const char *opt_str, struct list_head *first)
{
    int32_t max_pri = 1;
    char *tmp_str = NULL;
    char *tmp_str1 = NULL;
    char *tmp_str2 = NULL;
    char *dup_str = NULL;
    char *stripe_str = NULL;
    char *pattern = NULL;
    char *priority = NULL;
    char *string = NULL;
    struct ioc_priority *curr = NULL, *tmp = NULL;

    /* strtok_r() modifies its input, so work on a private copy */
    string = gf_strdup(opt_str);
    if (string == NULL) {
        max_pri = -1;
        goto out;
    }

    /* Get the pattern for cache priority.
     * "option priority *.jpg:1,abc*:2" etc
     */
    /* TODO: inode_lru in table is statically hard-coded to 5,
     * should be changed to run-time configuration
     */
    stripe_str = strtok_r(string, ",", &tmp_str);
    while (stripe_str) {
        curr = GF_CALLOC(1, sizeof(struct ioc_priority),
                         gf_ioc_mt_ioc_priority);
        if (curr == NULL) {
            max_pri = -1;
            goto out;
        }

        /* linked in right away so the error path at out: frees it */
        list_add_tail(&curr->list, first);

        dup_str = gf_strdup(stripe_str);
        if (dup_str == NULL) {
            max_pri = -1;
            goto out;
        }

        pattern = strtok_r(dup_str, ":", &tmp_str1);
        if (!pattern) {
            max_pri = -1;
            goto out;
        }

        priority = strtok_r(NULL, ":", &tmp_str1);
        if (!priority) {
            max_pri = -1;
            goto out;
        }

        gf_msg_trace("io-cache", 0, "ioc priority : pattern %s : priority %s",
                     pattern, priority);

        curr->pattern = gf_strdup(pattern);
        if (curr->pattern == NULL) {
            max_pri = -1;
            goto out;
        }

        /* base 0: decimal, octal and hex notations are accepted; anything
         * left over after the number is an error */
        curr->priority = strtol(priority, &tmp_str2, 0);
        if (tmp_str2 && (*tmp_str2)) {
            max_pri = -1;
            goto out;
        } else {
            max_pri = max(max_pri, curr->priority);
        }

        GF_FREE(dup_str);
        dup_str = NULL;

        stripe_str = strtok_r(NULL, ",", &tmp_str);
    }
out:
    GF_FREE(string);

    GF_FREE(dup_str);

    /* on failure the whole list is torn down, not just the new entries */
    if (max_pri == -1) {
        list_for_each_entry_safe(curr, tmp, first, list)
        {
            list_del_init(&curr->list);
            GF_FREE(curr->pattern);
            GF_FREE(curr);
        }
    }

    return max_pri;
}

/*
 * mem_acct_init - set up memory accounting for this translator
 *
 * @this: this translator
 *
 * Returns -1 when @this is NULL, otherwise the result of
 * xlator_mem_acct_init() (0 on success).
 */
int32_t
mem_acct_init(xlator_t *this)
{
    int ret = -1;

    if (!this)
        return ret;

    ret = xlator_mem_acct_init(this, gf_ioc_mt_end + 1);

    if (ret != 0) {
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
                IO_CACHE_MSG_MEMORY_INIT_FAILED, NULL);
        return ret;
    }

    return ret;
}

/*
 * check_cache_size_ok - is the requested cache size within the allowed limit?
 *
 * @this: this translator
 * @cache_size: requested cache size
 *
 * The limit is the value returned by get_mem_size(), or the maximum of the
 * "cache-size" volume option when get_mem_size() returns -1. Returns
 * _gf_true when @cache_size does not exceed the limit, _gf_false otherwise
 * or when the "cache-size" option cannot be found.
 */
static gf_boolean_t
check_cache_size_ok(xlator_t *this, uint64_t cache_size)
{
    gf_boolean_t ret = _gf_true;
    uint64_t total_mem = 0;
    uint64_t max_cache_size = 0;
    volume_option_t *opt = NULL;

    GF_ASSERT(this);
    opt = xlator_volume_option_get(this, "cache-size");
    if (!opt) {
        ret = _gf_false;
        gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
                IO_CACHE_MSG_NO_CACHE_SIZE_OPT, NULL);
        goto out;
    }

    total_mem = get_mem_size();
    /* total_mem is unsigned: the -1 is converted to the all-ones value */
    if (-1 == total_mem)
        max_cache_size = opt->max;
    else
        max_cache_size = total_mem;
gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size); if (cache_size > max_cache_size) { ret = _gf_false; gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, "Cache-size=%" PRIu64, cache_size, "max-size=%" PRIu64, max_cache_size, NULL); goto out; } out: return ret; } int reconfigure(xlator_t *this, dict_t *options) { data_t *data = NULL; ioc_table_t *table = NULL; int ret = -1; uint64_t cache_size_new = 0; if (!this || !this->private) goto out; table = this->private; ioc_table_lock(table); { GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, unlock); GF_OPTION_RECONF("cache-timeout", table->cache_timeout, options, int32, unlock); data = dict_get(options, "priority"); if (data) { char *option_list = data_to_str(data); gf_msg_trace(this->name, 0, "option path %s", option_list); /* parse the list of pattern:priority */ table->max_pri = ioc_get_priority_list(option_list, &table->priority_list); if (table->max_pri == -1) { goto unlock; } table->max_pri++; } GF_OPTION_RECONF("max-file-size", table->max_file_size, options, size_uint64, unlock); GF_OPTION_RECONF("min-file-size", table->min_file_size, options, size_uint64, unlock); if ((table->max_file_size <= UINT64_MAX) && (table->min_file_size > table->max_file_size)) { gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_DEFAULTING_TO_OLD, "minimum-size=%" PRIu64, table->min_file_size, "maximum-size=%" PRIu64, table->max_file_size, NULL); goto unlock; } GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64, unlock); if (!check_cache_size_ok(this, cache_size_new)) { ret = -1; gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, NULL); goto unlock; } table->cache_size = cache_size_new; ret = 0; } unlock: ioc_table_unlock(table); out: return ret; } /* * init - * @this: * */ int32_t init(xlator_t *this) { ioc_table_t *table = NULL; dict_t *xl_options = NULL; uint32_t index = 0; int32_t ret = -1; glusterfs_ctx_t *ctx = NULL; data_t *data = 
0; uint32_t num_pages = 0; xl_options = this->options; if (!this->children || this->children->next) { gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, NULL); goto out; } if (!this->parents) { gf_smsg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED, NULL); } table = (void *)GF_CALLOC(1, sizeof(*table), gf_ioc_mt_ioc_table_t); if (table == NULL) { gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); goto out; } table->xl = this; table->page_size = this->ctx->page_size; GF_OPTION_INIT("pass-through", this->pass_through, bool, out); GF_OPTION_INIT("cache-size", table->cache_size, size_uint64, out); GF_OPTION_INIT("cache-timeout", table->cache_timeout, int32, out); GF_OPTION_INIT("min-file-size", table->min_file_size, size_uint64, out); GF_OPTION_INIT("max-file-size", table->max_file_size, size_uint64, out); if (!check_cache_size_ok(this, table->cache_size)) { ret = -1; goto out; } INIT_LIST_HEAD(&table->priority_list); table->max_pri = 1; data = dict_get(xl_options, "priority"); if (data) { char *option_list = data_to_str(data); gf_msg_trace(this->name, 0, "option path %s", option_list); /* parse the list of pattern:priority */ table->max_pri = ioc_get_priority_list(option_list, &table->priority_list); if (table->max_pri == -1) { goto out; } } table->max_pri++; INIT_LIST_HEAD(&table->inodes); if ((table->max_file_size <= UINT64_MAX) && (table->min_file_size > table->max_file_size)) { gf_smsg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, "minimum-size=%" PRIu64, table->min_file_size, "maximum-size=%" PRIu64, table->max_file_size, NULL); goto out; } table->inode_lru = GF_CALLOC(table->max_pri, sizeof(struct list_head), gf_ioc_mt_list_head); if (table->inode_lru == NULL) { goto out; } for (index = 0; index < (table->max_pri); index++) INIT_LIST_HEAD(&table->inode_lru[index]); this->local_pool = mem_pool_new(ioc_local_t, 64); if (!this->local_pool) { ret = -1; gf_smsg(this->name, GF_LOG_ERROR, 
ENOMEM, IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, NULL); goto out; } pthread_mutex_init(&table->table_lock, NULL); this->private = table; num_pages = (table->cache_size / table->page_size) + ((table->cache_size % table->page_size) ? 1 : 0); table->mem_pool = mem_pool_new(rbthash_entry_t, num_pages); if (!table->mem_pool) { gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, NULL); goto out; } ret = 0; ctx = this->ctx; ioc_log2_page_size = log_base2(ctx->page_size); out: if (ret == -1) { if (table != NULL) { GF_FREE(table->inode_lru); GF_FREE(table); } } return ret; } void ioc_page_waitq_dump(ioc_page_t *page, char *prefix) { ioc_waitq_t *trav = NULL; call_frame_t *frame = NULL; int32_t i = 0; char key[GF_DUMP_MAX_BUF_LEN] = { 0, }; trav = page->waitq; while (trav) { frame = trav->data; sprintf(key, "waitq.frame[%d]", i++); gf_proc_dump_write(key, "%" PRId64, frame->root->unique); trav = trav->next; } } void __ioc_inode_waitq_dump(ioc_inode_t *ioc_inode, char *prefix) { ioc_waitq_t *trav = NULL; ioc_page_t *page = NULL; int32_t i = 0; char key[GF_DUMP_MAX_BUF_LEN] = { 0, }; trav = ioc_inode->waitq; while (trav) { page = trav->data; sprintf(key, "cache-validation-waitq.page[%d].offset", i++); gf_proc_dump_write(key, "%" PRId64, page->offset); trav = trav->next; } } void __ioc_page_dump(ioc_page_t *page, char *prefix) { int ret = -1; if (!page) return; /* ioc_page_lock can be used to hold the mutex. But in statedump * its better to use trylock to avoid deadlocks. */ ret = pthread_mutex_trylock(&page->page_lock); if (ret) goto out; { gf_proc_dump_write("offset", "%" PRId64, page->offset); gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size); gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no"); gf_proc_dump_write("ready", "%s", page->ready ? 
"yes" : "no"); ioc_page_waitq_dump(page, prefix); } pthread_mutex_unlock(&page->page_lock); out: if (ret && page) gf_proc_dump_write("Unable to dump the page information", "(Lock acquisition failed) %p", page); return; } void __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) { off_t offset = 0; ioc_table_t *table = NULL; ioc_page_t *page = NULL; int i = 0; char key[GF_DUMP_MAX_BUF_LEN] = { 0, }; char timestr[256] = { 0, }; if ((ioc_inode == NULL) || (prefix == NULL)) { goto out; } table = ioc_inode->table; if (ioc_inode->cache.tv.tv_sec) { gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.tv.tv_sec, gf_timefmt_FT); snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); gf_proc_dump_write("last-cache-validation-time", "%s", timestr); } for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) { page = __ioc_page_get(ioc_inode, offset); if (page == NULL) { continue; } sprintf(key, "inode.cache.page[%d]", i++); __ioc_page_dump(page, key); } out: return; } int ioc_inode_dump(xlator_t *this, inode_t *inode) { char *path = NULL; int ret = -1; char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; uint64_t tmp_ioc_inode = 0; ioc_inode_t *ioc_inode = NULL; gf_boolean_t section_added = _gf_false; char uuid_str[64] = { 0, }; if (this == NULL || inode == NULL) goto out; gf_proc_dump_build_key(key_prefix, "io-cache", "inode"); inode_ctx_get(inode, this, &tmp_ioc_inode); ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; if (ioc_inode == NULL) goto out; /* Similar to ioc_page_dump function its better to use * pthread_mutex_trylock and not to use gf_log in statedump * to avoid deadlocks. 
*/ ret = pthread_mutex_trylock(&ioc_inode->inode_lock); if (ret) goto out; { if (gf_uuid_is_null(ioc_inode->inode->gfid)) goto unlock; gf_proc_dump_add_section("%s", key_prefix); section_added = _gf_true; __inode_path(ioc_inode->inode, NULL, &path); gf_proc_dump_write("inode.weight", "%d", ioc_inode->weight); if (path) { gf_proc_dump_write("path", "%s", path); GF_FREE(path); } gf_proc_dump_write("uuid", "%s", uuid_utoa_r(ioc_inode->inode->gfid, uuid_str)); __ioc_cache_dump(ioc_inode, key_prefix); __ioc_inode_waitq_dump(ioc_inode, key_prefix); } unlock: pthread_mutex_unlock(&ioc_inode->inode_lock); out: if (ret && ioc_inode) { if (section_added == _gf_false) gf_proc_dump_add_section("%s", key_prefix); gf_proc_dump_write("Unable to print the status of ioc_inode", "(Lock acquisition failed) %s", uuid_utoa(inode->gfid)); } return ret; } int ioc_priv_dump(xlator_t *this) { ioc_table_t *priv = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; int ret = -1; gf_boolean_t add_section = _gf_false; if (!this || !this->private) goto out; priv = this->private; gf_proc_dump_build_key(key_prefix, "io-cache", "priv"); gf_proc_dump_add_section("%s", key_prefix); add_section = _gf_true; ret = pthread_mutex_trylock(&priv->table_lock); if (ret) goto out; { gf_proc_dump_write("page_size", "%" PRIu64, priv->page_size); gf_proc_dump_write("cache_size", "%" PRIu64, priv->cache_size); gf_proc_dump_write("cache_used", "%" PRIu64, priv->cache_used); gf_proc_dump_write("inode_count", "%u", priv->inode_count); gf_proc_dump_write("cache_timeout", "%u", priv->cache_timeout); gf_proc_dump_write("min-file-size", "%" PRIu64, priv->min_file_size); gf_proc_dump_write("max-file-size", "%" PRIu64, priv->max_file_size); } pthread_mutex_unlock(&priv->table_lock); out: if (ret && priv) { if (!add_section) { gf_proc_dump_build_key(key_prefix, "xlator." 
"performance.io-cache", "priv"); gf_proc_dump_add_section("%s", key_prefix); } gf_proc_dump_write( "Unable to dump the state of private " "structure of io-cache xlator", "(Lock " "acquisition failed) %s", this->name); } return 0; } /* * fini - * * @this: * */ void fini(xlator_t *this) { ioc_table_t *table = NULL; struct ioc_priority *curr = NULL, *tmp = NULL; table = this->private; if (table == NULL) return; this->private = NULL; if (table->mem_pool != NULL) { mem_pool_destroy(table->mem_pool); table->mem_pool = NULL; } list_for_each_entry_safe(curr, tmp, &table->priority_list, list) { list_del_init(&curr->list); GF_FREE(curr->pattern); GF_FREE(curr); } /* inode_lru and inodes list can be empty in case fini() is * called soon after init()? Hence commenting the below asserts. */ /*for (i = 0; i < table->max_pri; i++) { GF_ASSERT (list_empty (&table->inode_lru[i])); } GF_ASSERT (list_empty (&table->inodes)); */ pthread_mutex_destroy(&table->table_lock); GF_FREE(table); this->private = NULL; return; } struct xlator_fops fops = { .open = ioc_open, .create = ioc_create, .readv = ioc_readv, .writev = ioc_writev, .truncate = ioc_truncate, .ftruncate = ioc_ftruncate, .lookup = ioc_lookup, .lk = ioc_lk, .setattr = ioc_setattr, .mknod = ioc_mknod, .readdirp = ioc_readdirp, .discard = ioc_discard, .zerofill = ioc_zerofill, }; struct xlator_dumpops dumpops = { .priv = ioc_priv_dump, .inodectx = ioc_inode_dump, }; struct xlator_cbks cbks = { .forget = ioc_forget, .release = ioc_release, .invalidate = ioc_invalidate, }; struct volume_options options[] = { { .key = {"io-cache"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", .description = "enable/disable io-cache", .op_version = {GD_OP_VERSION_6_0}, .flags = OPT_FLAG_SETTABLE, }, {.key = {"priority"}, .type = GF_OPTION_TYPE_PRIORITY_LIST, .default_value = "", .description = "Assigns priority to filenames with specific " "patterns so that when a page needs to be ejected " "out of the cache, the page of a file whose " 
"priority is the lowest will be ejected earlier", .op_version = {1}, .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"cache-timeout", "force-revalidate-timeout"}, .type = GF_OPTION_TYPE_INT, .min = 0, .max = 60, .default_value = "1", .description = "The cached data for a file will be retained for " "'cache-refresh-timeout' seconds, after which data " "re-validation is performed.", .op_version = {1}, .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"cache-size"}, .type = GF_OPTION_TYPE_SIZET, .min = 4 * GF_UNIT_MB, .max = INFINITY, .default_value = "32MB", .description = "Size of the read cache.", .op_version = {1}, .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"min-file-size"}, .type = GF_OPTION_TYPE_SIZET, .default_value = "0", .description = "Minimum file size which would be cached by the " "io-cache translator.", .op_version = {1}, .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"max-file-size"}, .type = GF_OPTION_TYPE_SIZET, .default_value = "0", .description = "Maximum file size which would be cached by the " "io-cache translator.", .op_version = {1}, .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"pass-through"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "false", .op_version = {GD_OP_VERSION_4_1_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT, .tags = {"io-cache"}, .description = "Enable/Disable io cache translator"}, {.key = {NULL}}, }; xlator_api_t xlator_api = { .init = init, .fini = fini, .reconfigure = reconfigure, .mem_acct_init = mem_acct_init, .op_version = {1}, /* Present from the initial version */ .dumpops = &dumpops, .fops = &fops, .cbks = &cbks, .options = options, .identifier = "io-cache", .category = GF_MAINTAINED, };