diff options
Diffstat (limited to 'libglusterfs')
143 files changed, 7622 insertions, 10684 deletions
diff --git a/libglusterfs/Makefile.am b/libglusterfs/Makefile.am index 8e5a4a0ccbf..d471a3f9243 100644 --- a/libglusterfs/Makefile.am +++ b/libglusterfs/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = src src/gfdb +SUBDIRS = src CLEANFILES = diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index a910d8e8a53..385e8ef4600 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -6,14 +6,14 @@ libglusterfs_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \ libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \ -DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \ - -DXXH_NAMESPACE=GF_ \ - -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \ - -I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree \ + -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \ + -I$(CONTRIBDIR)/rbtree \ -I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \ -DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \ -I$(CONTRIBDIR)/xxhash -libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) +libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL) \ + $(URCU_LIBS) $(URCU_CDS_LIBS) libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \ -export-symbols $(top_srcdir)/libglusterfs/src/libglusterfs.sym @@ -37,30 +37,36 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \ $(CONTRIBDIR)/timer-wheel/timer-wheel.c \ $(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \ $(CONTRIBDIR)/xxhash/xxhash.c \ - compound-fop-utils.c throttle-tbf.c monitoring.c + throttle-tbf.c monitoring.c async.c nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c defaults.c -nodist_libglusterfs_la_HEADERS = y.tab.h protocol-common.h - -BUILT_SOURCES = graph.lex.c defaults.c eventtypes.h protocol-common.h - -protocol-common.h: $(top_srcdir)/rpc/rpc-lib/src/protocol-common.h - cp $(top_srcdir)/rpc/rpc-lib/src/protocol-common.h 
. - -libglusterfs_la_HEADERS = common-utils.h defaults.h default-args.h \ - dict.h glusterfs.h hashfn.h timespec.h logging.h xlator.h \ - stack.h timer.h list.h inode.h call-stub.h compat.h fd.h \ - revision.h compat-errno.h gf-event.h mem-pool.h byte-order.h \ - gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h \ - checksum.h daemon.h store.h rbthash.h iatt.h latency.h \ - mem-types.h syncop.h cluster-syncop.h graph-utils.h trie.h \ - refcount.h run.h options.h lkowner.h fd-lk.h circ-buff.h \ - event-history.h gidcache.h client_t.h glusterfs-acl.h \ - glfs-message-id.h template-component-messages.h strfd.h \ - syncop-utils.h parse-utils.h libglusterfs-messages.h \ - lvm-defaults.h quota-common-utils.h rot-buffs.h \ - compat-uuid.h upcall-utils.h throttle-tbf.h events.h\ - compound-fop-utils.h atomic.h monitoring.h +nodist_libglusterfs_la_HEADERS = y.tab.h + +BUILT_SOURCES = graph.lex.c defaults.c eventtypes.h + +libglusterfs_la_HEADERS = glusterfs/common-utils.h glusterfs/defaults.h \ + glusterfs/default-args.h glusterfs/dict.h glusterfs/glusterfs.h \ + glusterfs/hashfn.h glusterfs/timespec.h glusterfs/logging.h \ + glusterfs/xlator.h glusterfs/stack.h glusterfs/timer.h glusterfs/list.h \ + glusterfs/inode.h glusterfs/call-stub.h glusterfs/compat.h glusterfs/fd.h \ + glusterfs/revision.h glusterfs/compat-errno.h glusterfs/gf-event.h \ + glusterfs/mem-pool.h glusterfs/byte-order.h glusterfs/gf-dirent.h \ + glusterfs/locking.h glusterfs/syscall.h glusterfs/iobuf.h \ + glusterfs/globals.h glusterfs/statedump.h glusterfs/checksum.h \ + glusterfs/daemon.h glusterfs/store.h glusterfs/rbthash.h glusterfs/iatt.h \ + glusterfs/latency.h glusterfs/mem-types.h glusterfs/syncop.h \ + glusterfs/cluster-syncop.h glusterfs/graph-utils.h glusterfs/trie.h \ + glusterfs/refcount.h glusterfs/run.h glusterfs/options.h \ + glusterfs/lkowner.h glusterfs/fd-lk.h glusterfs/circ-buff.h \ + glusterfs/event-history.h glusterfs/gidcache.h glusterfs/client_t.h \ + 
glusterfs/glusterfs-acl.h glusterfs/glfs-message-id.h \ + glusterfs/template-component-messages.h glusterfs/strfd.h \ + glusterfs/syncop-utils.h glusterfs/parse-utils.h \ + glusterfs/libglusterfs-messages.h glusterfs/lvm-defaults.h \ + glusterfs/quota-common-utils.h glusterfs/rot-buffs.h \ + glusterfs/compat-uuid.h glusterfs/upcall-utils.h glusterfs/throttle-tbf.h \ + glusterfs/events.h glusterfs/atomic.h glusterfs/monitoring.h \ + glusterfs/async.h glusterfs/glusterfs-fops.h libglusterfs_ladir = $(includedir)/glusterfs @@ -70,7 +76,10 @@ noinst_HEADERS = unittest/unittest.h \ $(CONTRIBDIR)/libexecinfo/execinfo_compat.h \ $(CONTRIBDIR)/timer-wheel/timer-wheel.h \ $(CONTRIBDIR)/xxhash/xxhash.h \ - tier-ctr-interface.h + $(CONTRIBDIR)/userspace-rcu/wfcqueue.h \ + $(CONTRIBDIR)/userspace-rcu/wfstack.h \ + $(CONTRIBDIR)/userspace-rcu/static-wfcqueue.h \ + $(CONTRIBDIR)/userspace-rcu/static-wfstack.h eventtypes.h: $(top_srcdir)/events/eventskeygen.py $(PYTHON) $(top_srcdir)/events/eventskeygen.py C_HEADER diff --git a/libglusterfs/src/async.c b/libglusterfs/src/async.c new file mode 100644 index 00000000000..1d6cfa374b6 --- /dev/null +++ b/libglusterfs/src/async.c @@ -0,0 +1,720 @@ +/* + Copyright (c) 2019 Red Hat, Inc <https://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/* To implement an efficient thread pool with minimum contention we have used + * the following ideas: + * + * - The queue of jobs has been implemented using a Wait-Free queue provided + * by the userspace-rcu library. This queue requires a mutex when multiple + * consumers can be extracting items from it concurrently, but the locked + * region is very small, which minimizes the chances of contention. 
To + * further minimize contention, the number of active worker threads that + * are accessing the queue is dynamically adjusted so that we always have + * the minimum required amount of workers contending for the queue. Adding + * new items can be done with a single atomic operation, without locks. + * + * - All queue management operations, like creating more threads, enabling + * sleeping ones, etc. are done by a single thread. This makes it possible + * to manage all scaling related information and workers lists without + * locks. This functionality is implemented as a role that can be assigned + * to any of the worker threads, which prevents lengthy operations from + * interfering with this task. + * + * - Management is based on signals. We used signals for management tasks to + * avoid multiple system calls for each request (with signals we can wait + * for multiple events and get some additional data for each request in a + * single call, instead of first polling and then reading). + * + * TODO: There are some other changes that can take advantage of this new + * thread pool. + * + * - Use this thread pool as the core threading model for synctasks. I + * think this would improve synctask performance because I think we + * currently have some contention there for some workloads. + * + * - Implement a per thread timer that will allow adding and removing + * timers without using mutexes. + * + * - Integrate with userspace-rcu library in QSBR mode, allowing + * other portions of code to be implemented using RCU-based + * structures with an extremely fast read side without contention. + * + * - Integrate I/O into the thread pool so that the thread pool is + * able to efficiently manage all loads and scale dynamically. This + * could make it possible to minimize context switching when serving + * requests from fuse or network. + * + * - Dynamically scale the number of workers based on system load. 
+ * This will make it possible to reduce contention when system is + * heavily loaded, improving performance under these circumstances + * (or minimizing performance loss). This will also make it possible + * that gluster can coexist with other processes that also consume + * CPU, with minimal interference from each other. + */ + +#include <unistd.h> +#include <pthread.h> +#include <errno.h> + +#include "glusterfs/list.h" +#include "glusterfs/mem-types.h" +#include "glusterfs/async.h" + +/* These macros wrap a simple system/library call to check the returned error + * and log a message in case of failure. */ +#define GF_ASYNC_CHECK(_func, _args...) \ + ({ \ + int32_t __async_error = -_func(_args); \ + if (caa_unlikely(__async_error != 0)) { \ + gf_async_error(__async_error, #_func "() failed."); \ + } \ + __async_error; \ + }) + +#define GF_ASYNC_CHECK_ERRNO(_func, _args...) \ + ({ \ + int32_t __async_error = _func(_args); \ + if (caa_unlikely(__async_error < 0)) { \ + __async_error = -errno; \ + gf_async_error(__async_error, #_func "() failed."); \ + } \ + __async_error; \ + }) + +/* These macros are used when, based on POSIX documentation, the function + * should never fail under the conditions we are using it. So any unexpected + * error will be handled as a fatal event. It probably means a critical bug + * or memory corruption. In both cases we consider that stopping the process + * is safer (otherwise it could cause more corruption with unknown effects + * that could be worse). */ +#define GF_ASYNC_CANTFAIL(_func, _args...) \ + do { \ + int32_t __async_error = -_func(_args); \ + if (caa_unlikely(__async_error != 0)) { \ + gf_async_fatal(__async_error, #_func "() failed"); \ + } \ + } while (0) + +#define GF_ASYNC_CANTFAIL_ERRNO(_func, _args...) 
\ + ({ \ + int32_t __async_error = _func(_args); \ + if (caa_unlikely(__async_error < 0)) { \ + __async_error = -errno; \ + gf_async_fatal(__async_error, #_func "() failed"); \ + } \ + __async_error; \ + }) + +/* TODO: for now we allocate a static array of workers. There's an issue if we + * try to use dynamic memory since these workers are initialized very + * early in the process startup and it seems that sometimes not all is + * ready to use dynamic memory. */ +static gf_async_worker_t gf_async_workers[GF_ASYNC_MAX_THREADS]; + +/* This is the only global variable needed to manage the entire framework. */ +gf_async_control_t gf_async_ctrl = {}; + +static __thread gf_async_worker_t *gf_async_current_worker = NULL; + +/* The main function of the worker threads. */ +static void * +gf_async_worker(void *arg); + +static void +gf_async_sync_init(void) +{ + GF_ASYNC_CANTFAIL(pthread_barrier_init, &gf_async_ctrl.sync, NULL, 2); +} + +static void +gf_async_sync_now(void) +{ + int32_t ret; + + ret = pthread_barrier_wait(&gf_async_ctrl.sync); + if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { + GF_ASYNC_CANTFAIL(pthread_barrier_destroy, &gf_async_ctrl.sync); + ret = 0; + } + if (caa_unlikely(ret != 0)) { + gf_async_fatal(-ret, "pthread_barrier_wait() failed"); + } +} + +static void +gf_async_sigmask_empty(sigset_t *mask) +{ + GF_ASYNC_CANTFAIL_ERRNO(sigemptyset, mask); +} + +static void +gf_async_sigmask_add(sigset_t *mask, int32_t signal) +{ + GF_ASYNC_CANTFAIL_ERRNO(sigaddset, mask, signal); +} + +static void +gf_async_sigmask_set(int32_t mode, sigset_t *mask, sigset_t *old) +{ + GF_ASYNC_CANTFAIL(pthread_sigmask, mode, mask, old); +} + +static void +gf_async_sigaction(int32_t signum, const struct sigaction *action, + struct sigaction *old) +{ + GF_ASYNC_CANTFAIL_ERRNO(sigaction, signum, action, old); +} + +static int32_t +gf_async_sigwait(sigset_t *set) +{ + int32_t ret, signum; + + do { + ret = sigwait(set, &signum); + } while (caa_unlikely(ret == EINTR)); + 
+ if (caa_unlikely(ret != 0)) { + /* sigwait() returns a positive error number; it doesn't set errno. */ + gf_async_fatal(-ret, "sigwait() failed"); + } + + return signum; +} + +static int32_t +gf_async_sigtimedwait(sigset_t *set, struct timespec *timeout) +{ + int32_t ret; + + do { + ret = sigtimedwait(set, NULL, timeout); + } while (caa_unlikely((ret < 0) && (errno == EINTR))); + if (caa_unlikely(ret < 0)) { + ret = -errno; + /* EAGAIN means that the timeout has expired, so we allow this error. + * Any other error shouldn't happen. */ + if (caa_unlikely(ret != -EAGAIN)) { + gf_async_fatal(ret, "sigtimedwait() failed"); + } + ret = 0; + } + + return ret; +} + +static void +gf_async_sigbroadcast(int32_t signum) +{ + GF_ASYNC_CANTFAIL_ERRNO(kill, gf_async_ctrl.pid, signum); +} + +static void +gf_async_signal_handler(int32_t signum) +{ + /* We should never handle a signal in this function. */ + gf_async_fatal(-EBUSY, + "Unexpected processing of signal %d through a handler.", + signum); +} + +static void +gf_async_signal_setup(void) +{ + struct sigaction action; + + /* We configure all related signals so that we can detect threads using an + * invalid signal mask that doesn't block our critical signal. */ + memset(&action, 0, sizeof(action)); + action.sa_handler = gf_async_signal_handler; + + gf_async_sigaction(GF_ASYNC_SIGCTRL, &action, &gf_async_ctrl.handler_ctrl); + + gf_async_sigaction(GF_ASYNC_SIGQUEUE, &action, + &gf_async_ctrl.handler_queue); +} + +static void +gf_async_signal_restore(void) +{ + /* Handlers we have previously changed are restored back to their original + * value. 
*/ + + if (gf_async_ctrl.handler_ctrl.sa_handler != gf_async_signal_handler) { + gf_async_sigaction(GF_ASYNC_SIGCTRL, &gf_async_ctrl.handler_ctrl, NULL); + } + + if (gf_async_ctrl.handler_queue.sa_handler != gf_async_signal_handler) { + gf_async_sigaction(GF_ASYNC_SIGQUEUE, &gf_async_ctrl.handler_queue, + NULL); + } +} + +static void +gf_async_signal_flush(void) +{ + struct timespec delay; + + delay.tv_sec = 0; + delay.tv_nsec = 0; + + /* We read all pending signals so that they don't trigger once the signal + * mask of some thread is changed. */ + while (gf_async_sigtimedwait(&gf_async_ctrl.sigmask_ctrl, &delay) > 0) { + } + while (gf_async_sigtimedwait(&gf_async_ctrl.sigmask_queue, &delay) > 0) { + } +} + +static int32_t +gf_async_thread_create(pthread_t *thread, int32_t id, void *data) +{ + int32_t ret; + + ret = gf_thread_create(thread, NULL, gf_async_worker, data, + GF_ASYNC_THREAD_NAME "%u", id); + if (caa_unlikely(ret < 0)) { + /* TODO: gf_thread_create() should return a more specific error + * code. */ + return -ENOMEM; + } + + return 0; +} + +static void +gf_async_thread_wait(pthread_t thread) +{ + /* TODO: this is a blocking call executed inside one of the workers of the + * thread pool. This is bad, but this is only executed once we have + * received a notification from the thread that it's terminating, so + * this should return almost immediately. However, to be more robust + * it would be better to use pthread_timedjoin_np() (or even a call + * to pthread_tryjoin_np() followed by a delayed recheck if it + * fails), but they are not portable. We should see how to do this + * in other platforms. */ + GF_ASYNC_CANTFAIL(pthread_join, thread, NULL); +} + +static int32_t +gf_async_worker_create(void) +{ + struct cds_wfs_node *node; + gf_async_worker_t *worker; + uint32_t counts, running, max; + int32_t ret; + + node = __cds_wfs_pop_blocking(&gf_async_ctrl.available); + if (caa_unlikely(node == NULL)) { + /* There are no more available workers. 
We have all threads running. */ + return 1; + } + cds_wfs_node_init(node); + + ret = 1; + + counts = uatomic_read(&gf_async_ctrl.counts); + max = uatomic_read(&gf_async_ctrl.max_threads); + running = GF_ASYNC_COUNT_RUNNING(counts); + if (running < max) { + uatomic_add(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(1, 0)); + + worker = caa_container_of(node, gf_async_worker_t, stack); + + ret = gf_async_thread_create(&worker->thread, worker->id, worker); + if (caa_likely(ret >= 0)) { + return 0; + } + + uatomic_add(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(-1, 0)); + } + + cds_wfs_push(&gf_async_ctrl.available, node); + + return ret; +} + +static void +gf_async_worker_enable(void) +{ + /* This will wake one of the spare workers. If all workers are busy now, + * the signal will be queued so that the first one that completes its + * work will become the leader. */ + gf_async_sigbroadcast(GF_ASYNC_SIGCTRL); + + /* We have consumed a spare worker. We create another one for future + * needs. */ + gf_async_worker_create(); +} + +static void +gf_async_worker_wait(void) +{ + int32_t signum; + + signum = gf_async_sigwait(&gf_async_ctrl.sigmask_ctrl); + if (caa_unlikely(signum != GF_ASYNC_SIGCTRL)) { + gf_async_fatal(-EINVAL, "Worker received an unexpected signal (%d)", + signum); + } +} + +static void +gf_async_leader_wait(void) +{ + int32_t signum; + + signum = gf_async_sigwait(&gf_async_ctrl.sigmask_queue); + if (caa_unlikely(signum != GF_ASYNC_SIGQUEUE)) { + gf_async_fatal(-EINVAL, "Leader received an unexpected signal (%d)", + signum); + } +} + +static void +gf_async_run(struct cds_wfcq_node *node) +{ + gf_async_t *async; + + /* We've just got work from the queue. Process it. */ + async = caa_container_of(node, gf_async_t, queue); + /* TODO: remove dependency from THIS and xl. */ + THIS = async->xl; + async->cbk(async->xl, async); +} + +static void +gf_async_worker_run(void) +{ + struct cds_wfcq_node *node; + + do { + /* We keep executing jobs from the queue while it's not empty. 
Note + * that while we do this, we are ignoring any stop request. That's + * fine, since we need to process our own 'join' messages to fully + * terminate all threads. Note that normal jobs should have already + * completed once a stop request is received. */ + node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head, + &gf_async_ctrl.queue.tail); + if (node != NULL) { + gf_async_run(node); + } + } while (node != NULL); + + /* TODO: I've tried to keep the worker looking at the queue for some small + * amount of time in a busy loop to see if more jobs come soon. With + * this I attempted to avoid the overhead of signal management if + * jobs come fast enough. However experimental results seem to + * indicate that doing this, CPU utilization grows and performance + * is actually reduced. We need to see if that's because I used bad + * parameters or it's really better to do it as it's done now. */ +} + +static void +gf_async_leader_run(void) +{ + struct cds_wfcq_node *node; + + node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head, + &gf_async_ctrl.queue.tail); + while (caa_unlikely(node == NULL)) { + gf_async_leader_wait(); + + node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head, + &gf_async_ctrl.queue.tail); + } + + /* Activate the next available worker thread. It will become the new + * leader. */ + gf_async_worker_enable(); + + gf_async_run(node); +} + +static uint32_t +gf_async_stop_check(gf_async_worker_t *worker) +{ + uint32_t counts, old, running, max; + + /* First we check if we should stop without doing any costly atomic + * operation. */ + old = uatomic_read(&gf_async_ctrl.counts); + max = uatomic_read(&gf_async_ctrl.max_threads); + running = GF_ASYNC_COUNT_RUNNING(old); + while (running > max) { + /* There are too many threads. We try to stop the current worker. */ + counts = uatomic_cmpxchg(&gf_async_ctrl.counts, old, + old + GF_ASYNC_COUNTS(-1, 1)); + if (old != counts) { + /* Another thread has just updated the counts. We need to retry. 
*/ + old = counts; + running = GF_ASYNC_COUNT_RUNNING(old); + + continue; + } + + running--; + worker->running = false; + } + + return running; +} + +static void +gf_async_stop_all(xlator_t *xl, gf_async_t *async) +{ + if (gf_async_stop_check(gf_async_current_worker) > 0) { + /* There are more workers running. We propagate the stop request to + * them. */ + gf_async(async, xl, gf_async_stop_all); + } +} + +static void +gf_async_join(xlator_t *xl, gf_async_t *async) +{ + gf_async_worker_t *worker; + + worker = caa_container_of(async, gf_async_worker_t, async); + + gf_async_thread_wait(worker->thread); + + cds_wfs_push(&gf_async_ctrl.available, &worker->stack); +} + +static void +gf_async_terminate(gf_async_worker_t *worker) +{ + uint32_t counts; + + counts = uatomic_add_return(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(0, -1)); + if (counts == 0) { + /* This is the termination of the last worker thread. We need to + * synchronize the main thread that is waiting for all workers to + * finish. */ + gf_async_ctrl.sync_thread = worker->thread; + + gf_async_sync_now(); + } else { + /* Force someone else to join this thread to release resources. */ + gf_async(&worker->async, THIS, gf_async_join); + } +} + +static void * +gf_async_worker(void *arg) +{ + gf_async_worker_t *worker; + + worker = (gf_async_worker_t *)arg; + gf_async_current_worker = worker; + + worker->running = true; + do { + /* This thread does nothing until someone enables it to become a + * leader. */ + gf_async_worker_wait(); + + /* This thread is now a leader. It will process jobs from the queue + * and, if necessary, enable another worker and transfer leadership + * to it. */ + gf_async_leader_run(); + + /* This thread is not a leader anymore. It will continue processing + * queued jobs until it becomes empty. */ + gf_async_worker_run(); + + /* Stop the current thread if there are too many threads running. 
*/ + gf_async_stop_check(worker); + } while (worker->running); + + gf_async_terminate(worker); + + return NULL; +} + +static void +gf_async_cleanup(void) +{ + /* We do some basic initialization of the global variable 'gf_async_ctrl' + * so that it's put into a relatively consistent state. */ + + gf_async_ctrl.enabled = false; + + gf_async_ctrl.pid = 0; + gf_async_sigmask_empty(&gf_async_ctrl.sigmask_ctrl); + gf_async_sigmask_empty(&gf_async_ctrl.sigmask_queue); + + /* This is used to later detect if the handler of these signals have been + * changed or not. */ + gf_async_ctrl.handler_ctrl.sa_handler = gf_async_signal_handler; + gf_async_ctrl.handler_queue.sa_handler = gf_async_signal_handler; + + gf_async_ctrl.table = NULL; + gf_async_ctrl.max_threads = 0; + gf_async_ctrl.counts = 0; +} + +void +gf_async_fini(void) +{ + gf_async_t async; + + if (uatomic_read(&gf_async_ctrl.counts) != 0) { + /* We ensure that all threads will quit on the next check. */ + gf_async_ctrl.max_threads = 0; + + /* Send the stop request to the thread pool. This will cause the + * execution of gf_async_stop_all() by one of the worker threads which, + * eventually, will terminate all worker threads. */ + gf_async(&async, THIS, gf_async_stop_all); + + /* We synchronize here with the last thread. */ + gf_async_sync_now(); + + /* We have just synchronized with the latest thread. Now just wait for + * it to terminate. */ + gf_async_thread_wait(gf_async_ctrl.sync_thread); + + gf_async_signal_flush(); + } + + gf_async_signal_restore(); + + gf_async_cleanup(); +} + +void +gf_async_adjust_threads(int32_t threads) +{ + if (threads == 0) { + /* By default we allow a maximum of 2 * #cores worker threads. This + * value is to try to accommodate threads that will do some I/O. Having + * more threads than cores we can keep CPU busy even if some threads + * are blocked for I/O. In the most efficient case, we can have #cores + * computing threads and #cores blocked threads on I/O. 
However this is + * hard to achieve because we can end with more than #cores computing + * threads, which won't provide a real benefit and will increase + * contention. + * + * TODO: implement a more intelligent dynamic maximum based on CPU + * usage and/or system load. */ + threads = sysconf(_SC_NPROCESSORS_ONLN) * 2; + if (threads < 0) { + /* If we can't get the current number of processors, we pick a + * random number. */ + threads = 16; + } + } + if (threads > GF_ASYNC_MAX_THREADS) { + threads = GF_ASYNC_MAX_THREADS; + } + uatomic_set(&gf_async_ctrl.max_threads, threads); +} + +int32_t +gf_async_init(glusterfs_ctx_t *ctx) +{ + sigset_t set; + gf_async_worker_t *worker; + uint32_t i; + int32_t ret; + bool running; + + gf_async_cleanup(); + + if (!ctx->cmd_args.global_threading || + (ctx->process_mode == GF_GLUSTERD_PROCESS)) { + return 0; + } + + /* At the init time, the maximum number of threads has not yet been + * configured. We use a small starting value that will be later dynamically + * adjusted when ctx->config.max_threads is updated. */ + gf_async_adjust_threads(GF_ASYNC_SPARE_THREADS + 1); + + gf_async_ctrl.pid = getpid(); + + __cds_wfs_init(&gf_async_ctrl.available); + cds_wfcq_init(&gf_async_ctrl.queue.head, &gf_async_ctrl.queue.tail); + + gf_async_sync_init(); + + /* TODO: it would be cleaner to use dynamic memory, but at this point some + * memory management resources are not yet initialized. */ + gf_async_ctrl.table = gf_async_workers; + + /* We keep all workers in a stack. It will be used when a new thread needs + * to be created. */ + for (i = GF_ASYNC_MAX_THREADS; i > 0; i--) { + worker = &gf_async_ctrl.table[i - 1]; + + worker->id = i - 1; + cds_wfs_node_init(&worker->stack); + cds_wfs_push(&gf_async_ctrl.available, &worker->stack); + } + + /* Prepare the signal mask for regular workers and the leader. 
*/ + gf_async_sigmask_add(&gf_async_ctrl.sigmask_ctrl, GF_ASYNC_SIGCTRL); + gf_async_sigmask_add(&gf_async_ctrl.sigmask_queue, GF_ASYNC_SIGQUEUE); + + /* TODO: this is needed to block our special signals in the current thread + * and all children that it starts. It would be cleaner to do it when + * signals are initialized, but there doesn't seem to be a unique + * place to do that, so for now we do it here. */ + gf_async_sigmask_empty(&set); + gf_async_sigmask_add(&set, GF_ASYNC_SIGCTRL); + gf_async_sigmask_add(&set, GF_ASYNC_SIGQUEUE); + gf_async_sigmask_set(SIG_BLOCK, &set, NULL); + + /* Configure the signal handlers. This is mostly for safety, not really + * needed, but it doesn't hurt. Note that the caller must ensure that the + * signals we need to run are already blocked in any thread already + * started. Otherwise this won't work. */ + gf_async_signal_setup(); + + running = false; + + /* We start the spare workers + 1 for the leader. */ + for (i = 0; i < GF_ASYNC_SPARE_THREADS; i++) { + ret = gf_async_worker_create(); + if (caa_unlikely(ret < 0)) { + /* This is the initial start up so we enforce that the spare + * threads are created. If this fails at the beginning, it's very + * unlikely that the async workers could do its job, so we abort + * the initialization. */ + goto out; + } + + /* Once the first thread is started, we can enable it to become the + * initial leader. 
*/ + if ((ret == 0) && !running) { + running = true; + gf_async_worker_enable(); + } + } + + if (caa_unlikely(!running)) { + gf_async_fatal(-ENOMEM, "No worker thread has started"); + } + + gf_async_ctrl.enabled = true; + + ret = 0; + +out: + if (ret < 0) { + gf_async_error(ret, "Unable to initialize the thread pool."); + gf_async_fini(); + } + + return ret; +} diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c index 7e2713fa384..ee84f08acd4 100644 --- a/libglusterfs/src/call-stub.c +++ b/libglusterfs/src/call-stub.c @@ -11,12 +11,12 @@ #include <openssl/md5.h> #include <inttypes.h> -#include "call-stub.h" -#include "mem-types.h" -#include "libglusterfs-messages.h" +#include "glusterfs/call-stub.h" +#include "glusterfs/mem-types.h" +#include "glusterfs/libglusterfs-messages.h" static call_stub_t * -stub_new(call_frame_t *frame, char wind, glusterfs_fop_t fop) +stub_new(call_frame_t *frame, const char wind, const glusterfs_fop_t fop) { call_stub_t *new = NULL; @@ -41,7 +41,6 @@ fop_lookup_stub(call_frame_t *frame, fop_lookup_t fn, loc_t *loc, dict_t *xdata) { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_LOOKUP); @@ -60,8 +59,6 @@ fop_lookup_cbk_stub(call_frame_t *frame, fop_lookup_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_LOOKUP); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -77,7 +74,6 @@ fop_stat_stub(call_frame_t *frame, fop_stat_t fn, loc_t *loc, dict_t *xdata) { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_STAT); @@ -95,8 +91,6 @@ fop_stat_cbk_stub(call_frame_t *frame, fop_stat_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_STAT); 
GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -111,8 +105,6 @@ fop_fstat_stub(call_frame_t *frame, fop_fstat_t fn, fd_t *fd, dict_t *xdata) { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 1, GF_FOP_FSTAT); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -128,8 +120,6 @@ fop_fstat_cbk_stub(call_frame_t *frame, fop_fstat_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FSTAT); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -145,7 +135,6 @@ fop_truncate_stub(call_frame_t *frame, fop_truncate_t fn, loc_t *loc, off_t off, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_TRUNCATE); @@ -164,8 +153,6 @@ fop_truncate_cbk_stub(call_frame_t *frame, fop_truncate_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_TRUNCATE); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -182,8 +169,6 @@ fop_ftruncate_stub(call_frame_t *frame, fop_ftruncate_t fn, fd_t *fd, off_t off, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 1, GF_FOP_FTRUNCATE); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -201,8 +186,6 @@ fop_ftruncate_cbk_stub(call_frame_t *frame, fop_ftruncate_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FTRUNCATE); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -220,7 +203,6 @@ fop_access_stub(call_frame_t *frame, fop_access_t fn, loc_t *loc, int32_t mask, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_ACCESS); @@ -238,8 +220,6 @@ fop_access_cbk_stub(call_frame_t *frame, fop_access_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - 
GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_ACCESS); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -255,7 +235,6 @@ fop_readlink_stub(call_frame_t *frame, fop_readlink_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_READLINK); @@ -274,8 +253,6 @@ fop_readlink_cbk_stub(call_frame_t *frame, fop_readlink_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_READLINK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -292,7 +269,6 @@ fop_mknod_stub(call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_MKNOD); @@ -312,8 +288,6 @@ fop_mknod_cbk_stub(call_frame_t *frame, fop_mknod_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_MKNOD); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -330,7 +304,6 @@ fop_mkdir_stub(call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_MKDIR); @@ -350,8 +323,6 @@ fop_mkdir_cbk_stub(call_frame_t *frame, fop_mkdir_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_MKDIR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -368,7 +339,6 @@ fop_unlink_stub(call_frame_t *frame, fop_unlink_t fn, loc_t *loc, int xflag, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_UNLINK); @@ -388,8 +358,6 @@ fop_unlink_cbk_stub(call_frame_t *frame, 
fop_unlink_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_UNLINK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -406,7 +374,6 @@ fop_rmdir_stub(call_frame_t *frame, fop_rmdir_t fn, loc_t *loc, int flags, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_RMDIR); @@ -426,8 +393,6 @@ fop_rmdir_cbk_stub(call_frame_t *frame, fop_rmdir_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_RMDIR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -444,7 +409,6 @@ fop_symlink_stub(call_frame_t *frame, fop_symlink_t fn, const char *linkname, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); GF_VALIDATE_OR_GOTO("call-stub", linkname, out); @@ -465,8 +429,6 @@ fop_symlink_cbk_stub(call_frame_t *frame, fop_symlink_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_SYMLINK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -483,7 +445,6 @@ fop_rename_stub(call_frame_t *frame, fop_rename_t fn, loc_t *oldloc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", oldloc, out); GF_VALIDATE_OR_GOTO("call-stub", newloc, out); @@ -505,8 +466,6 @@ fop_rename_cbk_stub(call_frame_t *frame, fop_rename_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_RENAME); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -523,7 +482,6 @@ fop_link_stub(call_frame_t *frame, fop_link_t fn, loc_t *oldloc, loc_t *newloc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", oldloc, out); 
GF_VALIDATE_OR_GOTO("call-stub", newloc, out); @@ -544,8 +502,6 @@ fop_link_cbk_stub(call_frame_t *frame, fop_link_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_LINK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -562,7 +518,6 @@ fop_create_stub(call_frame_t *frame, fop_create_t fn, loc_t *loc, int32_t flags, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_CREATE); @@ -582,8 +537,6 @@ fop_create_cbk_stub(call_frame_t *frame, fop_create_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_CREATE); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -600,7 +553,6 @@ fop_open_stub(call_frame_t *frame, fop_open_t fn, loc_t *loc, int32_t flags, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_OPEN); @@ -618,8 +570,6 @@ fop_open_cbk_stub(call_frame_t *frame, fop_open_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_OPEN); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -635,8 +585,6 @@ fop_readv_stub(call_frame_t *frame, fop_readv_t fn, fd_t *fd, size_t size, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 1, GF_FOP_READ); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -653,8 +601,6 @@ fop_readv_cbk_stub(call_frame_t *frame, fop_readv_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_READ); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -672,7 +618,6 @@ fop_writev_stub(call_frame_t *frame, fop_writev_t fn, fd_t *fd, { call_stub_t *stub = NULL; - 
GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", vector, out); stub = stub_new(frame, 1, GF_FOP_WRITE); @@ -692,8 +637,6 @@ fop_writev_cbk_stub(call_frame_t *frame, fop_writev_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_WRITE); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -709,8 +652,6 @@ fop_flush_stub(call_frame_t *frame, fop_flush_t fn, fd_t *fd, dict_t *xdata) { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 1, GF_FOP_FLUSH); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -726,8 +667,6 @@ fop_flush_cbk_stub(call_frame_t *frame, fop_flush_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FLUSH); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -743,8 +682,6 @@ fop_fsync_stub(call_frame_t *frame, fop_fsync_t fn, fd_t *fd, int32_t datasync, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 1, GF_FOP_FSYNC); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -761,8 +698,6 @@ fop_fsync_cbk_stub(call_frame_t *frame, fop_fsync_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FSYNC); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -779,7 +714,6 @@ fop_opendir_stub(call_frame_t *frame, fop_opendir_t fn, loc_t *loc, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_OPENDIR); @@ -797,8 +731,6 @@ fop_opendir_cbk_stub(call_frame_t *frame, fop_opendir_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_OPENDIR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -814,8 +746,6 @@ 
fop_fsyncdir_stub(call_frame_t *frame, fop_fsyncdir_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 1, GF_FOP_FSYNCDIR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -831,8 +761,6 @@ fop_fsyncdir_cbk_stub(call_frame_t *frame, fop_fsyncdir_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FSYNCDIR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -847,7 +775,6 @@ fop_statfs_stub(call_frame_t *frame, fop_statfs_t fn, loc_t *loc, dict_t *xdata) { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_STATFS); @@ -865,8 +792,6 @@ fop_statfs_cbk_stub(call_frame_t *frame, fop_statfs_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_STATFS); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -882,7 +807,6 @@ fop_setxattr_stub(call_frame_t *frame, fop_setxattr_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_SETXATTR); @@ -900,8 +824,6 @@ fop_setxattr_cbk_stub(call_frame_t *frame, fop_setxattr_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_SETXATTR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -917,7 +839,6 @@ fop_getxattr_stub(call_frame_t *frame, fop_getxattr_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); stub = stub_new(frame, 1, GF_FOP_GETXATTR); @@ -936,8 +857,6 @@ fop_getxattr_cbk_stub(call_frame_t *frame, fop_getxattr_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_GETXATTR); 
GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -953,7 +872,6 @@ fop_fsetxattr_stub(call_frame_t *frame, fop_fsetxattr_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fd, out); stub = stub_new(frame, 1, GF_FOP_FSETXATTR); @@ -971,8 +889,6 @@ fop_fsetxattr_cbk_stub(call_frame_t *frame, fop_fsetxattr_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FSETXATTR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -988,7 +904,6 @@ fop_fgetxattr_stub(call_frame_t *frame, fop_fgetxattr_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fd, out); stub = stub_new(frame, 1, GF_FOP_FGETXATTR); @@ -1007,8 +922,6 @@ fop_fgetxattr_cbk_stub(call_frame_t *frame, fop_fgetxattr_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_GETXATTR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1024,7 +937,6 @@ fop_removexattr_stub(call_frame_t *frame, fop_removexattr_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", loc, out); GF_VALIDATE_OR_GOTO("call-stub", name, out); @@ -1043,8 +955,6 @@ fop_removexattr_cbk_stub(call_frame_t *frame, fop_removexattr_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_REMOVEXATTR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1060,7 +970,6 @@ fop_fremovexattr_stub(call_frame_t *frame, fop_fremovexattr_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fd, out); GF_VALIDATE_OR_GOTO("call-stub", name, out); @@ -1079,8 +988,6 @@ fop_fremovexattr_cbk_stub(call_frame_t *frame, fop_fremovexattr_cbk_t fn, { call_stub_t *stub = NULL; - 
GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FREMOVEXATTR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1096,7 +1003,6 @@ fop_lk_stub(call_frame_t *frame, fop_lk_t fn, fd_t *fd, int32_t cmd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", lock, out); stub = stub_new(frame, 1, GF_FOP_LK); @@ -1114,8 +1020,6 @@ fop_lk_cbk_stub(call_frame_t *frame, fop_lk_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_LK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1131,7 +1035,6 @@ fop_inodelk_stub(call_frame_t *frame, fop_inodelk_t fn, const char *volume, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", lock, out); stub = stub_new(frame, 1, GF_FOP_INODELK); @@ -1149,8 +1052,6 @@ fop_inodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_INODELK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1166,7 +1067,6 @@ fop_finodelk_stub(call_frame_t *frame, fop_finodelk_t fn, const char *volume, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", lock, out); stub = stub_new(frame, 1, GF_FOP_FINODELK); @@ -1185,8 +1085,6 @@ fop_finodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FINODELK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1203,8 +1101,6 @@ fop_entrylk_stub(call_frame_t *frame, fop_entrylk_t fn, const char *volume, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 1, GF_FOP_ENTRYLK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1221,8 +1117,6 @@ 
fop_entrylk_cbk_stub(call_frame_t *frame, fop_entrylk_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_ENTRYLK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1239,8 +1133,6 @@ fop_fentrylk_stub(call_frame_t *frame, fop_fentrylk_t fn, const char *volume, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 1, GF_FOP_FENTRYLK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1256,8 +1148,6 @@ fop_fentrylk_cbk_stub(call_frame_t *frame, fop_fentrylk_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FENTRYLK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1274,8 +1164,6 @@ fop_readdirp_cbk_stub(call_frame_t *frame, fop_readdirp_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_READDIRP); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1291,8 +1179,6 @@ fop_readdir_cbk_stub(call_frame_t *frame, fop_readdir_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_READDIR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1338,7 +1224,6 @@ fop_rchecksum_stub(call_frame_t *frame, fop_rchecksum_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fd, out); stub = stub_new(frame, 1, GF_FOP_RCHECKSUM); @@ -1357,8 +1242,6 @@ fop_rchecksum_cbk_stub(call_frame_t *frame, fop_rchecksum_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_RCHECKSUM); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1375,8 +1258,6 @@ fop_xattrop_cbk_stub(call_frame_t *frame, fop_xattrop_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = 
stub_new(frame, 0, GF_FOP_XATTROP); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1392,7 +1273,6 @@ fop_fxattrop_cbk_stub(call_frame_t *frame, fop_fxattrop_cbk_t fn, dict_t *xdata) { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); stub = stub_new(frame, 0, GF_FOP_FXATTROP); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1409,7 +1289,6 @@ fop_xattrop_stub(call_frame_t *frame, fop_xattrop_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", xattr, out); stub = stub_new(frame, 1, GF_FOP_XATTROP); @@ -1427,7 +1306,6 @@ fop_fxattrop_stub(call_frame_t *frame, fop_fxattrop_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", xattr, out); stub = stub_new(frame, 1, GF_FOP_FXATTROP); @@ -1446,8 +1324,6 @@ fop_setattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_SETATTR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1465,8 +1341,6 @@ fop_fsetattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FSETATTR); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1483,7 +1357,6 @@ fop_setattr_stub(call_frame_t *frame, fop_setattr_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_SETATTR); @@ -1501,7 +1374,6 @@ fop_fsetattr_stub(call_frame_t *frame, fop_fsetattr_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_FSETATTR); @@ -1520,8 +1392,6 @@ fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn, { 
call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_FALLOCATE); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1539,7 +1409,6 @@ fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_FALLOCATE); @@ -1558,8 +1427,6 @@ fop_discard_cbk_stub(call_frame_t *frame, fop_discard_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_DISCARD); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1577,7 +1444,6 @@ fop_discard_stub(call_frame_t *frame, fop_discard_t fn, fd_t *fd, off_t offset, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_DISCARD); @@ -1596,8 +1462,6 @@ fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_ZEROFILL); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1615,7 +1479,6 @@ fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_ZEROFILL); @@ -1633,8 +1496,6 @@ fop_ipc_cbk_stub(call_frame_t *frame, fop_ipc_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_IPC); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1650,7 +1511,6 @@ fop_ipc_stub(call_frame_t *frame, fop_ipc_t fn, int32_t op, dict_t *xdata) { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_IPC); @@ -1668,8 +1528,6 @@ 
fop_lease_cbk_stub(call_frame_t *frame, fop_lease_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_LEASE); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1685,7 +1543,6 @@ fop_lease_stub(call_frame_t *frame, fop_lease_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); GF_VALIDATE_OR_GOTO("call-stub", lease, out); @@ -1704,8 +1561,6 @@ fop_seek_cbk_stub(call_frame_t *frame, fop_seek_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_SEEK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1722,7 +1577,6 @@ fop_seek_stub(call_frame_t *frame, fop_seek_t fn, fd_t *fd, off_t offset, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_SEEK); @@ -1741,8 +1595,6 @@ fop_getactivelk_cbk_stub(call_frame_t *frame, fop_getactivelk_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_GETACTIVELK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1759,7 +1611,6 @@ fop_getactivelk_stub(call_frame_t *frame, fop_getactivelk_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_GETACTIVELK); @@ -1781,8 +1632,6 @@ fop_setactivelk_cbk_stub(call_frame_t *frame, fop_setactivelk_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_SETACTIVELK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1803,7 +1652,6 @@ fop_setactivelk_stub(call_frame_t *frame, fop_setactivelk_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); 
GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_SETACTIVELK); @@ -1818,13 +1666,55 @@ out: } call_stub_t * +fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t len, uint32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO("call-stub", fn, out); + + stub = stub_new(frame, 1, GF_FOP_COPY_FILE_RANGE); + GF_VALIDATE_OR_GOTO("call-stub", stub, out); + + stub->fn.copy_file_range = fn; + + args_copy_file_range_store(&stub->args, fd_in, off_in, fd_out, off_out, len, + flags, xdata); + +out: + return stub; +} + +call_stub_t * +fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO("call-stub", fn, out); + + stub = stub_new(frame, 0, GF_FOP_COPY_FILE_RANGE); + GF_VALIDATE_OR_GOTO("call-stub", stub, out); + + stub->fn_cbk.copy_file_range = fn; + args_copy_file_range_cbk_store(&stub->args_cbk, op_ret, op_errno, stbuf, + prebuf_dst, postbuf_dst, xdata); + +out: + return stub; +} + +call_stub_t * fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode, mode_t umask, uint32_t flags, struct iovec *vector, int32_t count, off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata) { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", vector, out); stub = stub_new(frame, 1, GF_FOP_PUT); @@ -1844,8 +1734,6 @@ fop_put_cbk_stub(call_frame_t *frame, fop_put_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_PUT); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1862,7 +1750,6 @@ fop_icreate_stub(call_frame_t *frame, fop_icreate_t fn, loc_t *loc, mode_t mode, { call_stub_t *stub 
= NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_ICREATE); @@ -1902,8 +1789,6 @@ fop_icreate_cbk_stub(call_frame_t *frame, fop_icreate_cbk_t fn, int32_t op_ret, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_ICREATE); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -1921,7 +1806,6 @@ fop_namelink_stub(call_frame_t *frame, fop_namelink_t fn, loc_t *loc, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); GF_VALIDATE_OR_GOTO("call-stub", fn, out); stub = stub_new(frame, 1, GF_FOP_NAMELINK); @@ -1961,8 +1845,6 @@ fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn, { call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO("call-stub", frame, out); - stub = stub_new(frame, 0, GF_FOP_NAMELINK); GF_VALIDATE_OR_GOTO("call-stub", stub, out); @@ -2213,6 +2095,13 @@ call_resume_wind(call_stub_t *stub) stub->args.iobref, stub->args.xattr, stub->args.xdata); break; + case GF_FOP_COPY_FILE_RANGE: + stub->fn.copy_file_range( + stub->frame, stub->frame->this, stub->args.fd, + stub->args.off_in, stub->args.fd_dst, stub->args.off_out, + stub->args.size, stub->args.flags, stub->args.xdata); + break; + default: gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ENTRY, @@ -2439,6 +2328,12 @@ call_resume_unwind(call_stub_t *stub) stub->args_cbk.xdata); break; + case GF_FOP_COPY_FILE_RANGE: + STUB_UNWIND(stub, copy_file_range, &stub->args_cbk.stat, + &stub->args_cbk.prestat, &stub->args_cbk.poststat, + stub->args_cbk.xdata); + break; + default: gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ENTRY, diff --git a/libglusterfs/src/circ-buff.c b/libglusterfs/src/circ-buff.c index cb37ed30ea2..913115c7be1 100644 --- a/libglusterfs/src/circ-buff.c +++ b/libglusterfs/src/circ-buff.c @@ -8,8 +8,8 @@ cases as published by the Free Software Foundation. 
*/ -#include "circ-buff.h" -#include "libglusterfs-messages.h" +#include "glusterfs/circ-buff.h" +#include "glusterfs/libglusterfs-messages.h" void cb_destroy_data(circular_buffer_t *cb, void (*destroy_buffer_data)(void *data)) diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c index 586cbd84e5c..9d377c3c2e1 100644 --- a/libglusterfs/src/client_t.c +++ b/libglusterfs/src/client_t.c @@ -8,13 +8,12 @@ cases as published by the Free Software Foundation. */ -#include "glusterfs.h" -#include "dict.h" -#include "statedump.h" -#include "client_t.h" -#include "list.h" -#include "rpcsvc.h" -#include "libglusterfs-messages.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/dict.h" +#include "glusterfs/statedump.h" +#include "glusterfs/client_t.h" +#include "glusterfs/list.h" +#include "glusterfs/libglusterfs-messages.h" static int gf_client_chain_client_entries(cliententry_t *entries, uint32_t startidx, @@ -110,50 +109,13 @@ gf_clienttable_alloc(void) return clienttable; } -void -gf_client_clienttable_destroy(clienttable_t *clienttable) -{ - client_t *client = NULL; - cliententry_t *cliententries = NULL; - uint32_t client_count = 0; - int32_t i = 0; - - if (!clienttable) { - gf_msg_callingfn("client_t", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "!clienttable"); - return; - } - - LOCK(&clienttable->lock); - { - client_count = clienttable->max_clients; - clienttable->max_clients = 0; - cliententries = clienttable->cliententries; - clienttable->cliententries = NULL; - } - UNLOCK(&clienttable->lock); - - if (cliententries != NULL) { - for (i = 0; i < client_count; i++) { - client = cliententries[i].client; - if (client != NULL) { - gf_client_unref(client); - } - } - - GF_FREE(cliententries); - LOCK_DESTROY(&clienttable->lock); - GF_FREE(clienttable); - } -} - /* * Increments ref.bind if the client is already present or creates a new * client with ref.bind = 1,ref.count = 1 it signifies that * as long as ref.bind is > 0 client should be alive. 
*/ client_t * -gf_client_get(xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid, +gf_client_get(xlator_t *this, client_auth_data_t *cred, char *client_uid, char *subdir_mount) { client_t *client = NULL; @@ -181,11 +143,10 @@ gf_client_get(xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid, * if auth was used, matching auth flavour and data */ if (strcmp(client_uid, client->client_uid) == 0 && - (cred->flavour != AUTH_NONE && - (cred->flavour == client->auth.flavour && - (size_t)cred->datalen == client->auth.len && - memcmp(cred->authdata, client->auth.data, client->auth.len) == - 0))) { + (cred->flavour && (cred->flavour == client->auth.flavour && + (size_t)cred->datalen == client->auth.len && + memcmp(cred->authdata, client->auth.data, + client->auth.len) == 0))) { GF_ATOMIC_INC(client->bind); goto unlock; } @@ -224,9 +185,10 @@ gf_client_get(xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid, GF_ATOMIC_INIT(client->bind, 1); GF_ATOMIC_INIT(client->count, 1); + GF_ATOMIC_INIT(client->fd_cnt, 0); client->auth.flavour = cred->flavour; - if (cred->flavour != AUTH_NONE) { + if (cred->flavour) { client->auth.data = GF_MALLOC(cred->datalen, gf_common_mt_client_t); if (client->auth.data == NULL) { GF_FREE(client->scratch_ctx.ctx); @@ -352,8 +314,6 @@ client_destroy(client_t *client) clienttable = client->this->ctx->clienttable; - LOCK_DESTROY(&client->scratch_ctx.lock); - LOCK(&clienttable->lock); { clienttable->cliententries[client->tbl_index].client = NULL; @@ -371,6 +331,8 @@ client_destroy(client_t *client) if (client->subdir_inode) inode_unref(client->subdir_inode); + LOCK_DESTROY(&client->scratch_ctx.lock); + GF_FREE(client->auth.data); GF_FREE(client->auth.username); GF_FREE(client->auth.passwd); @@ -580,62 +542,6 @@ client_ctx_del(client_t *client, void *key, void **value) } void -client_dump(client_t *client, char *prefix) -{ - if (!client) - return; - - gf_proc_dump_write("refcount", "%" GF_PRI_ATOMIC, - 
GF_ATOMIC_GET(client->count)); -} - -void -cliententry_dump(cliententry_t *cliententry, char *prefix) -{ - if (!cliententry) - return; - - if (GF_CLIENTENTRY_ALLOCATED != cliententry->next_free) - return; - - if (cliententry->client) - client_dump(cliententry->client, prefix); -} - -void -clienttable_dump(clienttable_t *clienttable, char *prefix) -{ - int i = 0; - int ret = -1; - char key[GF_DUMP_MAX_BUF_LEN] = {0}; - - if (!clienttable) - return; - - ret = TRY_LOCK(&clienttable->lock); - { - if (ret) { - gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED, - "Unable to acquire lock"); - return; - } - gf_proc_dump_build_key(key, prefix, "maxclients"); - gf_proc_dump_write(key, "%d", clienttable->max_clients); - gf_proc_dump_build_key(key, prefix, "first_free"); - gf_proc_dump_write(key, "%d", clienttable->first_free); - for (i = 0; i < clienttable->max_clients; i++) { - if (GF_CLIENTENTRY_ALLOCATED == - clienttable->cliententries[i].next_free) { - gf_proc_dump_build_key(key, prefix, "cliententry[%d]", i); - gf_proc_dump_add_section("%s", key); - cliententry_dump(&clienttable->cliententries[i], key); - } - } - } - UNLOCK(&clienttable->lock); -} - -void client_ctx_dump(client_t *client, char *prefix) { #if 0 /* TBD, FIXME */ diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c index dab00577886..6ee89ddfdcf 100644 --- a/libglusterfs/src/cluster-syncop.c +++ b/libglusterfs/src/cluster-syncop.c @@ -14,8 +14,8 @@ /* NOTE: Cluster-syncop, like syncop blocks the executing thread until the * responses are gathered if it is not executed as part of synctask. So it * shouldn't be invoked in epoll worker thread */ -#include "cluster-syncop.h" -#include "defaults.h" +#include "glusterfs/cluster-syncop.h" +#include "glusterfs/defaults.h" #define FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fop, \ args...) 
\ @@ -1203,6 +1203,10 @@ cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on, if (num_success) { FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, inodelk, dom, &loc, F_SETLKW, &flock, NULL); + } else { + loc_wipe(&loc); + memset(locked_on, 0, numsubvols); + return 0; } break; } @@ -1244,7 +1248,9 @@ cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on, entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); } else { + loc_wipe(&loc); memset(locked_on, 0, numsubvols); + return 0; } break; } diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index aae9858fe22..682cbf28055 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -24,7 +24,6 @@ #include <time.h> #include <locale.h> #include <sys/socket.h> -#include <sys/wait.h> #include <netinet/in.h> #include <arpa/inet.h> #include <signal.h> @@ -35,25 +34,26 @@ #if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS) #include <sys/sysctl.h> #endif -#include <libgen.h> #ifndef GF_LINUX_HOST_OS #include <sys/resource.h> #endif +#ifdef HAVE_SYNCFS_SYS +#include <sys/syscall.h> +#endif -#include "compat-errno.h" -#include "logging.h" -#include "common-utils.h" -#include "revision.h" -#include "glusterfs.h" -#include "stack.h" -#include "lkowner.h" -#include "syscall.h" -#include "cli1-xdr.h" +#include "glusterfs/compat-errno.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/revision.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/stack.h" +#include "glusterfs/lkowner.h" +#include "glusterfs/syscall.h" +#include "glusterfs/globals.h" #define XXH_INLINE_ALL #include "xxhash.h" #include <ifaddrs.h> -#include "libglusterfs-messages.h" -#include "protocol-common.h" +#include "glusterfs/libglusterfs-messages.h" +#include "glusterfs/glusterfs-acl.h" #ifdef __FreeBSD__ #include <pthread_np.h> #undef BIT_SET @@ -65,20 +65,29 @@ char *vol_type_str[] = { "Distribute", - "Stripe", + "Stripe [NOT SUPPORTED from 
v6.0]", "Replicate", - "Striped-Replicate", + "Striped-Replicate [NOT SUPPORTED from v6.0]", "Disperse", - "Tier", - "Distributed-Stripe", + "Tier [NOT SUPPORTED from v6.0]", + "Distributed-Stripe [NOT SUPPORTED from v6.0]", "Distributed-Replicate", - "Distributed-Striped-Replicate", + "Distributed-Striped-Replicate [NOT SUPPORTED from v6.0]", "Distributed-Disperse", }; typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size); typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size); +char *xattrs_to_heal[] = {"user.", + POSIX_ACL_ACCESS_XATTR, + POSIX_ACL_DEFAULT_XATTR, + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + GF_SELINUX_XATTR_KEY, + GF_XATTR_MDATA_KEY, + NULL}; + void gf_xxh64_wrapper(const unsigned char *data, size_t const len, unsigned long long const seed, char *xxh64) @@ -306,8 +315,7 @@ mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks) dir[i] = '\0'; ret = sys_mkdir(dir, mode); if (ret && errno != EEXIST) { - gf_msg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, - "Failed due to reason"); + gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, NULL); goto out; } @@ -318,10 +326,8 @@ mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks) if (S_ISLNK(stbuf.st_mode)) { ret = -1; - gf_msg("", GF_LOG_ERROR, 0, LG_MSG_DIR_IS_SYMLINK, - "%s is a " - "symlink", - dir); + gf_smsg("", GF_LOG_ERROR, 0, LG_MSG_DIR_IS_SYMLINK, "dir=%s", + dir, NULL); goto out; } } @@ -334,10 +340,10 @@ mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks) if (ret == 0) errno = 0; ret = -1; - gf_msg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, - "Failed" - " to create directory, possibly some of the components" - " were not directories"); + gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, + "possibly some of the components" + " were not directories", + NULL); goto out; } @@ -410,10 +416,8 @@ gf_rev_dns_lookup(const char *ip) /* Get the FQDN */ ret = gf_get_hostname_from_ip((char *)ip, &fqdn); if (ret != 0) { - 
gf_msg("resolver", GF_LOG_INFO, errno, LG_MSG_RESOLVE_HOSTNAME_FAILED, - "could not resolve " - "hostname for %s", - ip); + gf_smsg("resolver", GF_LOG_INFO, errno, LG_MSG_RESOLVE_HOSTNAME_FAILED, + "hostname=%s", ip, NULL); } out: return fqdn; @@ -434,7 +438,7 @@ gf_resolve_path_parent(const char *path) GF_VALIDATE_OR_GOTO(THIS->name, path, out); - if (strlen(path) <= 0) { + if (0 == strlen(path)) { gf_msg_callingfn(THIS->name, GF_LOG_DEBUG, 0, LG_MSG_INVALID_STRING, "invalid string for 'path'"); goto out; @@ -502,10 +506,8 @@ gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache, } if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) != 0) { - gf_msg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED, - "getaddrinfo failed" - " (%s)", - gai_strerror(ret)); + gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED, + "family=%d", family, "ret=%s", gai_strerror(ret), NULL); GF_FREE(*dnscache); *dnscache = NULL; @@ -522,10 +524,8 @@ gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache, cache->next->ai_addrlen, host, sizeof(host), service, sizeof(service), NI_NUMERICHOST); if (ret != 0) { - gf_msg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED, - "getnameinfo failed" - " (%s)", - gai_strerror(ret)); + gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED, + "ret=%s", gai_strerror(ret), NULL); goto err; } @@ -544,10 +544,8 @@ gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache, cache->next->ai_addrlen, host, sizeof(host), service, sizeof(service), NI_NUMERICHOST); if (ret != 0) { - gf_msg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED, - "getnameinfo failed" - " (%s)", - gai_strerror(ret)); + gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED, + "ret=%s", gai_strerror(ret), NULL); goto err; } @@ -579,8 +577,14 @@ struct dnscache * gf_dnscache_init(time_t ttl) { struct dnscache *cache = GF_MALLOC(sizeof(*cache), 
gf_common_mt_dnscache); - if (cache) { - cache->cache_dict = NULL; + if (!cache) + return NULL; + + cache->cache_dict = dict_new(); + if (!cache->cache_dict) { + GF_FREE(cache); + cache = NULL; + } else { cache->ttl = ttl; } @@ -588,6 +592,20 @@ gf_dnscache_init(time_t ttl) } /** + * gf_dnscache_deinit -- cleanup resources used by struct dnscache + */ +void +gf_dnscache_deinit(struct dnscache *cache) +{ + if (!cache) { + gf_msg_plain(GF_LOG_WARNING, "dnscache is NULL"); + return; + } + dict_unref(cache->cache_dict); + GF_FREE(cache); +} + +/** * gf_dnscache_entry_init -- Initialize a dnscache entry * * @return: SUCCESS: Pointer to an allocated dnscache entry struct @@ -635,12 +653,6 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache) if (!dnscache) goto out; - if (!dnscache->cache_dict) { - dnscache->cache_dict = dict_new(); - if (!dnscache->cache_dict) { - goto out; - } - } cache = dnscache->cache_dict; /* Quick cache lookup to see if we already hold it */ @@ -648,7 +660,7 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache) if (entrydata) { dnsentry = (struct dnscache_entry *)entrydata->data; /* First check the TTL & timestamp */ - if (time(NULL) - dnsentry->timestamp > dnscache->ttl) { + if (gf_time() - dnsentry->timestamp > dnscache->ttl) { gf_dnscache_entry_deinit(dnsentry); entrydata->data = NULL; /* Mark this as 'null' so * dict_del () doesn't try free @@ -679,23 +691,16 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache) from_cache = _gf_false; out: /* Insert into the cache */ - if (fqdn && !from_cache) { + if (fqdn && !from_cache && ip) { struct dnscache_entry *entry = gf_dnscache_entry_init(); - if (!entry) { - goto out; + if (entry) { + entry->fqdn = fqdn; + entry->ip = gf_strdup(ip); + entry->timestamp = gf_time(); + entrydata = bin_to_data(entry, sizeof(*entry)); + dict_set(cache, (char *)ip, entrydata); } - entry->fqdn = fqdn; - if (!ip) { - gf_dnscache_entry_deinit(entry); - goto out; - } - - 
entry->ip = gf_strdup(ip); - entry->timestamp = time(NULL); - - entrydata = bin_to_data(entry, sizeof(*entry)); - dict_set(cache, (char *)ip, entrydata); } return fqdn; } @@ -750,7 +755,6 @@ xldump_subvolumes(xlator_t *this, void *d) int len = 0; char *subvstr = NULL; - subv = this->children; if (!this->children) return; @@ -905,7 +909,7 @@ gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx) char msg[1024] = { 0, }; - char timestr[64] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; call_stack_t *stack = NULL; @@ -945,7 +949,7 @@ gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx) { /* Dump the timestamp of the crash too, so the previous logs can be related */ - gf_time_fmt(timestr, sizeof timestr, time(NULL), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT); gf_msg_plain_nomem(GF_LOG_ALERT, "time of crash: "); gf_msg_plain_nomem(GF_LOG_ALERT, timestr); } @@ -1703,7 +1707,7 @@ gf_uint64_2human_readable(uint64_t n) if (ret < 0) goto err; } else { - ret = gf_asprintf(&str, "%luBytes", n); + ret = gf_asprintf(&str, "%" PRIu64 "Bytes", n); if (ret < 0) goto err; } @@ -1800,22 +1804,6 @@ gf_string2bytesize_range(const char *str, uint64_t *n, uint64_t umax) } int -gf_string2bytesize_size(const char *str, size_t *n) -{ - uint64_t u64; - size_t max = (size_t)-1; - int val = gf_string2bytesize_range(str, &u64, max); - *n = (size_t)u64; - return val; -} - -int -gf_string2bytesize(const char *str, uint64_t *n) -{ - return gf_string2bytesize_range(str, n, UINT64_MAX); -} - -int gf_string2bytesize_uint64(const char *str, uint64_t *n) { return gf_string2bytesize_range(str, n, UINT64_MAX); @@ -1963,6 +1951,74 @@ gf_string2boolean(const char *str, gf_boolean_t *b) } int +gf_strn2boolean(const char *str, const int len, gf_boolean_t *b) +{ + if (str == NULL) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, + "argument invalid"); + return -1; + } + + switch (len) { + case 1: + if (strcasecmp(str, "1") == 0) { + *b = _gf_true; 
+ return 0; + } else if (strcasecmp(str, "0") == 0) { + *b = _gf_false; + return 0; + } + break; + case 2: + if (strcasecmp(str, "on") == 0) { + *b = _gf_true; + return 0; + } else if (strcasecmp(str, "no") == 0) { + *b = _gf_false; + return 0; + } + break; + case 3: + if (strcasecmp(str, "yes") == 0) { + *b = _gf_true; + return 0; + } else if (strcasecmp(str, "off") == 0) { + *b = _gf_false; + return 0; + } + break; + case 4: + if (strcasecmp(str, "true") == 0) { + *b = _gf_true; + return 0; + } + break; + case 5: + if (strcasecmp(str, "false") == 0) { + *b = _gf_false; + return 0; + } + break; + case 6: + if (strcasecmp(str, "enable") == 0) { + *b = _gf_true; + return 0; + } + break; + case 7: + if (strcasecmp(str, "disable") == 0) { + *b = _gf_false; + return 0; + } + break; + default: + return -1; + break; + } + return -1; +} + +int gf_lockfd(int fd) { struct gf_flock fl; @@ -1989,7 +2045,7 @@ gf_unlockfd(int fd) } static void -compute_checksum(char *buf, size_t size, uint32_t *checksum) +compute_checksum(char *buf, const ssize_t size, uint32_t *checksum) { int ret = -1; char *checksum_buf = NULL; @@ -2020,7 +2076,7 @@ compute_checksum(char *buf, size_t size, uint32_t *checksum) #define GF_CHECKSUM_BUF_SIZE 1024 int -get_checksum_for_file(int fd, uint32_t *checksum) +get_checksum_for_file(int fd, uint32_t *checksum, int op_version) { int ret = -1; char buf[GF_CHECKSUM_BUF_SIZE] = { @@ -2031,8 +2087,12 @@ get_checksum_for_file(int fd, uint32_t *checksum) sys_lseek(fd, 0L, SEEK_SET); do { ret = sys_read(fd, &buf, GF_CHECKSUM_BUF_SIZE); - if (ret > 0) - compute_checksum(buf, GF_CHECKSUM_BUF_SIZE, checksum); + if (ret > 0) { + if (op_version < GD_OP_VERSION_5_4) + compute_checksum(buf, GF_CHECKSUM_BUF_SIZE, checksum); + else + compute_checksum(buf, ret, checksum); + } } while (ret > 0); /* set it back */ @@ -2042,7 +2102,7 @@ get_checksum_for_file(int fd, uint32_t *checksum) } int -get_checksum_for_path(char *path, uint32_t *checksum) +get_checksum_for_path(char 
*path, uint32_t *checksum, int op_version) { int ret = -1; int fd = -1; @@ -2053,12 +2113,12 @@ get_checksum_for_path(char *path, uint32_t *checksum) fd = open(path, O_RDWR); if (fd == -1) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_PATH_ERROR, - "Unable to open %s", path); + gf_smsg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_PATH_OPEN_FAILED, + "path=%s", path, NULL); goto out; } - ret = get_checksum_for_file(fd, checksum); + ret = get_checksum_for_file(fd, checksum, op_version); out: if (fd != -1) @@ -2087,8 +2147,8 @@ get_file_mtime(const char *path, time_t *stamp) ret = sys_stat(path, &f_stat); if (ret < 0) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED, - "failed to stat %s", path); + gf_smsg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED, + "path=%s", path, NULL); goto out; } @@ -2107,7 +2167,7 @@ out: * @ip_str : The IP to check * @network: The network to check the IP against. * - * @return: success: 0 + * @return: success: _gf_true * failure: -EINVAL for bad args, retval of inet_pton otherwise */ gf_boolean_t @@ -2132,7 +2192,6 @@ gf_is_ip_in_net(const char *network, const char *ip_str) else if (strchr(network, '.')) family = AF_INET; else { - family = -1; goto out; } @@ -2148,14 +2207,14 @@ gf_is_ip_in_net(const char *network, const char *ip_str) /* Convert IP address to a long */ ret = inet_pton(family, ip_str, &ip_buf); if (ret < 0) - gf_msg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED, - "inet_pton() failed"); + gf_smsg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED, + NULL); /* Convert network IP address to a long */ ret = inet_pton(family, net_ip, &net_ip_buf); if (ret < 0) { - gf_msg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED, - "inet_pton() failed"); + gf_smsg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED, + NULL); goto out; } @@ -2299,7 +2358,7 @@ next_token(char **tokenp, token_iter_t *tit) * #include <stdio.h> * #include <stdlib.h> * #include 
<string.h> - * #include "common-utils.h" + * #include "glusterfs/common-utils.h" * * int * main (int argc, char **argv) @@ -2449,7 +2508,6 @@ valid_ipv4_address(char *address, int length, gf_boolean_t wildcard_acc) goto out; } - prev = tmp; prev = strtok_r(tmp, ".", &ptr); while (prev != NULL) { @@ -2476,6 +2534,31 @@ out: return ret; } +char +valid_cidr_address(char *cidr_address, gf_boolean_t wildcard_acc) +{ + unsigned int net_mask = 0, len = 0; + char *temp = NULL, *cidr_str = NULL, ret = 1; + + cidr_str = strdupa(cidr_address); + temp = strstr(cidr_str, "/"); + if (temp == NULL) + return 0; /* Since Invalid cidr ip address we return 0 */ + + *temp = '\0'; + temp++; + net_mask = (unsigned int)atoi(temp); + + if (net_mask > 32 || net_mask < 1) + return 0; /* Since Invalid cidr ip address we return 0*/ + + len = strlen(cidr_str); + + ret = valid_ipv4_address(cidr_str, len, wildcard_acc); + + return ret; +} + /** * valid_ipv4_subnetwork() takes the pattern and checks if it contains * a valid ipv4 subnetwork pattern i.e. xx.xx.xx.xx/n. 
IPv4 address @@ -2612,7 +2695,8 @@ out: } char -valid_internet_address(char *address, gf_boolean_t wildcard_acc) +valid_internet_address(char *address, gf_boolean_t wildcard_acc, + gf_boolean_t cidr) { char ret = 0; int length = 0; @@ -2627,6 +2711,10 @@ valid_internet_address(char *address, gf_boolean_t wildcard_acc) if (length == 0) goto out; + if (cidr && valid_cidr_address(address, wildcard_acc)) { + ret = 1; + } + if (valid_ipv4_address(address, length, wildcard_acc) || valid_ipv6_address(address, length, wildcard_acc) || valid_host_name(address, length)) @@ -2706,8 +2794,8 @@ gf_boolean_t gf_sock_union_equal_addr(union gf_sock_union *a, union gf_sock_union *b) { if (!a || !b) { - gf_msg("common-utils", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, - "Invalid arguments to gf_sock_union_equal_addr"); + gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, + "gf_sock_union_equal_addr", NULL); return _gf_false; } @@ -2935,8 +3023,8 @@ gf_roundup_power_of_two(int32_t nr) int32_t result = 1; if (nr < 0) { - gf_msg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED, - "negative number passed"); + gf_smsg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED, + NULL); result = -1; goto out; } @@ -2959,8 +3047,8 @@ gf_roundup_next_power_of_two(int32_t nr) int32_t result = 1; if (nr < 0) { - gf_msg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED, - "negative number passed"); + gf_smsg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED, + NULL); result = -1; goto out; } @@ -2973,16 +3061,6 @@ out: } int -get_vol_type(int type, int dist_count, int brick_count) -{ - if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) && - (dist_count < brick_count)) - type = type + GF_CLUSTER_TYPE_MAX - 1; - - return type; -} - -int validate_brick_name(char *brick) { char *delimiter = NULL; @@ -3055,7 +3133,7 @@ get_mem_size() memsize = page_size * num_pages; #endif -#if defined GF_DARWIN_HOST_OS +#if defined GF_DARWIN_HOST_OS || defined 
__FreeBSD__ size_t len = sizeof(memsize); int name[] = {CTL_HW, HW_PHYSMEM}; @@ -3132,7 +3210,7 @@ gf_canonicalize_path(char *path) while (dir) { dir_path_len = strlen(dir); - strncpy((path + path_len + 1), dir, dir_path_len); + memcpy((path + path_len + 1), dir, dir_path_len); path_len += dir_path_len + 1; dir = strtok_r(NULL, "/", &tmpstr); if (dir) { @@ -3144,8 +3222,7 @@ gf_canonicalize_path(char *path) out: if (ret) - gf_msg("common-utils", GF_LOG_ERROR, 0, LG_MSG_PATH_ERROR, - "Path manipulation failed"); + gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_PATH_ERROR, NULL); GF_FREE(tmppath); @@ -3199,19 +3276,15 @@ gf_get_reserved_ports() * continue with older method of using any of the available * port? For now 2nd option is considered. */ - gf_msg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, - "could not open the file " - "/proc/sys/net/ipv4/ip_local_reserved_ports for " - "getting reserved ports info"); + gf_smsg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, + " /proc/sys/net/ipv4/ip_local_reserved_ports", NULL); goto out; } ret = sys_read(proc_fd, buffer, sizeof(buffer) - 1); if (ret < 0) { - gf_msg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, - "could not read the file %s for" - " getting reserved ports info", - proc_file); + gf_smsg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, + "file=%s", proc_file, NULL); goto out; } @@ -3239,10 +3312,8 @@ gf_process_reserved_ports(unsigned char *ports, uint32_t ceiling) ports_info = gf_get_reserved_ports(); if (!ports_info) { - gf_msg("glusterfs", GF_LOG_WARNING, 0, LG_MSG_RESERVED_PORTS_ERROR, - "Not able to get reserved" - " ports, hence there is a possibility that glusterfs " - "may consume reserved port"); + gf_smsg("glusterfs", GF_LOG_WARNING, 0, LG_MSG_RESERVED_PORTS_ERROR, + NULL); goto out; } @@ -3278,9 +3349,9 @@ gf_ports_reserved(char *blocked_port, unsigned char *ports, uint32_t ceiling) if (blocked_port[strlen(blocked_port) - 1] == '\n') 
blocked_port[strlen(blocked_port) - 1] = '\0'; if (gf_string2int32(blocked_port, &tmp_port1) == 0) { - if (tmp_port1 > ceiling || tmp_port1 < 0) { - gf_msg("glusterfs-socket", GF_LOG_WARNING, 0, - LG_MSG_INVALID_PORT, "invalid port %d", tmp_port1); + if (tmp_port1 > GF_PORT_MAX || tmp_port1 < 0) { + gf_smsg("glusterfs-socket", GF_LOG_WARNING, 0, + LG_MSG_INVALID_PORT, "port=%d", tmp_port1, NULL); result = _gf_true; goto out; } else { @@ -3291,10 +3362,8 @@ gf_ports_reserved(char *blocked_port, unsigned char *ports, uint32_t ceiling) BIT_SET(ports, tmp_port1); } } else { - gf_msg("glusterfs-socket", GF_LOG_WARNING, 0, LG_MSG_INVALID_PORT, - "%s is not a valid port " - "identifier", - blocked_port); + gf_smsg("glusterfs-socket", GF_LOG_WARNING, 0, LG_MSG_INVALID_PORT, + "port=%s", blocked_port, NULL); result = _gf_true; goto out; } @@ -3396,10 +3465,8 @@ gf_get_hostname_from_ip(char *client_ip, char **hostname) ret = getnameinfo(client_sockaddr, addr_sz, client_hostname, sizeof(client_hostname), NULL, 0, 0); if (ret) { - gf_msg("common-utils", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED, - "Could not lookup hostname " - "of %s : %s", - client_ip, gai_strerror(ret)); + gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED, + "ip=%s", client_ip, "ret=%s", gai_strerror(ret), NULL); ret = -1; goto out; } @@ -3428,8 +3495,8 @@ gf_interface_search(char *ip) ret = getifaddrs(&ifaddr); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETIFADDRS_FAILED, - "getifaddrs() failed: %s\n", gai_strerror(ret)); + gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETIFADDRS_FAILED, "ret=%s", + gai_strerror(ret), NULL); goto out; } @@ -3453,10 +3520,8 @@ gf_interface_search(char *ip) host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED, - "getnameinfo() " - "failed: %s\n", - gai_strerror(ret)); + gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED, + "ret=%s", gai_strerror(ret), NULL); 
goto out; } @@ -3506,14 +3571,12 @@ get_ip_from_addrinfo(struct addrinfo *addr, char **ip) break; default: - gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY, - "Invalid family"); + gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY, NULL); return NULL; } if (!inet_ntop(addr->ai_family, in_addr, buf, sizeof(buf))) { - gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_CONVERSION_FAILED, - "String conversion failed"); + gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_CONVERSION_FAILED, NULL); return NULL; } @@ -3548,10 +3611,9 @@ gf_is_loopback_localhost(const struct sockaddr *sa, char *hostname) default: if (hostname) - gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY, - "unknown " - "address family %d for %s", - sa->sa_family, hostname); + gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY, + "family=%d", sa->sa_family, "hostname=%s", hostname, + NULL); break; } @@ -3581,23 +3643,27 @@ gf_is_local_addr(char *hostname) ret = getaddrinfo(hostname, NULL, &hints, &result); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED, - "error in getaddrinfo: %s\n", gai_strerror(ret)); + gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED, + "ret=%s", gai_strerror(ret), NULL); goto out; } for (res = result; res != NULL; res = res->ai_next) { - gf_msg_debug(this->name, 0, "%s ", get_ip_from_addrinfo(res, &ip)); + get_ip_from_addrinfo(res, &ip); + gf_msg_debug(this->name, 0, "%s ", ip); if (ip) { - found = gf_is_loopback_localhost(res->ai_addr, hostname) || - gf_interface_search(ip); + found = (gf_is_loopback_localhost(res->ai_addr, hostname) || + gf_interface_search(ip)); } if (found) { GF_FREE(ip); goto out; } GF_FREE(ip); + /* the above free will not set ip to NULL, and hence, there is + double free possible as the loop continues. set ip to NULL. 
*/ + ip = NULL; } out: @@ -3626,15 +3692,15 @@ gf_is_same_address(char *name1, char *name2) gai_err = getaddrinfo(name1, NULL, &hints, &addr1); if (gai_err != 0) { - gf_msg(name1, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, - "error in getaddrinfo: %s\n", gai_strerror(gai_err)); + gf_smsg(name1, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, "error=%s", + gai_strerror(gai_err), NULL); goto out; } gai_err = getaddrinfo(name2, NULL, &hints, &addr2); if (gai_err != 0) { - gf_msg(name2, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, - "error in getaddrinfo: %s\n", gai_strerror(gai_err)); + gf_smsg(name2, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, "error=%s", + gai_strerror(gai_err), NULL); goto out; } @@ -3771,8 +3837,10 @@ gf_set_volfile_server_common(cmd_args_t *cmd_args, const char *host, if ((!strcmp(tmp->volfile_server, server->volfile_server) && !strcmp(tmp->transport, server->transport) && (tmp->port == server->port))) { - errno = EEXIST; - ret = -1; + /* Duplicate option given, log and ignore */ + gf_smsg("gluster", GF_LOG_INFO, EEXIST, LG_MSG_DUPLICATE_ENTRY, + NULL); + ret = 0; goto out; } } @@ -3935,19 +4003,63 @@ error_return: return ret; } +void +gf_thread_set_vname(pthread_t thread, const char *name, va_list args) +{ + char thread_name[GF_THREAD_NAME_LIMIT]; + int ret; + + /* Initialize the thread name with the prefix (not NULL terminated). 
*/ + memcpy(thread_name, GF_THREAD_NAME_PREFIX, + sizeof(GF_THREAD_NAME_PREFIX) - 1); + + ret = vsnprintf(thread_name + sizeof(GF_THREAD_NAME_PREFIX) - 1, + sizeof(thread_name) - sizeof(GF_THREAD_NAME_PREFIX) + 1, + name, args); + if (ret < 0) { + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PTHREAD_NAMING_FAILED, + "name=%s", name, NULL); + return; + } + + if (ret >= sizeof(thread_name)) { + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_THREAD_NAME_TOO_LONG, + "name=%s", thread_name, NULL); + } + +#ifdef GF_LINUX_HOST_OS + ret = pthread_setname_np(thread, thread_name); +#elif defined(__NetBSD__) + ret = pthread_setname_np(thread, thread_name, NULL); +#elif defined(__FreeBSD__) + pthread_set_name_np(thread, thread_name); + ret = 0; +#else + ret = ENOSYS; +#endif + if (ret != 0) { + gf_smsg(THIS->name, GF_LOG_WARNING, ret, LG_MSG_SET_THREAD_FAILED, + "name=%s", thread_name, NULL); + } +} + +void +gf_thread_set_name(pthread_t thread, const char *name, ...) +{ + va_list args; + + va_start(args, name); + gf_thread_set_vname(thread, name, args); + va_end(args); +} + int -gf_thread_create(pthread_t *thread, const pthread_attr_t *attr, - void *(*start_routine)(void *), void *arg, const char *name) +gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg, const char *name, + va_list args) { sigset_t set, old; int ret; - char thread_name[GF_THREAD_NAMEMAX + GF_THREAD_NAME_PREFIX_LEN] = { - 0, - }; - /* Max name on Linux is 16 and on NetBSD is 32 - * All Gluster threads have a set prefix of gluster and hence the limit - * of 9 on GF_THREAD_NAMEMAX including the null character. 
- */ sigemptyset(&old); sigfillset(&set); @@ -3961,20 +4073,12 @@ gf_thread_create(pthread_t *thread, const pthread_attr_t *attr, pthread_sigmask(SIG_BLOCK, &set, &old); ret = pthread_create(thread, attr, start_routine, arg); - snprintf(thread_name, sizeof(thread_name), "%s%s", GF_THREAD_NAME_PREFIX, - name); - - if (0 == ret && name) { -#ifdef GF_LINUX_HOST_OS - pthread_setname_np(*thread, thread_name); -#elif defined(__NetBSD__) - pthread_setname_np(*thread, thread_name, NULL); -#elif defined(__FreeBSD__) - pthread_set_name_np(*thread, thread_name); -#else - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PTHREAD_NAMING_FAILED, - "Could not set thread name: %s", thread_name); -#endif + if (ret != 0) { + gf_smsg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_THREAD_CREATE_FAILED, + NULL); + ret = -1; + } else if (name != NULL) { + gf_thread_set_vname(*thread, name, args); } pthread_sigmask(SIG_SETMASK, &old, NULL); @@ -3983,27 +4087,40 @@ gf_thread_create(pthread_t *thread, const pthread_attr_t *attr, } int +gf_thread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg, const char *name, + ...) +{ + va_list args; + int ret; + + va_start(args, name); + ret = gf_thread_vcreate(thread, attr, start_routine, arg, name, args); + va_end(args); + + return ret; +} + +int gf_thread_create_detached(pthread_t *thread, void *(*start_routine)(void *), - void *arg, const char *name) + void *arg, const char *name, ...) 
{ pthread_attr_t attr; + va_list args; int ret = -1; ret = pthread_attr_init(&attr); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_ATTR_INIT_FAILED, - "Thread attribute initialization failed"); + gf_smsg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_ATTR_INIT_FAILED, + NULL); return -1; } pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - ret = gf_thread_create(thread, &attr, start_routine, arg, name); - if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_FAILED, - "Thread creation failed"); - ret = -1; - } + va_start(args, name); + ret = gf_thread_vcreate(thread, &attr, start_routine, arg, name, args); + va_end(args); pthread_attr_destroy(&attr); @@ -4017,8 +4134,7 @@ gf_skip_header_section(int fd, int header_len) ret = sys_lseek(fd, header_len, SEEK_SET); if (ret == (off_t)-1) { - gf_msg("", GF_LOG_ERROR, 0, LG_MSG_SKIP_HEADER_FAILED, - "Failed to skip header section"); + gf_smsg("", GF_LOG_ERROR, 0, LG_MSG_SKIP_HEADER_FAILED, NULL); } else { ret = 0; } @@ -4031,16 +4147,28 @@ gf_skip_header_section(int fd, int header_len) gf_boolean_t gf_is_pid_running(int pid) { +#ifdef __FreeBSD__ + int ret = -1; + + ret = sys_kill(pid, 0); + if (ret < 0) { + return _gf_false; + } +#else char fname[32] = { 0, }; + int fd = -1; snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid); - if (sys_access(fname, R_OK) != 0) { + fd = sys_open(fname, O_RDONLY, 0); + if (fd < 0) { return _gf_false; } + sys_close(fd); +#endif return _gf_true; } @@ -4061,14 +4189,15 @@ gf_is_service_running(char *pidfile, int *pid) ret = lockf(fno, F_TEST, 0); if (ret == -1) { running = _gf_true; - goto out; } ret = fscanf(file, "%d", pid); if (ret <= 0) { - gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, - "Unable to read pidfile: %s", pidfile); + gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, "pidfile=%s", + pidfile, NULL); *pid = -1; + running = _gf_false; + goto out; } running = gf_is_pid_running(*pid); @@ -4139,10 +4268,10 @@ 
gf_check_log_format(const char *value) log_format = gf_logformat_withmsgid; if (log_format == -1) - gf_msg( - THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG, - "Invalid log-format. possible values are " GF_LOG_FORMAT_NO_MSG_ID - "|" GF_LOG_FORMAT_WITH_MSG_ID); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG, + "possible_values=" GF_LOG_FORMAT_NO_MSG_ID + "|" GF_LOG_FORMAT_WITH_MSG_ID, + NULL); return log_format; } @@ -4158,9 +4287,9 @@ gf_check_logger(const char *value) logger = gf_logger_syslog; if (logger == -1) - gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG, - "Invalid logger. possible values are " GF_LOGGER_GLUSTER_LOG - "|" GF_LOGGER_SYSLOG); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG, + "possible_values=" GF_LOGGER_GLUSTER_LOG "|" GF_LOGGER_SYSLOG, + NULL); return logger; } @@ -4232,8 +4361,8 @@ gf_set_timestamp(const char *src, const char *dest) ret = sys_stat(src, &sb); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED, - "stat on %s", src); + gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED, + "stat=%s", src, NULL); goto out; } /* The granularity is nano seconds if `utimensat()` is available, @@ -4249,8 +4378,8 @@ gf_set_timestamp(const char *src, const char *dest) /* dirfd = 0 is ignored because `dest` is an absolute path. 
*/ ret = sys_utimensat(AT_FDCWD, dest, new_time, AT_SYMLINK_NOFOLLOW); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMENSAT_FAILED, - "utimensat on %s", dest); + gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMENSAT_FAILED, + "dest=%s", dest, NULL); } #else new_time[0].tv_sec = sb.st_atime; @@ -4261,8 +4390,8 @@ gf_set_timestamp(const char *src, const char *dest) ret = sys_utimes(dest, new_time); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMES_FAILED, - "utimes on %s", dest); + gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMES_FAILED, + "dest=%s", dest, NULL); } #endif out: @@ -4281,7 +4410,7 @@ gf_backtrace_end(char *buf, size_t frames) frames = min(frames, GF_BACKTRACE_LEN - pos - 1); - if (frames <= 0) + if (0 == frames) return; memset(buf + pos, ')', frames); @@ -4327,8 +4456,8 @@ gf_backtrace_fillframes(char *buf) */ ret = sys_unlink(tmpl); if (ret < 0) { - gf_msg(THIS->name, GF_LOG_INFO, 0, LG_MSG_FILE_OP_FAILED, - "Unable to delete temporary file: %s", tmpl); + gf_smsg(THIS->name, GF_LOG_INFO, 0, LG_MSG_FILE_DELETE_FAILED, + "temporary_file=%s", tmpl, NULL); } /*The most recent two frames are the calling function and @@ -4339,7 +4468,6 @@ gf_backtrace_fillframes(char *buf) fp = fdopen(fd, "r"); if (!fp) { sys_close(fd); - ret = -1; goto out; } @@ -4349,7 +4477,7 @@ gf_backtrace_fillframes(char *buf) pos = 0; for (idx = 0; idx < frames - 2; idx++) { - ret = fscanf(fp, "%s", callingfn[idx]); + ret = fscanf(fp, "%1023s", callingfn[idx]); if (ret == EOF) break; inc = gf_backtrace_append(buf, pos, callingfn[idx]); @@ -4389,8 +4517,7 @@ gf_backtrace_save(char *buf) if ((0 == gf_backtrace_fillframes(bt))) return bt; - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_BACKTRACE_SAVE_FAILED, - "Failed to save the backtrace."); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_BACKTRACE_SAVE_FAILED, NULL); return NULL; } @@ -4428,9 +4555,13 @@ fop_log_level(glusterfs_fop_t fop, int op_errno) return GF_LOG_DEBUG; if (fop == 
GF_FOP_SEEK) { +#ifdef HAVE_SEEK_HOLE if (op_errno == ENXIO) { return GF_LOG_DEBUG; } +#else + return GF_LOG_DEBUG; +#endif } return GF_LOG_ERROR; @@ -4477,16 +4608,16 @@ gf_build_absolute_path(char *current_path, char *relative_path, char **path) */ currentpath_len = strlen(current_path); if (current_path[0] != '/' || (currentpath_len > PATH_MAX)) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, - "Wrong value for current path %s", current_path); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_WRONG_VALUE, + "current-path=%s", current_path, NULL); ret = -EINVAL; goto err; } relativepath_len = strlen(relative_path); if (relative_path[0] == '/' || (relativepath_len > PATH_MAX)) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, - "Wrong value for relative path %s", relative_path); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_WRONG_VALUE, + "relative-path=%s", relative_path, NULL); ret = -EINVAL; goto err; } @@ -4602,8 +4733,9 @@ recursive_rmdir(const char *delete_path) goto out; } - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); - while (entry) { + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name); ret = sys_lstat(path, &st); if (ret == -1) { @@ -4629,8 +4761,6 @@ recursive_rmdir(const char *delete_path) gf_msg_debug(this->name, 0, "%s %s", ret ? 
"Failed to remove" : "Removed", entry->d_name); - - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); } ret = sys_closedir(dir); @@ -4954,8 +5084,8 @@ gf_zero_fill_stat(struct iatt *buf) gf_boolean_t gf_is_valid_xattr_namespace(char *key) { - static char *xattr_namespaces[] = {"trusted.", "security.", "system.", - "user.", NULL}; + static char *xattr_namespaces[] = {"trusted.", "system.", "user.", + "security.", NULL}; int i = 0; for (i = 0; xattr_namespaces[i]; i++) { @@ -5178,62 +5308,6 @@ glusterfs_compute_sha256(const unsigned char *content, size_t size, return 0; } -char * -get_struct_variable(int mem_num, gf_gsync_status_t *sts_val) -{ - switch (mem_num) { - case 0: - return (sts_val->node); - case 1: - return (sts_val->master); - case 2: - return (sts_val->brick); - case 3: - return (sts_val->slave_user); - case 4: - return (sts_val->slave); - case 5: - return (sts_val->slave_node); - case 6: - return (sts_val->worker_status); - case 7: - return (sts_val->crawl_status); - case 8: - return (sts_val->last_synced); - case 9: - return (sts_val->entry); - case 10: - return (sts_val->data); - case 11: - return (sts_val->meta); - case 12: - return (sts_val->failures); - case 13: - return (sts_val->checkpoint_time); - case 14: - return (sts_val->checkpoint_completed); - case 15: - return (sts_val->checkpoint_completion_time); - case 16: - return (sts_val->brick_host_uuid); - case 17: - return (sts_val->last_synced_utc); - case 18: - return (sts_val->checkpoint_time_utc); - case 19: - return (sts_val->checkpoint_completion_time_utc); - case 20: - return (sts_val->slavekey); - case 21: - return (sts_val->session_slave); - default: - goto out; - } - -out: - return NULL; -} - /* * Safe wrapper function for strncpy. 
* This wrapper makes sure that when there is no null byte among the first n in * source srting for strncpy function call, the string placed in dest will be @@ -5311,3 +5385,81 @@ gf_replace_new_iatt_in_dict(dict_t *xdata) return ret; } + +xlator_cmdline_option_t * +find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args) +{ + xlator_cmdline_option_t *pos = NULL; + xlator_cmdline_option_t *tmp = NULL; + + list_for_each_entry_safe(pos, tmp, &args->xlator_options, cmd_args) + { + if (strcmp(pos->key, option_name) == 0) + return pos; + } + return NULL; +} + +int +gf_d_type_from_ia_type(ia_type_t type) +{ + switch (type) { + case IA_IFDIR: + return DT_DIR; + case IA_IFCHR: + return DT_CHR; + case IA_IFBLK: + return DT_BLK; + case IA_IFIFO: + return DT_FIFO; + case IA_IFLNK: + return DT_LNK; + case IA_IFREG: + return DT_REG; + case IA_IFSOCK: + return DT_SOCK; + default: + return DT_UNKNOWN; + } +} + +int +gf_nanosleep(uint64_t nsec) +{ + struct timespec req; + struct timespec rem; + int ret = -1; + + req.tv_sec = nsec / GF_SEC_IN_NS; + req.tv_nsec = nsec % GF_SEC_IN_NS; + + do { + ret = nanosleep(&req, &rem); + req = rem; + } while (ret == -1 && errno == EINTR); + + return ret; +} + +int +gf_syncfs(int fd) +{ + int ret = 0; +#if defined(HAVE_SYNCFS) + /* Linux with glibc recent enough. */ + ret = syncfs(fd); +#elif defined(HAVE_SYNCFS_SYS) + /* Linux with no library function. */ + ret = syscall(SYS_syncfs, fd); +#else + /* Fallback to generic UNIX stuff. 
*/ + sync(); +#endif + return ret; +} + +char ** +get_xattrs_to_heal() +{ + return xattrs_to_heal; +} diff --git a/libglusterfs/src/compat-errno.c b/libglusterfs/src/compat-errno.c index 5051b75c772..df57e243239 100644 --- a/libglusterfs/src/compat-errno.c +++ b/libglusterfs/src/compat-errno.c @@ -10,7 +10,7 @@ #include <stdint.h> -#include "compat-errno.h" +#include "glusterfs/compat-errno.h" static int32_t gf_error_to_errno_array[1024]; static int32_t gf_errno_to_error_array[1024]; diff --git a/libglusterfs/src/compat.c b/libglusterfs/src/compat.c index 137bdf45cab..8a05a30a8fe 100644 --- a/libglusterfs/src/compat.c +++ b/libglusterfs/src/compat.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ -#include <string.h> #include <stdlib.h> #include <unistd.h> #include <stdarg.h> @@ -16,17 +15,12 @@ #include <sys/types.h> #include <dirent.h> -#ifdef GF_SOLARIS_HOST_OS -#include "logging.h" -#endif /* GF_SOLARIS_HOST_OS */ - -#include "compat.h" -#include "common-utils.h" -#include "iatt.h" -#include "inode.h" -#include "syscall.h" -#include "run.h" -#include "libglusterfs-messages.h" +#include "glusterfs/logging.h" +#include "glusterfs/compat.h" +#include "glusterfs/iatt.h" +#include "glusterfs/syscall.h" +#include "glusterfs/run.h" +#include "glusterfs/libglusterfs-messages.h" #ifdef GF_SOLARIS_HOST_OS int @@ -182,7 +176,7 @@ solaris_xattr_resolve_path(const char *real_path, char **path) if (!ret && export_path) { strcat(export_path, "/" GF_SOLARIS_XATTR_DIR); if (lstat(export_path, &statbuf)) { - ret = mkdir(export_path, 0777); + ret = mkdir(export_path, 0755); if (ret && (errno != EEXIST)) { gf_msg_debug(THIS->name, 0, "mkdir failed," diff --git a/libglusterfs/src/compound-fop-utils.c b/libglusterfs/src/compound-fop-utils.c deleted file mode 100644 index 8bdb8e8fd5b..00000000000 --- a/libglusterfs/src/compound-fop-utils.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. 
<http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include "defaults.h" -#include "default-args.h" -#include "mem-types.h" -#include "dict.h" - -void -compound_args_cleanup(compound_args_t *args) -{ - int i; - - if (!args) - return; - - if (args->xdata) - dict_unref(args->xdata); - - if (args->req_list) { - for (i = 0; i < args->fop_length; i++) { - args_wipe(&args->req_list[i]); - } - } - - GF_FREE(args->enum_list); - GF_FREE(args->req_list); - GF_FREE(args); -} - -void -compound_args_cbk_cleanup(compound_args_cbk_t *args_cbk) -{ - int i; - - if (!args_cbk) - return; - - if (args_cbk->xdata) - dict_unref(args_cbk->xdata); - - if (args_cbk->rsp_list) { - for (i = 0; i < args_cbk->fop_length; i++) { - args_cbk_wipe(&args_cbk->rsp_list[i]); - } - } - - GF_FREE(args_cbk->rsp_list); - GF_FREE(args_cbk->enum_list); - GF_FREE(args_cbk); -} - -compound_args_cbk_t * -compound_args_cbk_alloc(int length, dict_t *xdata) -{ - int i = 0; - compound_args_cbk_t *args_cbk = NULL; - - args_cbk = GF_CALLOC(1, sizeof(*args_cbk), gf_mt_compound_rsp_t); - if (!args_cbk) - return NULL; - - args_cbk->fop_length = length; - - args_cbk->rsp_list = GF_CALLOC(length, sizeof(*args_cbk->rsp_list), - gf_mt_default_args_cbk_t); - if (!args_cbk->rsp_list) - goto out; - - for (i = 0; i < length; i++) { - args_cbk_init(&args_cbk->rsp_list[i]); - } - - args_cbk->enum_list = GF_CALLOC(length, sizeof(*args_cbk->enum_list), - gf_common_mt_int); - if (!args_cbk->enum_list) - goto out; - - if (xdata) { - args_cbk->xdata = dict_copy_with_ref(xdata, NULL); - if (!args_cbk->xdata) - goto out; - } - - return args_cbk; -out: - compound_args_cbk_cleanup(args_cbk); - return NULL; -} - -compound_args_t * -compound_fop_alloc(int length, 
glusterfs_compound_fop_t fop, dict_t *xdata) -{ - compound_args_t *args = NULL; - - args = GF_CALLOC(1, sizeof(*args), gf_mt_compound_req_t); - - if (!args) - return NULL; - - /* fop_enum can be used by xlators to see which fops are - * included as part of compound fop. This will help in checking - * for compatibility or support without going through the entire - * fop list packed. - */ - args->fop_enum = fop; - args->fop_length = length; - - args->enum_list = GF_CALLOC(length, sizeof(*args->enum_list), - gf_common_mt_int); - - if (!args->enum_list) - goto out; - - args->req_list = GF_CALLOC(length, sizeof(*args->req_list), - gf_mt_default_args_t); - - if (!args->req_list) - goto out; - - if (xdata) { - args->xdata = dict_copy_with_ref(xdata, args->xdata); - if (!args->xdata) - goto out; - } - - return args; -out: - compound_args_cleanup(args); - return NULL; -} diff --git a/libglusterfs/src/compound-fop-utils.h b/libglusterfs/src/compound-fop-utils.h deleted file mode 100644 index 23a2f0df3fd..00000000000 --- a/libglusterfs/src/compound-fop-utils.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __COMPOUND_FOP_UTILS_H__ -#define __COMPOUND_FOP_UTILS_H__ - -#include "defaults.h" -#include "default-args.h" -#include "mem-types.h" -#include "dict.h" - -#define COMPOUND_PACK_ARGS(fop, fop_enum, args, counter, params...) 
\ - do { \ - args->enum_list[counter] = fop_enum; \ - args_##fop##_store(&args->req_list[counter], params); \ - } while (0) - -compound_args_t * -compound_fop_alloc(int length, glusterfs_compound_fop_t fop, dict_t *xdata); - -void -compound_args_cleanup(compound_args_t *args); - -void -compound_args_cbk_cleanup(compound_args_cbk_t *args_cbk); - -compound_args_cbk_t * -compound_args_cbk_alloc(int length, dict_t *xdata); -#endif /* __COMPOUND_FOP_UTILS_H__ */ diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c index 1e4c341df05..3d890b04ec9 100644 --- a/libglusterfs/src/ctx.c +++ b/libglusterfs/src/ctx.c @@ -10,21 +10,19 @@ #include <pthread.h> -#include "globals.h" -#include "glusterfs.h" +#include "glusterfs/globals.h" +#include "glusterfs/glusterfs.h" #include "timer-wheel.h" glusterfs_ctx_t * glusterfs_ctx_new() { - int ret = 0; glusterfs_ctx_t *ctx = NULL; /* no GF_CALLOC here, gf_acct_mem_set_enable is not yet decided at this point */ - ctx = calloc(1, sizeof(*ctx)); + ctx = CALLOC(1, sizeof(*ctx)); if (!ctx) { - ret = -1; goto out; } @@ -39,13 +37,16 @@ glusterfs_ctx_new() ctx->log.loglevel = DEFAULT_LOG_LEVEL; -#ifdef RUN_WITH_VALGRIND - ctx->cmd_args.valgrind = _gf_true; +#if defined(RUN_WITH_MEMCHECK) + ctx->cmd_args.vgtool = _gf_memcheck; +#elif defined(RUN_WITH_DRD) + ctx->cmd_args.vgtool = _gf_drd; +#else + ctx->cmd_args.vgtool = _gf_none; #endif /* lock is never destroyed! */ - ret = LOCK_INIT(&ctx->lock); - if (ret) { + if (LOCK_INIT(&ctx->lock)) { free(ctx); ctx = NULL; goto out; diff --git a/libglusterfs/src/daemon.c b/libglusterfs/src/daemon.c index f821f8f7ed0..0a3e5438325 100644 --- a/libglusterfs/src/daemon.c +++ b/libglusterfs/src/daemon.c @@ -8,10 +8,9 @@ cases as published by the Free Software Foundation. 
*/ -#include <fcntl.h> #include <unistd.h> #include <stdio.h> -#include "daemon.h" +#include "glusterfs/daemon.h" int os_daemon_return(int nochdir, int noclose) diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c index c92f3d46a86..a0ba1cfb299 100644 --- a/libglusterfs/src/default-args.c +++ b/libglusterfs/src/default-args.c @@ -13,8 +13,7 @@ #include "config.h" #endif -#include "xlator.h" -#include "defaults.h" +#include "glusterfs/defaults.h" int args_lookup_store(default_args_t *args, loc_t *loc, dict_t *xdata) @@ -1137,7 +1136,8 @@ args_rchecksum_cbk_store(default_args_cbk_t *args, int32_t op_ret, args->op_errno = op_errno; if (op_ret >= 0) { args->weak_checksum = weak_checksum; - args->strong_checksum = memdup(strong_checksum, SHA256_DIGEST_LENGTH); + args->strong_checksum = gf_memdup(strong_checksum, + SHA256_DIGEST_LENGTH); } if (xdata) args->xdata = dict_ref(xdata); @@ -1541,6 +1541,48 @@ args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata) return 0; } +int +args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + if (fd_in) + args->fd = fd_ref(fd_in); + if (fd_out) + args->fd_dst = fd_ref(fd_out); + args->size = len; + args->off_in = off_in; + args->off_out = off_out; + args->flags = flags; + + if (xdata) + args->xdata = dict_ref(xdata); + + return 0; +} + +int +args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + args->op_ret = op_ret; + args->op_errno = op_errno; + if (op_ret >= 0) { + if (postbuf_dst) + args->poststat = *postbuf_dst; + if (prebuf_dst) + args->prestat = *prebuf_dst; + if (stbuf) + args->stat = *stbuf; + } + if (xdata) + args->xdata = dict_ref(xdata); + + return 0; +} + void args_cbk_wipe(default_args_cbk_t *args_cbk) { diff --git 
a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c index 2ce5ec7c685..3cf707f42aa 100644 --- a/libglusterfs/src/defaults-tmpl.c +++ b/libglusterfs/src/defaults-tmpl.c @@ -25,8 +25,8 @@ #include "config.h" #endif -#include "xlator.h" -#include "defaults.h" +#include "glusterfs/xlator.h" +#include "glusterfs/defaults.h" #pragma generate @@ -84,6 +84,7 @@ struct xlator_fops _default_fops = { .put = default_put, .icreate = default_icreate, .namelink = default_namelink, + .copy_file_range = default_copy_file_range, }; struct xlator_fops *default_fops = &_default_fops; @@ -124,13 +125,24 @@ int default_notify(xlator_t *this, int32_t event, void *data, ...) { GF_UNUSED int ret = 0; + xlator_t *victim = data; + + glusterfs_graph_t *graph = NULL; + + GF_VALIDATE_OR_GOTO("notify", this, out); + graph = this->graph; + GF_VALIDATE_OR_GOTO(this->name, graph, out); + switch (event) { case GF_EVENT_PARENT_UP: case GF_EVENT_PARENT_DOWN: { xlator_list_t *list = this->children; while (list) { - xlator_notify(list->xlator, event, this); + if (victim && victim->cleanup_starting) + xlator_notify(list->xlator, event, victim); + else + xlator_notify(list->xlator, event, this); list = list->next; } } break; @@ -153,6 +165,20 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) xlator_notify(parent->xlator, event, this, NULL); parent = parent->next; } + + if (event == GF_EVENT_CHILD_DOWN && + !(this->ctx && this->ctx->master) && (graph->top == this)) { + /* Make sure this is not a daemon with master xlator */ + pthread_mutex_lock(&graph->mutex); + { + if (graph->parent_down == + graph_total_client_xlator(graph)) { + graph->used = 0; + pthread_cond_broadcast(&graph->child_down_cond); + } + } + pthread_mutex_unlock(&graph->mutex); + } } break; case GF_EVENT_UPCALL: { xlator_list_t *parent = this->parents; @@ -199,7 +225,7 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) * nothing to do with readability. 
*/ } - +out: return 0; } diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c index d75ce11c016..1d9be9217a6 100644 --- a/libglusterfs/src/dict.c +++ b/libglusterfs/src/dict.c @@ -16,19 +16,14 @@ #include <limits.h> #include <fnmatch.h> -#include "glusterfs.h" -#include "common-utils.h" -#include "dict.h" -#include "hashfn.h" -#include "logging.h" -#include "compat.h" -#include "compat-errno.h" -#include "byte-order.h" -#include "globals.h" -#include "statedump.h" -#include "libglusterfs-messages.h" - -#include "glusterfs-fops.h" +#include "glusterfs/dict.h" +#define XXH_INLINE_ALL +#include "xxhash.h" +#include "glusterfs/compat.h" +#include "glusterfs/compat-errno.h" +#include "glusterfs/byte-order.h" +#include "glusterfs/statedump.h" +#include "glusterfs/libglusterfs-messages.h" struct dict_cmp { dict_t *dict; @@ -55,15 +50,13 @@ struct dict_cmp { static data_t * get_new_data() { - data_t *data = NULL; + data_t *data = mem_get(THIS->ctx->dict_data_pool); - data = mem_get(THIS->ctx->dict_data_pool); if (!data) return NULL; GF_ATOMIC_INIT(data->refcount, 0); data->is_static = _gf_false; - LOCK_INIT(&data->lock); return data; } @@ -104,23 +97,17 @@ get_new_dict_full(int size_hint) } } + dict->free_pair.key = NULL; + dict->totkvlen = 0; LOCK_INIT(&dict->lock); return dict; } dict_t * -get_new_dict(void) -{ - return get_new_dict_full(1); -} - -dict_t * dict_new(void) { - dict_t *dict = NULL; - - dict = get_new_dict_full(1); + dict_t *dict = get_new_dict_full(1); if (dict) dict_ref(dict); @@ -132,6 +119,7 @@ int32_t is_data_equal(data_t *one, data_t *two) { struct iatt *iatt1, *iatt2; + struct mdata_iatt *mdata_iatt1, *mdata_iatt2; if (!one || !two || !one->data || !two->data) { gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, @@ -196,6 +184,24 @@ is_data_equal(data_t *one, data_t *two) */ return 1; } + if (one->data_type == GF_DATA_TYPE_MDATA) { + if ((one->len < sizeof(struct mdata_iatt)) || + (two->len < sizeof(struct mdata_iatt))) { + 
return 0; + } + mdata_iatt1 = (struct mdata_iatt *)one->data; + mdata_iatt2 = (struct mdata_iatt *)two->data; + + if (mdata_iatt1->ia_atime != mdata_iatt2->ia_atime || + mdata_iatt1->ia_mtime != mdata_iatt2->ia_mtime || + mdata_iatt1->ia_ctime != mdata_iatt2->ia_ctime || + mdata_iatt1->ia_atime_nsec != mdata_iatt2->ia_atime_nsec || + mdata_iatt1->ia_mtime_nsec != mdata_iatt2->ia_mtime_nsec || + mdata_iatt1->ia_ctime_nsec != mdata_iatt2->ia_ctime_nsec) { + return 0; + } + return 1; + } if (one->len != two->len) return 0; @@ -210,11 +216,8 @@ static int key_value_cmp(dict_t *one, char *key1, data_t *value1, void *data) { struct dict_cmp *cmp = data; - dict_t *two = NULL; - data_t *value2 = NULL; - - two = cmp->dict; - value2 = dict_get(two, key1); + dict_t *two = cmp->dict; + data_t *value2 = dict_get(two, key1); if (value2) { if (cmp->value_ignore && cmp->value_ignore(key1)) @@ -291,8 +294,6 @@ void data_destroy(data_t *data) { if (data) { - LOCK_DESTROY(&data->lock); - if (!data->is_static) GF_FREE(data->data); @@ -317,13 +318,12 @@ data_copy(data_t *old) newdata->len = old->len; if (old->data) { - newdata->data = memdup(old->data, old->len); + newdata->data = gf_memdup(old->data, old->len); if (!newdata->data) goto err_out; } newdata->data_type = old->data_type; - LOCK_INIT(&newdata->lock); return newdata; err_out: @@ -337,7 +337,7 @@ err_out: * checked by callers. 
*/ static data_pair_t * -dict_lookup_common(dict_t *this, char *key, uint32_t hash) +dict_lookup_common(const dict_t *this, const char *key, const uint32_t hash) { int hashval = 0; data_pair_t *pair; @@ -359,8 +359,6 @@ dict_lookup_common(dict_t *this, char *key, uint32_t hash) int32_t dict_lookup(dict_t *this, char *key, data_t **data) { - uint32_t hash; - if (!this || !key || !data) { gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, "!this || !key || " @@ -370,7 +368,7 @@ dict_lookup(dict_t *this, char *key, data_t **data) data_pair_t *tmp = NULL; - hash = SuperFastHash(key, strlen(key)); + uint32_t hash = (uint32_t)XXH64(key, strlen(key), 0); LOCK(&this->lock); { @@ -386,8 +384,8 @@ dict_lookup(dict_t *this, char *key, data_t **data) } static int32_t -dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash, - gf_boolean_t replace) +dict_set_lk(dict_t *this, char *key, const int key_len, data_t *value, + const uint32_t hash, gf_boolean_t replace) { int hashval = 0; data_pair_t *pair; @@ -401,19 +399,19 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash, return -1; } key_free = 1; - key_hash = SuperFastHash(key, keylen); + key_hash = (uint32_t)XXH64(key, keylen, 0); } else { - keylen = strlen(key); + keylen = key_len; key_hash = hash; } /* Search for a existing key if 'replace' is asked for */ if (replace) { pair = dict_lookup_common(this, key, key_hash); - if (pair) { data_t *unref_data = pair->value; pair->value = data_ref(value); + this->totkvlen += (value->len - unref_data->len); data_unref(unref_data); if (key_free) GF_FREE(key); @@ -422,16 +420,15 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash, } } - if (this->free_pair_in_use) { + if (this->free_pair.key) { /* the free_pair is used */ pair = mem_get(THIS->ctx->dict_pair_pool); if (!pair) { if (key_free) GF_FREE(key); return -1; } - } else { + } else { /* assign the pair to the free pair */ pair = &this->free_pair; - 
this->free_pair_in_use = _gf_true; } if (key_free) { @@ -441,9 +438,7 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash, } else { pair->key = (char *)GF_MALLOC(keylen + 1, gf_common_mt_char); if (!pair->key) { - if (pair == &this->free_pair) { - this->free_pair_in_use = _gf_false; - } else { + if (pair != &this->free_pair) { mem_put(pair); } return -1; @@ -452,6 +447,7 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash, } pair->key_hash = key_hash; pair->value = data_ref(value); + this->totkvlen += (keylen + 1 + value->len); /* If the divisor is 1, the modulo is always 0, * in such case avoid hash calculation. @@ -501,12 +497,12 @@ dict_setn(dict_t *this, char *key, const int keylen, data_t *value) } if (key) { - key_hash = SuperFastHash(key, keylen); + key_hash = (uint32_t)XXH64(key, keylen, 0); } LOCK(&this->lock); - ret = dict_set_lk(this, key, value, key_hash, 1); + ret = dict_set_lk(this, key, keylen, value, key_hash, 1); UNLOCK(&this->lock); @@ -535,12 +531,12 @@ dict_addn(dict_t *this, char *key, const int keylen, data_t *value) } if (key) { - key_hash = SuperFastHash(key, keylen); + key_hash = (uint32_t)XXH64(key, keylen, 0); } LOCK(&this->lock); - ret = dict_set_lk(this, key, value, key_hash, 0); + ret = dict_set_lk(this, key, keylen, value, key_hash, 0); UNLOCK(&this->lock); @@ -551,7 +547,7 @@ data_t * dict_get(dict_t *this, char *key) { if (!this || !key) { - gf_msg_callingfn("dict", GF_LOG_INFO, EINVAL, LG_MSG_INVALID_ARG, + gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, "!this || key=%s", (key) ? key : "()"); return NULL; } @@ -566,12 +562,12 @@ dict_getn(dict_t *this, char *key, const int keylen) uint32_t hash; if (!this || !key) { - gf_msg_callingfn("dict", GF_LOG_INFO, EINVAL, LG_MSG_INVALID_ARG, + gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, "!this || key=%s", (key) ? 
key : "()"); return NULL; } - hash = SuperFastHash(key, keylen); + hash = (uint32_t)XXH64(key, keylen, 0); LOCK(&this->lock); { @@ -629,7 +625,7 @@ dict_deln(dict_t *this, char *key, const int keylen) return; } - hash = SuperFastHash(key, keylen); + hash = (uint32_t)XXH64(key, keylen, 0); LOCK(&this->lock); @@ -649,6 +645,7 @@ dict_deln(dict_t *this, char *key, const int keylen) else this->members[hashval] = pair->hash_next; + this->totkvlen -= pair->value->len; data_unref(pair->value); if (pair->prev) @@ -659,9 +656,10 @@ dict_deln(dict_t *this, char *key, const int keylen) if (pair->next) pair->next->prev = pair->prev; + this->totkvlen -= (strlen(pair->key) + 1); GF_FREE(pair->key); if (pair == &this->free_pair) { - this->free_pair_in_use = _gf_false; + this->free_pair.key = NULL; } else { mem_put(pair); } @@ -701,16 +699,18 @@ dict_destroy(dict_t *this) GF_FREE(prev->key); if (prev != &this->free_pair) { mem_put(prev); + } else { + this->free_pair.key = NULL; } total_pairs++; prev = pair; } + this->totkvlen = 0; if (this->members != &this->members_internal) { mem_put(this->members); } - GF_FREE(this->extra_free); free(this->extra_stdfree); /* update 'ctx->stats.dict.details' using max_count */ @@ -742,7 +742,7 @@ dict_unref(dict_t *this) uint64_t ref = 0; if (!this) { - gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, + gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, "dict is NULL"); return; } @@ -757,7 +757,7 @@ dict_t * dict_ref(dict_t *this) { if (!this) { - gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, + gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, "dict is NULL"); return NULL; } @@ -769,7 +769,7 @@ dict_ref(dict_t *this) void data_unref(data_t *this) { - int32_t ref; + uint64_t ref; if (!this) { gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, @@ -809,6 +809,7 @@ int_to_data(int64_t value) data->len = gf_asprintf(&data->data, "%" PRId64, value); if (-1 == 
data->len) { gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } data->len++; /* account for terminating NULL */ @@ -828,6 +829,7 @@ data_from_int64(int64_t value) data->len = gf_asprintf(&data->data, "%" PRId64, value); if (-1 == data->len) { gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } data->len++; /* account for terminating NULL */ @@ -847,6 +849,7 @@ data_from_int32(int32_t value) data->len = gf_asprintf(&data->data, "%" PRId32, value); if (-1 == data->len) { gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } @@ -867,6 +870,7 @@ data_from_int16(int16_t value) data->len = gf_asprintf(&data->data, "%" PRId16, value); if (-1 == data->len) { gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } @@ -887,6 +891,7 @@ data_from_int8(int8_t value) data->len = gf_asprintf(&data->data, "%d", value); if (-1 == data->len) { gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } @@ -907,6 +912,7 @@ data_from_uint64(uint64_t value) data->len = gf_asprintf(&data->data, "%" PRIu64, value); if (-1 == data->len) { gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } @@ -919,9 +925,7 @@ data_from_uint64(uint64_t value) data_t * data_from_double(double value) { - data_t *data = NULL; - - data = get_new_data(); + data_t *data = get_new_data(); if (!data) { return NULL; @@ -929,6 +933,8 @@ data_from_double(double value) data->len = gf_asprintf(&data->data, "%f", value); if (data->len == -1) { + gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } data->len++; /* account for terminating NULL */ @@ -948,6 +954,7 @@ data_from_uint32(uint32_t value) data->len = gf_asprintf(&data->data, "%" PRIu32, value); if (-1 == data->len) { gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } @@ -967,6 +974,8 @@ data_from_uint16(uint16_t value) } data->len = 
gf_asprintf(&data->data, "%" PRIu16, value); if (-1 == data->len) { + gf_msg_debug("dict", 0, "asprintf failed"); + data_destroy(data); return NULL; } @@ -1094,6 +1103,7 @@ static char *data_type_name[GF_DATA_TYPE_MAX] = { [GF_DATA_TYPE_PTR] = "pointer", [GF_DATA_TYPE_GFUUID] = "gf-uuid", [GF_DATA_TYPE_IATT] = "iatt", + [GF_DATA_TYPE_MDATA] = "mdata", }; int64_t @@ -1101,119 +1111,146 @@ data_to_int64(data_t *data) { VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); - return (int64_t)strtoull(data->data, NULL, 0); + char *endptr = NULL; + int64_t value = 0; + + errno = 0; + value = strtoll(data->data, &endptr, 0); + + if (endptr && *endptr != '\0') + /* Unrecognized characters at the end of string. */ + errno = EINVAL; + if (errno) { + gf_msg_callingfn("dict", GF_LOG_WARNING, errno, + LG_MSG_DATA_CONVERSION_ERROR, + "Error in data conversion: '%s' can't " + "be represented as int64_t", + data->data); + return -1; + } + return value; } +/* Like above but implies signed range check. 
*/ + +#define DATA_TO_RANGED_SIGNED(endptr, value, data, type, min, max) \ + do { \ + errno = 0; \ + value = strtoll(data->data, &endptr, 0); \ + if (endptr && *endptr != '\0') \ + errno = EINVAL; \ + if (errno || value > max || value < min) { \ + gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \ + LG_MSG_DATA_CONVERSION_ERROR, \ + "Error in data conversion: '%s' can't " \ + "be represented as " #type, \ + data->data); \ + return -1; \ + } \ + return (type)value; \ + } while (0) + int32_t data_to_int32(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + char *endptr = NULL; + int64_t value = 0; - return strtoul(data->data, NULL, 0); + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + DATA_TO_RANGED_SIGNED(endptr, value, data, int32_t, INT_MIN, INT_MAX); } int16_t data_to_int16(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); - - int16_t value = 0; + char *endptr = NULL; + int64_t value = 0; - errno = 0; - value = strtol(data->data, NULL, 0); - - if ((value > SHRT_MAX) || (value < SHRT_MIN)) { - errno = ERANGE; - gf_msg_callingfn("dict", GF_LOG_WARNING, errno, - LG_MSG_DATA_CONVERSION_ERROR, - "Error in data" - " conversion: detected overflow"); - return -1; - } - - return (int16_t)value; + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + DATA_TO_RANGED_SIGNED(endptr, value, data, int16_t, SHRT_MIN, SHRT_MAX); } int8_t data_to_int8(data_t *data) { + char *endptr = NULL; + int64_t value = 0; + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1); + DATA_TO_RANGED_SIGNED(endptr, value, data, int8_t, CHAR_MIN, CHAR_MAX); +} - int8_t value = 0; +uint64_t +data_to_uint64(data_t *data) +{ + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + + char *endptr = NULL; + uint64_t value = 0; errno = 0; - value = strtol(data->data, NULL, 0); + value = strtoull(data->data, &endptr, 0); - if ((value > SCHAR_MAX) || (value < SCHAR_MIN)) { - errno = ERANGE; + if (endptr && *endptr != '\0') + 
errno = EINVAL; + if (errno) { gf_msg_callingfn("dict", GF_LOG_WARNING, errno, LG_MSG_DATA_CONVERSION_ERROR, - "Error in data" - " conversion: detected overflow"); + "Error in data conversion: '%s' can't " + "be represented as uint64_t", + data->data); return -1; } - - return (int8_t)value; + return value; } -uint64_t -data_to_uint64(data_t *data) -{ - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); +/* Like above but implies unsigned range check. */ - return strtoll(data->data, NULL, 0); -} +#define DATA_TO_RANGED_UNSIGNED(endptr, value, data, type, max) \ + do { \ + errno = 0; \ + value = strtoull(data->data, &endptr, 0); \ + if (endptr && *endptr != '\0') \ + errno = EINVAL; \ + if (errno || value > max) { \ + gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \ + LG_MSG_DATA_CONVERSION_ERROR, \ + "Error in data conversion: '%s' can't " \ + "be represented as " #type, \ + data->data); \ + return -1; \ + } \ + return (type)value; \ + } while (0) uint32_t data_to_uint32(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + char *endptr = NULL; + uint64_t value = 0; - return strtol(data->data, NULL, 0); + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint32_t, UINT_MAX); } uint16_t data_to_uint16(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); - - uint16_t value = 0; + char *endptr = NULL; + uint64_t value = 0; - errno = 0; - value = strtol(data->data, NULL, 0); - - if ((USHRT_MAX - value) < 0) { - errno = ERANGE; - gf_msg_callingfn("dict", GF_LOG_WARNING, errno, - LG_MSG_DATA_CONVERSION_ERROR, - "Error in data conversion: " - "overflow detected"); - return -1; - } - - return (uint16_t)value; + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint16_t, USHRT_MAX); } uint8_t data_to_uint8(data_t *data) { - VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); - - uint32_t value = 
0; - - errno = 0; - value = strtol(data->data, NULL, 0); - - if ((UCHAR_MAX - (uint8_t)value) < 0) { - errno = ERANGE; - gf_msg_callingfn("dict", GF_LOG_WARNING, errno, - LG_MSG_DATA_CONVERSION_ERROR, - "data " - "conversion overflow detected"); - return -1; - } + char *endptr = NULL; + uint64_t value = 0; - return (uint8_t)value; + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1); + DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint8_t, UCHAR_MAX); } char * @@ -1247,8 +1284,8 @@ data_to_iatt(data_t *data, char *key) * pass more data but are backward compatible (if the initial contents * of the struct are maintained, of course). */ if (data->len < sizeof(struct iatt)) { - gf_msg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF, - "data value for '%s' is smaller than expected", key); + gf_smsg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF, + "key=%s", key, NULL); return NULL; } @@ -1265,8 +1302,8 @@ int dict_remove_foreach_fn(dict_t *d, char *k, data_t *v, void *_tmp) { if (!d || !k) { - gf_msg("glusterfs", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ENTRY, - "%s is NULL", d ? "key" : "dictionary"); + gf_smsg("glusterfs", GF_LOG_WARNING, EINVAL, LG_MSG_KEY_OR_VALUE_NULL, + "d=%s", d ? 
"key" : "dictionary", NULL); return -1; } @@ -1285,9 +1322,7 @@ dict_foreach(dict_t *dict, int (*fn)(dict_t *this, char *key, data_t *value, void *data), void *data) { - int ret = 0; - - ret = dict_foreach_match(dict, dict_match_everything, NULL, fn, data); + int ret = dict_foreach_match(dict, dict_match_everything, NULL, fn, data); if (ret > 0) ret = 0; @@ -1318,10 +1353,9 @@ dict_foreach_match(dict_t *dict, int ret = -1; int count = 0; - data_pair_t *pairs = NULL; + data_pair_t *pairs = dict->members_list; data_pair_t *next = NULL; - pairs = dict->members_list; while (pairs) { next = pairs->next; if (match(dict, pairs->key, pairs->value, match_data)) { @@ -1372,10 +1406,9 @@ int dict_keys_join(void *value, int size, dict_t *dict, int (*filter_fn)(char *k)) { int len = 0; - data_pair_t *pairs = NULL; + data_pair_t *pairs = dict->members_list; data_pair_t *next = NULL; - pairs = dict->members_list; while (pairs) { next = pairs->next; @@ -1462,33 +1495,14 @@ fail: * -val error, val = errno */ -int -dict_get_with_ref(dict_t *this, char *key, data_t **data) -{ - if (!this || !key || !data) { - gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "dict OR key (%s) is NULL", key); - return -EINVAL; - } - - return dict_get_with_refn(this, key, strlen(key), data); -} - -int +static int dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data) { data_pair_t *pair = NULL; int ret = -ENOENT; uint32_t hash; - if (!this || !key || !data) { - gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "dict OR key (%s) is NULL", key); - ret = -EINVAL; - goto err; - } - - hash = SuperFastHash(key, keylen); + hash = (uint32_t)XXH64(key, keylen, 0); LOCK(&this->lock); { @@ -1500,10 +1514,22 @@ dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data) } } UNLOCK(&this->lock); -err: + return ret; } +int +dict_get_with_ref(dict_t *this, char *key, data_t **data) +{ + if (!this || !key || !data) { + 
gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, + "dict OR key (%s) is NULL", key); + return -EINVAL; + } + + return dict_get_with_refn(this, key, strlen(key), data); +} + static int data_to_ptr_common(data_t *data, void **val) { @@ -1677,7 +1703,7 @@ dict_get_int8(dict_t *this, char *key, int8_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1723,7 +1749,7 @@ dict_get_int16(dict_t *this, char *key, int16_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1795,7 +1821,7 @@ dict_get_int32(dict_t *this, char *key, int32_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1838,10 +1864,9 @@ err: int dict_set_int32(dict_t *this, char *key, int32_t val) { - data_t *data = NULL; + data_t *data = data_from_int32(val); int ret = 0; - data = data_from_int32(val); if (!data) { ret = -EINVAL; goto err; @@ -1861,7 +1886,7 @@ dict_get_int64(dict_t *this, char *key, int64_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1884,10 +1909,9 @@ err: int dict_set_int64(dict_t *this, char *key, int64_t val) { - data_t *data = NULL; + data_t *data = data_from_int64(val); int ret = 0; - data = data_from_int64(val); if (!data) { ret = -EINVAL; goto err; @@ -1907,7 +1931,7 @@ dict_get_uint16(dict_t *this, char *key, uint16_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1930,10 +1954,9 @@ err: int dict_set_uint16(dict_t *this, char *key, uint16_t val) { - data_t *data = NULL; + data_t *data = data_from_uint16(val); int ret = 0; - data = data_from_uint16(val); if (!data) { ret = -EINVAL; goto err; @@ -1953,7 +1976,7 @@ dict_get_uint32(dict_t *this, char *key, uint32_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) 
{ + if (!val) { ret = -EINVAL; goto err; } @@ -1976,10 +1999,9 @@ err: int dict_set_uint32(dict_t *this, char *key, uint32_t val) { - data_t *data = NULL; + data_t *data = data_from_uint32(val); int ret = 0; - data = data_from_uint32(val); if (!data) { ret = -EINVAL; goto err; @@ -1999,7 +2021,7 @@ dict_get_uint64(dict_t *this, char *key, uint64_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -2022,10 +2044,9 @@ err: int dict_set_uint64(dict_t *this, char *key, uint64_t val) { - data_t *data = NULL; + data_t *data = data_from_uint64(val); int ret = 0; - data = data_from_uint64(val); if (!data) { ret = -EINVAL; goto err; @@ -2096,7 +2117,7 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op) */ GF_ASSERT(flag >= 0 && flag < DICT_MAX_FLAGS); - hash = SuperFastHash(key, strlen(key)); + hash = (uint32_t)XXH64(key, strlen(key), 0); LOCK(&this->lock); { pair = dict_lookup_common(this, key, hash); @@ -2110,8 +2131,8 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op) } else { ptr = GF_CALLOC(1, DICT_MAX_FLAGS / 8, gf_common_mt_char); if (!ptr) { - gf_msg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "unable to allocate flag bit array"); + gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, + "flag bit array", NULL); ret = -ENOMEM; goto err; } @@ -2119,8 +2140,8 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op) data = data_from_dynptr(ptr, DICT_MAX_FLAGS / 8); if (!data) { - gf_msg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "unable to allocate data"); + gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, "data", + NULL); GF_FREE(ptr); ret = -ENOMEM; goto err; @@ -2131,30 +2152,29 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op) else BIT_CLEAR((unsigned char *)(data->data), flag); - if (this->free_pair_in_use) { + if (this->free_pair.key) { /* the free pair is in use */ pair = mem_get0(THIS->ctx->dict_pair_pool); if (!pair) { - 
gf_msg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "unable to allocate dict pair"); + gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, + "dict pair", NULL); ret = -ENOMEM; goto err; } - } else { + } else { /* use the free pair */ pair = &this->free_pair; - this->free_pair_in_use = _gf_true; } pair->key = (char *)GF_MALLOC(strlen(key) + 1, gf_common_mt_char); if (!pair->key) { - gf_msg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "unable to allocate dict pair"); + gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, + "dict pair", NULL); ret = -ENOMEM; goto err; } strcpy(pair->key, key); pair->key_hash = hash; pair->value = data_ref(data); - + this->totkvlen += (strlen(key) + 1 + data->len); hashval = hash % this->hash_size; pair->hash_next = this->members[hashval]; this->members[hashval] = pair; @@ -2179,12 +2199,11 @@ err: UNLOCK(&this->lock); if (pair) { - if (pair->key) - free(pair->key); - - if (pair == &this->free_pair) { - this->free_pair_in_use = _gf_false; - } else { + if (pair->key) { + GF_FREE(pair->key); + pair->key = NULL; + } + if (pair != &this->free_pair) { mem_put(pair); } } @@ -2192,8 +2211,8 @@ err: if (data) data_destroy(data); - gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_DICT_SET_FAILED, - "unable to set key (%s) in dict ", key); + gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_DICT_SET_FAILED, "key=%s", key, + NULL); return ret; } @@ -2225,7 +2244,7 @@ dict_get_double(dict_t *this, char *key, double *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -2248,10 +2267,9 @@ err: int dict_set_double(dict_t *this, char *key, double val) { - data_t *data = NULL; + data_t *data = data_from_double(val); int ret = 0; - data = data_from_double(val); if (!data) { ret = -EINVAL; goto err; @@ -2268,10 +2286,9 @@ err: int dict_set_static_ptr(dict_t *this, char *key, void *ptr) { - data_t *data = NULL; + data_t *data = data_from_ptr_common(ptr, _gf_true); int ret = 0; - data = 
data_from_ptr_common(ptr, _gf_true); if (!data) { ret = -EINVAL; goto err; @@ -2288,10 +2305,9 @@ err: int dict_set_dynptr(dict_t *this, char *key, void *ptr, size_t len) { - data_t *data = NULL; + data_t *data = data_from_dynptr(ptr, len); int ret = 0; - data = data_from_dynptr(ptr, len); if (!data) { ret = -EINVAL; goto err; @@ -2311,7 +2327,7 @@ dict_get_ptr(dict_t *this, char *key, void **ptr) data_t *data = NULL; int ret = 0; - if (!this || !key || !ptr) { + if (!ptr) { ret = -EINVAL; goto err; } @@ -2341,7 +2357,7 @@ dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len) data_t *data = NULL; int ret = 0; - if (!this || !key || !ptr) { + if (!ptr) { ret = -EINVAL; goto err; } @@ -2399,7 +2415,7 @@ dict_get_str(dict_t *this, char *key, char **str) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !str) { + if (!str) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2421,10 +2437,9 @@ err: int dict_set_str(dict_t *this, char *key, char *str) { - data_t *data = NULL; + data_t *data = str_to_data(str); int ret = 0; - data = str_to_data(str); if (!data) { ret = -EINVAL; goto err; @@ -2464,10 +2479,9 @@ int dict_set_nstrn(dict_t *this, char *key, const int keylen, char *str, const int vallen) { - data_t *data = NULL; + data_t *data = strn_to_data(str, vallen); int ret = 0; - data = strn_to_data(str, vallen); if (!data) { ret = -EINVAL; goto err; @@ -2484,12 +2498,11 @@ err: int dict_set_dynstr_with_alloc(dict_t *this, char *key, const char *str) { - char *alloc_str = NULL; + char *alloc_str = gf_strdup(str); int ret = -1; - alloc_str = gf_strdup(str); if (!alloc_str) - return -1; + return ret; ret = dict_set_dynstr(this, key, alloc_str); if (ret == -EINVAL) @@ -2508,10 +2521,9 @@ dict_set_dynstr(dict_t *this, char *key, char *str) int dict_set_dynstrn(dict_t *this, char *key, const int keylen, char *str) { - data_t *data = NULL; + data_t *data = data_from_dynstr(str); int ret = 0; - data = data_from_dynstr(str); if (!data) { ret 
= -EINVAL; goto err; @@ -2530,10 +2542,9 @@ err: int dict_set_option(dict_t *this, char *key, char *str) { - data_t *data = NULL; + data_t *data = data_from_dynstr(str); int ret = 0; - data = data_from_dynstr(str); if (!data) { ret = -EINVAL; goto err; @@ -2552,9 +2563,8 @@ dict_add_dynstr_with_alloc(dict_t *this, char *key, char *str) { data_t *data = NULL; int ret = 0; - char *alloc_str = NULL; + char *alloc_str = gf_strdup(str); - alloc_str = gf_strdup(str); if (!alloc_str) goto out; @@ -2579,7 +2589,7 @@ dict_get_bin(dict_t *this, char *key, void **bin) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !bin) { + if (!bin) { goto err; } @@ -2682,7 +2692,7 @@ dict_get_gfuuid(dict_t *this, char *key, uuid_t *gfid) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !gfid) { + if (!gfid) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2702,6 +2712,45 @@ err: } int +dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata, + bool is_static) +{ + return dict_set_bin_common(this, key, mdata, sizeof(struct mdata_iatt), + is_static, GF_DATA_TYPE_MDATA); +} + +int +dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata) +{ + data_t *data = NULL; + int ret = -EINVAL; + + if (!mdata) { + goto err; + } + ret = dict_get_with_ref(this, key, &data); + if (ret < 0) { + goto err; + } + + VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_MDATA, key, -EINVAL); + if (data->len < sizeof(struct mdata_iatt)) { + gf_smsg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF, + "key=%s", key, NULL); + ret = -ENOBUFS; + goto err; + } + + memcpy(mdata, data->data, min(data->len, sizeof(struct mdata_iatt))); + +err: + if (data) + data_unref(data); + + return ret; +} + +int dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static) { return dict_set_bin_common(this, key, iatt, sizeof(struct iatt), is_static, @@ -2714,7 +2763,7 @@ dict_get_iatt(dict_t *this, char *key, struct iatt *iatt) data_t *data = NULL; int ret = -EINVAL; 
- if (!this || !key || !iatt) { + if (!iatt) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2774,7 +2823,7 @@ dict_get_str_boolean(dict_t *this, char *key, int default_val) VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL); - ret = gf_string2boolean(data->data, &boo); + ret = gf_strn2boolean(data->data, data->len - 1, &boo); if (ret == -1) goto err; @@ -2794,6 +2843,7 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) int ret = -EINVAL; uint32_t hash; uint32_t replacekey_hash; + int replacekey_len; /* replacing a key by itself is a NO-OP */ if (strcmp(key, replace_key) == 0) @@ -2805,8 +2855,9 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) return ret; } - hash = SuperFastHash(key, strlen(key)); - replacekey_hash = SuperFastHash(replace_key, strlen(replace_key)); + hash = (uint32_t)XXH64(key, strlen(key), 0); + replacekey_len = strlen(replace_key); + replacekey_hash = (uint32_t)XXH64(replace_key, replacekey_len, 0); LOCK(&this->lock); { @@ -2815,8 +2866,8 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) if (!pair) ret = -ENODATA; else - ret = dict_set_lk(this, replace_key, pair->value, replacekey_hash, - 1); + ret = dict_set_lk(this, replace_key, replacekey_len, pair->value, + replacekey_hash, 1); } UNLOCK(&this->lock); @@ -2835,10 +2886,6 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) * 4 4 4 <key len> <value len> */ -#define DICT_HDR_LEN 4 -#define DICT_DATA_HDR_KEY_LEN 4 -#define DICT_DATA_HDR_VAL_LEN 4 - /** * dict_serialized_length_lk - return the length of serialized dict. This * procedure has to be called with this->lock held. 
@@ -2848,58 +2895,20 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) * : failure: -errno */ -static int +int dict_serialized_length_lk(dict_t *this) { int ret = -EINVAL; int count = this->count; - int len = DICT_HDR_LEN; - data_pair_t *pair = this->members_list; + const int keyhdrlen = DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN; if (count < 0) { - gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_COUNT_LESS_THAN_ZERO, - "count (%d) < 0!", count); + gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_COUNT_LESS_THAN_ZERO, + "count=%d", count, NULL); goto out; } - while (count) { - if (!pair) { - gf_msg("dict", GF_LOG_ERROR, EINVAL, - LG_MSG_COUNT_LESS_THAN_DATA_PAIRS, - "less than count data pairs found!"); - goto out; - } - - len += DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN; - - if (!pair->key) { - gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_NULL_PTR, - "pair->key is null!"); - goto out; - } - - len += strlen(pair->key) + 1 /* for '\0' */; - - if (!pair->value) { - gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_NULL_PTR, - "pair->value is null!"); - goto out; - } - - if (pair->value->len < 0) { - gf_msg("dict", GF_LOG_ERROR, EINVAL, - LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, "value->len (%d) < 0", - pair->value->len); - goto out; - } - - len += pair->value->len; - - pair = pair->next; - count--; - } - - ret = len; + ret = DICT_HDR_LEN + this->totkvlen + (count * keyhdrlen); out: return ret; } @@ -2926,14 +2935,13 @@ dict_serialize_lk(dict_t *this, char *buf) int32_t netword = 0; if (!buf) { - gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, - "buf is null!"); + gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, NULL); goto out; } if (count < 0) { - gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO, - "count (%d) < 0!", count); + gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO, + "count=%d", count, NULL); goto out; } @@ -2943,14 +2951,13 @@ dict_serialize_lk(dict_t *this, char *buf) while (count) { if (!pair) { - gf_msg("dict", 
GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT, - "less than count data pairs found!"); + gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT, + NULL); goto out; } if (!pair->key) { - gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, - "pair->key is null!"); + gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, NULL); goto out; } @@ -2960,8 +2967,7 @@ dict_serialize_lk(dict_t *this, char *buf) buf += DICT_DATA_HDR_KEY_LEN; if (!pair->value) { - gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, - "pair->value is null!"); + gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, NULL); goto out; } @@ -3109,8 +3115,8 @@ dict_unserialize(char *orig_buf, int32_t size, dict_t **fill) buf += DICT_HDR_LEN; if (count < 0) { - gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO, - "count (%d) <= 0", count); + gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO, + "count=%d", count, NULL); goto out; } @@ -3175,12 +3181,12 @@ dict_unserialize(char *orig_buf, int32_t size, dict_t **fill) goto out; } value->len = vallen; - value->data = memdup(buf, vallen); + value->data = gf_memdup(buf, vallen); value->data_type = GF_DATA_TYPE_STR_OLD; value->is_static = _gf_false; buf += vallen; - ret = dict_add(*fill, key, value); + ret = dict_addn(*fill, key, keylen, value); if (ret < 0) goto out; } @@ -3266,32 +3272,30 @@ dict_serialize_value_with_delim_lk(dict_t *this, char *buf, int32_t *serz_len, data_pair_t *pair = this->members_list; if (!buf) { - gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "buf is null"); + gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, NULL); goto out; } if (count < 0) { - gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, - "count (%d) < 0", count); + gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "count=%d", + count, NULL); goto out; } while (count) { if (!pair) { - gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT, - "less than count data pairs found"); + gf_smsg("dict", GF_LOG_ERROR, 0, 
LG_MSG_PAIRS_LESS_THAN_COUNT, + NULL); goto out; } if (!pair->key || !pair->value) { - gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_KEY_OR_VALUE_NULL, - "key or value is null"); + gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_KEY_OR_VALUE_NULL, NULL); goto out; } if (!pair->value->data) { - gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_VALUE_IN_DICT, - "null value found in dict"); + gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_VALUE_IN_DICT, NULL); goto out; } @@ -3365,7 +3369,7 @@ dict_dump_to_log(dict_t *dict) { int ret = -1; char *dump = NULL; - int dump_size = 64 * 1024; + const int dump_size = 64 * 1024; char *format = "(%s:%s)"; if (!dict) { @@ -3383,12 +3387,11 @@ dict_dump_to_log(dict_t *dict) ret = dict_dump_to_str(dict, dump, dump_size, format); if (ret) { - gf_msg("dict", GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, - "Failed to log dictionary"); + gf_smsg("dict", GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, NULL); goto out; } - gf_msg("dict", GF_LOG_INFO, 0, LG_MSG_DICT_ERROR, "dict=%p (%s)", dict, - dump); + gf_smsg("dict", GF_LOG_INFO, 0, LG_MSG_DICT_ERROR, "dict=%p", dict, + "dump=%s", dump, NULL); out: GF_FREE(dump); @@ -3400,7 +3403,7 @@ dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain) { int ret = -1; char *dump = NULL; - int dump_size = 64 * 1024; + const int dump_size = 64 * 1024; char key[4096] = { 0, }; @@ -3421,8 +3424,8 @@ dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain) ret = dict_dump_to_str(dict, dump, dump_size, format); if (ret) { - gf_msg(domain, GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, - "Failed to log dictionary %s", dict_name); + gf_smsg(domain, GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, "name=%s", + dict_name, NULL); goto out; } gf_proc_dump_build_key(key, domain, "%s", dict_name); @@ -3438,10 +3441,9 @@ dict_t * dict_for_key_value(const char *name, const char *value, size_t size, gf_boolean_t is_static) { - dict_t *xattr = NULL; + dict_t *xattr = dict_new(); int ret = 0; - xattr = dict_new(); if 
(!xattr) return NULL; @@ -3473,7 +3475,7 @@ dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result) LOCK(&dict->lock); { for (i = 0; strings[i]; i++) { - hash = SuperFastHash(strings[i], strlen(strings[i])); + hash = (uint32_t)XXH64(strings[i], strlen(strings[i]), 0); if (dict_lookup_common(dict, strings[i], hash)) { *result = _gf_true; goto unlock; diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c index 9826cc9e275..fb4fb845b40 100644 --- a/libglusterfs/src/event-epoll.c +++ b/libglusterfs/src/event-epoll.c @@ -8,20 +8,14 @@ cases as published by the Free Software Foundation. */ -#include <sys/poll.h> #include <pthread.h> -#include <unistd.h> -#include <fcntl.h> #include <stdlib.h> #include <errno.h> -#include <string.h> -#include "logging.h" -#include "gf-event.h" -#include "mem-pool.h" -#include "common-utils.h" -#include "syscall.h" -#include "libglusterfs-messages.h" +#include "glusterfs/gf-event.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/syscall.h" +#include "glusterfs/libglusterfs-messages.h" #ifdef HAVE_SYS_EPOLL_H #include <sys/epoll.h> @@ -30,6 +24,7 @@ struct event_slot_epoll { int fd; int events; int gen; + int idx; gf_atomic_t ref; int do_close; int in_handler; @@ -37,6 +32,7 @@ struct event_slot_epoll { void *data; event_handler_t handler; gf_lock_t lock; + struct list_head poller_death; }; struct event_thread_data { @@ -57,6 +53,7 @@ __event_newtable(struct event_pool *event_pool, int table_idx) for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { table[i].fd = -1; LOCK_INIT(&table[i].lock); + INIT_LIST_HEAD(&table[i].poller_death); } event_pool->ereg[table_idx] = table; @@ -66,17 +63,30 @@ __event_newtable(struct event_pool *event_pool, int table_idx) } static int -__event_slot_alloc(struct event_pool *event_pool, int fd) +event_slot_ref(struct event_slot_epoll *slot) +{ + if (!slot) + return -1; + + return GF_ATOMIC_INC(slot->ref); +} + +static int +__event_slot_alloc(struct event_pool 
*event_pool, int fd, + char notify_poller_death, struct event_slot_epoll **slot) { int i = 0; + int j = 0; int table_idx = -1; int gen = -1; struct event_slot_epoll *table = NULL; - for (i = 0; i < EVENT_EPOLL_TABLES; i++) { +retry: + + while (i < EVENT_EPOLL_TABLES) { switch (event_pool->slots_used[i]) { case EVENT_EPOLL_SLOTS: - continue; + break; case 0: if (!event_pool->ereg[i]) { table = __event_newtable(event_pool, i); @@ -94,6 +104,7 @@ __event_slot_alloc(struct event_pool *event_pool, int fd) if (table) /* break out of the loop */ break; + i++; } if (!table) @@ -101,33 +112,49 @@ __event_slot_alloc(struct event_pool *event_pool, int fd) table_idx = i; - for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { - if (table[i].fd == -1) { + for (j = 0; j < EVENT_EPOLL_SLOTS; j++) { + if (table[j].fd == -1) { /* wipe everything except bump the generation */ - gen = table[i].gen; - memset(&table[i], 0, sizeof(table[i])); - table[i].gen = gen + 1; - - LOCK_INIT(&table[i].lock); + gen = table[j].gen; + memset(&table[j], 0, sizeof(table[j])); + table[j].gen = gen + 1; + + LOCK_INIT(&table[j].lock); + INIT_LIST_HEAD(&table[j].poller_death); + + table[j].fd = fd; + if (notify_poller_death) { + table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j; + list_add_tail(&table[j].poller_death, + &event_pool->poller_death); + } - table[i].fd = fd; event_pool->slots_used[table_idx]++; break; } } - return table_idx * EVENT_EPOLL_SLOTS + i; + if (j == EVENT_EPOLL_SLOTS) { + table = NULL; + i++; + goto retry; + } else { + (*slot) = &table[j]; + event_slot_ref(*slot); + return table_idx * EVENT_EPOLL_SLOTS + j; + } } static int -event_slot_alloc(struct event_pool *event_pool, int fd) +event_slot_alloc(struct event_pool *event_pool, int fd, + char notify_poller_death, struct event_slot_epoll **slot) { int idx = -1; pthread_mutex_lock(&event_pool->mutex); { - idx = __event_slot_alloc(event_pool, fd); + idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot); } 
pthread_mutex_unlock(&event_pool->mutex); @@ -141,6 +168,7 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) int offset = 0; struct event_slot_epoll *table = NULL; struct event_slot_epoll *slot = NULL; + int fd = -1; table_idx = idx / EVENT_EPOLL_SLOTS; offset = idx % EVENT_EPOLL_SLOTS; @@ -152,10 +180,13 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) slot = &table[offset]; slot->gen++; + fd = slot->fd; slot->fd = -1; slot->handled_error = 0; slot->in_handler = 0; - event_pool->slots_used[table_idx]--; + list_del_init(&slot->poller_death); + if (fd != -1) + event_pool->slots_used[table_idx]--; return; } @@ -188,12 +219,41 @@ event_slot_get(struct event_pool *event_pool, int idx) return NULL; slot = &table[offset]; - GF_ATOMIC_INC(slot->ref); + event_slot_ref(slot); return slot; } static void +__event_slot_unref(struct event_pool *event_pool, struct event_slot_epoll *slot, + int idx) +{ + int ref = -1; + int fd = -1; + int do_close = 0; + + ref = GF_ATOMIC_DEC(slot->ref); + if (ref) + /* slot still alive */ + goto done; + + LOCK(&slot->lock); + { + fd = slot->fd; + do_close = slot->do_close; + slot->do_close = 0; + } + UNLOCK(&slot->lock); + + __event_slot_dealloc(event_pool, idx); + + if (do_close) + sys_close(fd); +done: + return; +} + +static void event_slot_unref(struct event_pool *event_pool, struct event_slot_epoll *slot, int idx) { @@ -236,9 +296,8 @@ event_pool_new_epoll(int count, int eventthreadcount) epfd = epoll_create(count); if (epfd == -1) { - gf_msg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_CREATE_FAILED, - "epoll fd creation " - "failed"); + gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_CREATE_FAILED, + NULL); GF_FREE(event_pool->reg); GF_FREE(event_pool); event_pool = NULL; @@ -248,7 +307,7 @@ event_pool_new_epoll(int count, int eventthreadcount) event_pool->fd = epfd; event_pool->count = count; - + INIT_LIST_HEAD(&event_pool->poller_death); event_pool->eventthreadcount = eventthreadcount; 
event_pool->auto_thread_count = 0; @@ -272,8 +331,8 @@ __slot_update_events(struct event_slot_epoll *slot, int poll_in, int poll_out) /* do nothing */ break; default: - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN, - "invalid poll_in value %d", poll_in); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN, + "value=%d", poll_in, NULL); break; } @@ -288,8 +347,8 @@ __slot_update_events(struct event_slot_epoll *slot, int poll_in, int poll_out) /* do nothing */ break; default: - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT, - "invalid poll_out value %d", poll_out); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT, + "value=%d", poll_out, NULL); break; } } @@ -297,7 +356,7 @@ __slot_update_events(struct event_slot_epoll *slot, int poll_in, int poll_out) int event_register_epoll(struct event_pool *event_pool, int fd, event_handler_t handler, void *data, int poll_in, - int poll_out) + int poll_out, char notify_poller_death) { int idx = -1; int ret = -1; @@ -328,17 +387,10 @@ event_register_epoll(struct event_pool *event_pool, int fd, if (destroy == 1) goto out; - idx = event_slot_alloc(event_pool, fd); + idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot); if (idx == -1) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, - "could not find slot for fd=%d", fd); - return -1; - } - - slot = event_slot_get(event_pool, idx); - if (!slot) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, - "could not find slot for fd=%d idx=%d", fd, idx); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd, + NULL); return -1; } @@ -373,10 +425,8 @@ event_register_epoll(struct event_pool *event_pool, int fd, UNLOCK(&slot->lock); if (ret == -1) { - gf_msg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_ADD_FAILED, - "failed to add fd(=%d) to " - "epoll fd(=%d)", - fd, event_pool->fd); + gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_ADD_FAILED, + "fd=%d", fd, "epoll_fd=%d", event_pool->fd, NULL); 
event_slot_unref(event_pool, slot, idx); idx = -1; } @@ -405,8 +455,8 @@ event_unregister_epoll_common(struct event_pool *event_pool, int fd, int idx, slot = event_slot_get(event_pool, idx); if (!slot) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, - "could not find slot for fd=%d idx=%d", fd, idx); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd, + "idx=%d", idx, NULL); return -1; } @@ -417,10 +467,8 @@ event_unregister_epoll_common(struct event_pool *event_pool, int fd, int idx, ret = epoll_ctl(event_pool->fd, EPOLL_CTL_DEL, fd, NULL); if (ret == -1) { - gf_msg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_DEL_FAILED, - "fail to del " - "fd(=%d) from epoll fd(=%d)", - fd, event_pool->fd); + gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_DEL_FAILED, + "fd=%d", fd, "epoll_fd=%d", event_pool->fd, NULL); goto unlock; } @@ -472,8 +520,8 @@ event_select_on_epoll(struct event_pool *event_pool, int fd, int idx, slot = event_slot_get(event_pool, idx); if (!slot) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, - "could not find slot for fd=%d idx=%d", fd, idx); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd, + "idx=%d", idx, NULL); return -1; } @@ -504,10 +552,8 @@ event_select_on_epoll(struct event_pool *event_pool, int fd, int idx, ret = epoll_ctl(event_pool->fd, EPOLL_CTL_MOD, fd, &epoll_event); if (ret == -1) { - gf_msg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_MODIFY_FAILED, - "failed to " - "modify fd(=%d) events to %d", - fd, epoll_event.events); + gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_MODIFY_FAILED, + "fd=%d", fd, "events=%d", epoll_event.events, NULL); } } unlock: @@ -542,8 +588,8 @@ event_dispatch_epoll_handler(struct event_pool *event_pool, slot = event_slot_get(event_pool, idx); if (!slot) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, - "could not find slot for idx=%d", idx); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "idx=%d", idx, + 
NULL); return -1; } @@ -551,20 +597,17 @@ event_dispatch_epoll_handler(struct event_pool *event_pool, { fd = slot->fd; if (fd == -1) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_STALE_FD_FOUND, - "stale fd found on " - "idx=%d, gen=%d, events=%d, slot->gen=%d", - idx, gen, event->events, slot->gen); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_STALE_FD_FOUND, "idx=%d", + idx, "gen=%d", gen, "events=%d", event->events, + "slot->gen=%d", slot->gen, NULL); /* fd got unregistered in another thread */ goto pre_unlock; } if (gen != slot->gen) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_GENERATION_MISMATCH, - "generation " - "mismatch on idx=%d, gen=%d, slot->gen=%d, " - "slot->fd=%d", - idx, gen, slot->gen, slot->fd); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_GENERATION_MISMATCH, + "idx=%d", idx, "gen=%d", gen, "slot->gen=%d", slot->gen, + "slot->fd=%d", slot->fd, NULL); /* slot was re-used and therefore is another fd! */ goto pre_unlock; } @@ -572,6 +615,12 @@ event_dispatch_epoll_handler(struct event_pool *event_pool, handler = slot->handler; data = slot->data; + if (slot->in_handler > 0) { + /* Another handler is inprogress, skip this one. 
*/ + handler = NULL; + goto pre_unlock; + } + if (slot->handled_error) { handled_error_previously = _gf_true; } else { @@ -588,10 +637,9 @@ pre_unlock: goto out; if (!handled_error_previously) { - ret = handler(fd, idx, gen, data, - (event->events & (EPOLLIN | EPOLLPRI)), - (event->events & (EPOLLOUT)), - (event->events & (EPOLLERR | EPOLLHUP))); + handler(fd, idx, gen, data, (event->events & (EPOLLIN | EPOLLPRI)), + (event->events & (EPOLLOUT)), + (event->events & (EPOLLERR | EPOLLHUP)), 0); } out: event_slot_unref(event_pool, slot, idx); @@ -607,7 +655,9 @@ event_dispatch_epoll_worker(void *data) struct event_thread_data *ev_data = data; struct event_pool *event_pool; int myindex = -1; - int timetodie = 0; + int timetodie = 0, gen = 0; + struct list_head poller_death_notify; + struct event_slot_epoll *slot = NULL, *tmp = NULL; GF_VALIDATE_OR_GOTO("event", ev_data, out); @@ -616,10 +666,8 @@ event_dispatch_epoll_worker(void *data) GF_VALIDATE_OR_GOTO("event", event_pool, out); - gf_msg("epoll", GF_LOG_INFO, 0, LG_MSG_STARTED_EPOLL_THREAD, - "Started" - " thread with index %d", - myindex); + gf_smsg("epoll", GF_LOG_INFO, 0, LG_MSG_STARTED_EPOLL_THREAD, "index=%d", + myindex - 1, NULL); pthread_mutex_lock(&event_pool->mutex); { @@ -637,20 +685,55 @@ event_dispatch_epoll_worker(void *data) pthread_mutex_lock(&event_pool->mutex); { if (event_pool->eventthreadcount < myindex) { + while (event_pool->poller_death_sliced) { + pthread_cond_wait(&event_pool->cond, + &event_pool->mutex); + } + + INIT_LIST_HEAD(&poller_death_notify); /* if found true in critical section, * die */ event_pool->pollers[myindex - 1] = 0; event_pool->activethreadcount--; timetodie = 1; + gen = ++event_pool->poller_gen; + list_for_each_entry(slot, &event_pool->poller_death, + poller_death) + { + event_slot_ref(slot); + } + + list_splice_init(&event_pool->poller_death, + &poller_death_notify); + event_pool->poller_death_sliced = 1; pthread_cond_broadcast(&event_pool->cond); } } 
pthread_mutex_unlock(&event_pool->mutex); if (timetodie) { - gf_msg("epoll", GF_LOG_INFO, 0, LG_MSG_EXITED_EPOLL_THREAD, - "Exited " - "thread with index %d", - myindex); + list_for_each_entry(slot, &poller_death_notify, poller_death) + { + slot->handler(slot->fd, 0, gen, slot->data, 0, 0, 0, 1); + } + + pthread_mutex_lock(&event_pool->mutex); + { + list_for_each_entry_safe(slot, tmp, &poller_death_notify, + poller_death) + { + __event_slot_unref(event_pool, slot, slot->idx); + } + + list_splice(&poller_death_notify, + &event_pool->poller_death); + event_pool->poller_death_sliced = 0; + pthread_cond_broadcast(&event_pool->cond); + } + pthread_mutex_unlock(&event_pool->mutex); + + gf_smsg("epoll", GF_LOG_INFO, 0, LG_MSG_EXITED_EPOLL_THREAD, + "index=%d", myindex, NULL); + goto out; } } @@ -667,8 +750,8 @@ event_dispatch_epoll_worker(void *data) ret = event_dispatch_epoll_handler(event_pool, &event); if (ret) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_EXITED_EPOLL_THREAD, - "Failed to dispatch handler"); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_DISPATCH_HANDLER_FAILED, + NULL); } } out: @@ -687,9 +770,6 @@ event_dispatch_epoll(struct event_pool *event_pool) int pollercount = 0; int ret = -1; struct event_thread_data *ev_data = NULL; - char thread_name[GF_THREAD_NAMEMAX] = { - 0, - }; /* Start the configured number of pollers */ pthread_mutex_lock(&event_pool->mutex); @@ -723,10 +803,8 @@ event_dispatch_epoll(struct event_pool *event_pool) ev_data->event_pool = event_pool; ev_data->event_index = i + 1; - snprintf(thread_name, sizeof(thread_name), "epoll%03hx", - (i & 0x3ff)); ret = gf_thread_create(&t_id, NULL, event_dispatch_epoll_worker, - ev_data, thread_name); + ev_data, "epoll%03hx", i & 0x3ff); if (!ret) { event_pool->pollers[i] = t_id; @@ -737,9 +815,8 @@ event_dispatch_epoll(struct event_pool *event_pool) if (i != 0) pthread_detach(event_pool->pollers[i]); } else { - gf_msg("epoll", GF_LOG_WARNING, 0, - LG_MSG_START_EPOLL_THREAD_FAILED, - "Failed to start 
thread for index %d", i); + gf_smsg("epoll", GF_LOG_WARNING, 0, + LG_MSG_START_EPOLL_THREAD_FAILED, "index=%d", i, NULL); if (i == 0) { GF_FREE(ev_data); break; @@ -789,9 +866,6 @@ event_reconfigure_threads_epoll(struct event_pool *event_pool, int value) pthread_t t_id; int oldthreadcount; struct event_thread_data *ev_data = NULL; - char thread_name[GF_THREAD_NAMEMAX] = { - 0, - }; pthread_mutex_lock(&event_pool->mutex); { @@ -831,17 +905,13 @@ event_reconfigure_threads_epoll(struct event_pool *event_pool, int value) ev_data->event_pool = event_pool; ev_data->event_index = i + 1; - snprintf(thread_name, sizeof(thread_name), "epoll%03hx", - (i & 0x3ff)); ret = gf_thread_create(&t_id, NULL, event_dispatch_epoll_worker, ev_data, - thread_name); + "epoll%03hx", i & 0x3ff); if (ret) { - gf_msg("epoll", GF_LOG_WARNING, 0, - LG_MSG_START_EPOLL_THREAD_FAILED, - "Failed to start thread" - " for index %d", - i); + gf_smsg("epoll", GF_LOG_WARNING, 0, + LG_MSG_START_EPOLL_THREAD_FAILED, "index=%d", i, + NULL); GF_FREE(ev_data); } else { pthread_detach(t_id); @@ -904,8 +974,8 @@ event_handled_epoll(struct event_pool *event_pool, int fd, int idx, int gen) slot = event_slot_get(event_pool, idx); if (!slot) { - gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, - "could not find slot for fd=%d idx=%d", fd, idx); + gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd, + "idx=%d", idx, NULL); return -1; } @@ -924,7 +994,7 @@ event_handled_epoll(struct event_pool *event_pool, int fd, int idx, int gen) " from gen=%d to slot->gen=%d, fd=%d, " "slot->fd=%d", idx, gen, slot->gen, fd, slot->fd); - goto post_unlock; + goto unlock; } /* This call also picks up the changes made by another @@ -939,7 +1009,7 @@ event_handled_epoll(struct event_pool *event_pool, int fd, int idx, int gen) ret = epoll_ctl(event_pool->fd, EPOLL_CTL_MOD, fd, &epoll_event); } } -post_unlock: +unlock: UNLOCK(&slot->lock); event_slot_unref(event_pool, slot, idx); diff --git 
a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c index 4845330f62b..379fed866be 100644 --- a/libglusterfs/src/event-history.c +++ b/libglusterfs/src/event-history.c @@ -8,8 +8,8 @@ cases as published by the Free Software Foundation. */ -#include "event-history.h" -#include "libglusterfs-messages.h" +#include "glusterfs/event-history.h" +#include "glusterfs/libglusterfs-messages.h" eh_t * eh_new(size_t buffer_size, gf_boolean_t use_buffer_once, diff --git a/libglusterfs/src/event-poll.c b/libglusterfs/src/event-poll.c index 727d2a000a2..2cba963f096 100644 --- a/libglusterfs/src/event-poll.c +++ b/libglusterfs/src/event-poll.c @@ -16,12 +16,12 @@ #include <errno.h> #include <string.h> -#include "logging.h" -#include "gf-event.h" -#include "mem-pool.h" -#include "common-utils.h" -#include "syscall.h" -#include "libglusterfs-messages.h" +#include "glusterfs/logging.h" +#include "glusterfs/gf-event.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/syscall.h" +#include "glusterfs/libglusterfs-messages.h" struct event_slot_poll { int fd; @@ -33,29 +33,27 @@ struct event_slot_poll { static int event_register_poll(struct event_pool *event_pool, int fd, event_handler_t handler, void *data, int poll_in, - int poll_out); + int poll_out, char notify_poller_death); -static int +static void __flush_fd(int fd, int idx, int gen, void *data, int poll_in, int poll_out, - int poll_err) + int poll_err, char event_thread_died) { char buf[64]; int ret = -1; if (!poll_in) - return ret; + return; do { ret = sys_read(fd, buf, 64); if (ret == -1 && errno != EAGAIN) { - gf_msg("poll", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, - "read on %d returned " - "error", - fd); + gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_READ_FILE_FAILED, + "fd=%d", fd, NULL); } } while (ret == 64); - return ret; + return; } static int @@ -111,8 +109,7 @@ event_pool_new_poll(int count, int eventthreadcount) ret = pipe(event_pool->breaker); if (ret == 
-1) { - gf_msg("poll", GF_LOG_ERROR, errno, LG_MSG_PIPE_CREATE_FAILED, - "pipe creation failed"); + gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_PIPE_CREATE_FAILED, NULL); GF_FREE(event_pool->reg); GF_FREE(event_pool); return NULL; @@ -120,8 +117,7 @@ event_pool_new_poll(int count, int eventthreadcount) ret = fcntl(event_pool->breaker[0], F_SETFL, O_NONBLOCK); if (ret == -1) { - gf_msg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, - "could not set pipe to non blocking mode"); + gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, NULL); sys_close(event_pool->breaker[0]); sys_close(event_pool->breaker[1]); event_pool->breaker[0] = event_pool->breaker[1] = -1; @@ -133,8 +129,7 @@ event_pool_new_poll(int count, int eventthreadcount) ret = fcntl(event_pool->breaker[1], F_SETFL, O_NONBLOCK); if (ret == -1) { - gf_msg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, - "could not set pipe to non blocking mode"); + gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, NULL); sys_close(event_pool->breaker[0]); sys_close(event_pool->breaker[1]); @@ -146,10 +141,9 @@ event_pool_new_poll(int count, int eventthreadcount) } ret = event_register_poll(event_pool, event_pool->breaker[0], __flush_fd, - NULL, 1, 0); + NULL, 1, 0, 0); if (ret == -1) { - gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_REGISTER_PIPE_FAILED, - "could not register pipe fd with poll event loop"); + gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_REGISTER_PIPE_FAILED, NULL); sys_close(event_pool->breaker[0]); sys_close(event_pool->breaker[1]); event_pool->breaker[0] = event_pool->breaker[1] = -1; @@ -160,11 +154,8 @@ event_pool_new_poll(int count, int eventthreadcount) } if (eventthreadcount > 1) { - gf_msg("poll", GF_LOG_INFO, 0, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS, - "Currently poll " - "does not use multiple event processing threads, " - "thread count (%d) ignored", - eventthreadcount); + gf_smsg("poll", GF_LOG_INFO, 0, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS, + "count=%d", eventthreadcount, NULL); } 
/* although, eventhreadcount for poll implementation is always @@ -180,7 +171,7 @@ event_pool_new_poll(int count, int eventthreadcount) static int event_register_poll(struct event_pool *event_pool, int fd, event_handler_t handler, void *data, int poll_in, - int poll_out) + int poll_out, char notify_poller_death) { int idx = -1; @@ -215,8 +206,8 @@ event_register_poll(struct event_pool *event_pool, int fd, /* do nothing */ break; default: - gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN, - "invalid poll_in value %d", poll_in); + gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN, + "value=%d", poll_in, NULL); break; } @@ -231,8 +222,8 @@ event_register_poll(struct event_pool *event_pool, int fd, /* do nothing */ break; default: - gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT, - "invalid poll_out value %d", poll_out); + gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT, + "value=%d", poll_out, NULL); break; } @@ -257,8 +248,8 @@ event_unregister_poll(struct event_pool *event_pool, int fd, int idx_hint) idx = __event_getindex(event_pool, fd, idx_hint); if (idx == -1) { - gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, - "index not found for fd=%d (idx_hint=%d)", fd, idx_hint); + gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d", + fd, "idx_hint=%d", idx_hint, NULL); errno = ENOENT; goto unlock; } @@ -298,8 +289,8 @@ event_select_on_poll(struct event_pool *event_pool, int fd, int idx_hint, idx = __event_getindex(event_pool, fd, idx_hint); if (idx == -1) { - gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, - "index not found for fd=%d (idx_hint=%d)", fd, idx_hint); + gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d", + fd, "idx_hint=%d", idx_hint, NULL); errno = ENOENT; goto unlock; } @@ -361,10 +352,8 @@ event_dispatch_poll_handler(struct event_pool *event_pool, struct pollfd *ufds, idx = __event_getindex(event_pool, ufds[i].fd, i); if (idx == -1) { - gf_msg("poll", GF_LOG_ERROR, 0, 
LG_MSG_INDEX_NOT_FOUND, - "index not found for " - "fd=%d (idx_hint=%d)", - ufds[i].fd, i); + gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d", + ufds[i].fd, "idx_hint=%d", i, NULL); goto unlock; } @@ -375,10 +364,10 @@ unlock: pthread_mutex_unlock(&event_pool->mutex); if (handler) - ret = handler(ufds[i].fd, idx, 0, data, - (ufds[i].revents & (POLLIN | POLLPRI)), - (ufds[i].revents & (POLLOUT)), - (ufds[i].revents & (POLLERR | POLLHUP | POLLNVAL))); + handler(ufds[i].fd, idx, 0, data, + (ufds[i].revents & (POLLIN | POLLPRI)), + (ufds[i].revents & (POLLOUT)), + (ufds[i].revents & (POLLERR | POLLHUP | POLLNVAL)), 0); return ret; } diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c index 5b4d43e8650..402c253ca25 100644 --- a/libglusterfs/src/event.c +++ b/libglusterfs/src/event.c @@ -16,15 +16,14 @@ #include <errno.h> #include <string.h> -#include "logging.h" -#include "gf-event.h" -#include "mem-pool.h" -#include "common-utils.h" -#include "libglusterfs-messages.h" -#include "syscall.h" +#include "glusterfs/gf-event.h" +#include "glusterfs/timespec.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/libglusterfs-messages.h" +#include "glusterfs/syscall.h" struct event_pool * -event_pool_new(int count, int eventthreadcount) +gf_event_pool_new(int count, int eventthreadcount) { struct event_pool *event_pool = NULL; extern struct event_ops event_ops_poll; @@ -53,21 +52,22 @@ event_pool_new(int count, int eventthreadcount) } int -event_register(struct event_pool *event_pool, int fd, event_handler_t handler, - void *data, int poll_in, int poll_out) +gf_event_register(struct event_pool *event_pool, int fd, + event_handler_t handler, void *data, int poll_in, + int poll_out, char notify_poller_death) { int ret = -1; GF_VALIDATE_OR_GOTO("event", event_pool, out); - ret = event_pool->ops->event_register(event_pool, fd, handler, data, - poll_in, poll_out); + ret = event_pool->ops->event_register( + event_pool, fd, handler, data, poll_in, 
poll_out, notify_poller_death); out: return ret; } int -event_unregister(struct event_pool *event_pool, int fd, int idx) +gf_event_unregister(struct event_pool *event_pool, int fd, int idx) { int ret = -1; @@ -80,7 +80,7 @@ out: } int -event_unregister_close(struct event_pool *event_pool, int fd, int idx) +gf_event_unregister_close(struct event_pool *event_pool, int fd, int idx) { int ret = -1; @@ -93,8 +93,8 @@ out: } int -event_select_on(struct event_pool *event_pool, int fd, int idx_hint, - int poll_in, int poll_out) +gf_event_select_on(struct event_pool *event_pool, int fd, int idx_hint, + int poll_in, int poll_out) { int ret = -1; @@ -107,7 +107,7 @@ out: } int -event_dispatch(struct event_pool *event_pool) +gf_event_dispatch(struct event_pool *event_pool) { int ret = -1; @@ -122,7 +122,7 @@ out: } int -event_reconfigure_threads(struct event_pool *event_pool, int value) +gf_event_reconfigure_threads(struct event_pool *event_pool, int value) { int ret = -1; @@ -136,7 +136,7 @@ out: } int -event_pool_destroy(struct event_pool *event_pool) +gf_event_pool_destroy(struct event_pool *event_pool) { int ret = -1; int destroy = 0, activethreadcount = 0; @@ -159,12 +159,12 @@ out: return ret; } -int +void poller_destroy_handler(int fd, int idx, int gen, void *data, int poll_out, - int poll_in, int poll_err) + int poll_in, int poll_err, char event_thread_exit) { struct event_destroy_data *destroy = NULL; - int readfd = -1, ret = -1; + int readfd = -1; char buf = '\0'; destroy = data; @@ -176,15 +176,14 @@ poller_destroy_handler(int fd, int idx, int gen, void *data, int poll_out, while (sys_read(readfd, &buf, 1) > 0) { } - ret = 0; out: - event_handled(destroy->pool, fd, idx, gen); + gf_event_handled(destroy->pool, fd, idx, gen); - return ret; + return; } /* This function destroys all the poller threads. - * Note: to be called before event_pool_destroy is called. + * Note: to be called before gf_event_pool_destroy is called. 
* The order in which cleaning is performed: * - Register a pipe fd(this is for waking threads in poll()/epoll_wait()) * - Set the destroy mode, which this no new event registration will succeed @@ -195,7 +194,7 @@ out: * threads are destroyed) */ int -event_dispatch_destroy(struct event_pool *event_pool) +gf_event_dispatch_destroy(struct event_pool *event_pool) { int ret = -1, threadcount = 0; int fd[2] = {-1}; @@ -233,8 +232,8 @@ event_dispatch_destroy(struct event_pool *event_pool) /* From the main thread register an event on the pipe fd[0], */ - idx = event_register(event_pool, fd[0], poller_destroy_handler, &data, 1, - 0); + idx = gf_event_register(event_pool, fd[0], poller_destroy_handler, &data, 1, + 0, 0); if (idx < 0) goto out; @@ -248,7 +247,7 @@ event_dispatch_destroy(struct event_pool *event_pool) } pthread_mutex_unlock(&event_pool->mutex); - ret = event_reconfigure_threads(event_pool, 0); + ret = gf_event_reconfigure_threads(event_pool, 0); if (ret < 0) goto out; @@ -268,7 +267,8 @@ event_dispatch_destroy(struct event_pool *event_pool) if (sys_write(fd[1], "dummy", 6) == -1) { break; } - sleep_till.tv_sec = time(NULL) + 1; + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; ret = pthread_cond_timedwait(&event_pool->cond, &event_pool->mutex, &sleep_till); if (ret) { @@ -282,7 +282,7 @@ event_dispatch_destroy(struct event_pool *event_pool) } pthread_mutex_unlock(&event_pool->mutex); - ret = event_unregister(event_pool, fd[0], idx); + ret = gf_event_unregister(event_pool, fd[0], idx); out: if (fd[0] != -1) @@ -294,7 +294,7 @@ out: } int -event_handled(struct event_pool *event_pool, int fd, int idx, int gen) +gf_event_handled(struct event_pool *event_pool, int fd, int idx, int gen) { int ret = 0; diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c index b1fa057b81e..33157549897 100644 --- a/libglusterfs/src/events.c +++ b/libglusterfs/src/events.c @@ -19,11 +19,11 @@ #include <netinet/in.h> #include <netdb.h> -#include "syscall.h" 
-#include "mem-pool.h" -#include "glusterfs.h" -#include "globals.h" -#include "events.h" +#include "glusterfs/syscall.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/globals.h" +#include "glusterfs/events.h" #define EVENT_HOST "127.0.0.1" #define EVENT_PORT 24009 @@ -34,56 +34,66 @@ _gf_event(eventtypes_t event, const char *fmt, ...) int ret = 0; int sock = -1; char *eventstr = NULL; - struct sockaddr_in server; va_list arguments; char *msg = NULL; glusterfs_ctx_t *ctx = NULL; char *host = NULL; struct addrinfo hints; struct addrinfo *result = NULL; + struct addrinfo *iter_result_ptr = NULL; + xlator_t *this = THIS; + char *volfile_server_transport = NULL; /* Global context */ - ctx = THIS->ctx; + ctx = this->ctx; if (event < 0 || event >= EVENT_LAST) { ret = EVENT_ERROR_INVALID_INPUTS; goto out; } - /* Initialize UDP socket */ - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - ret = EVENT_ERROR_SOCKET; - goto out; + if (ctx) { + volfile_server_transport = ctx->cmd_args.volfile_server_transport; + } + if (!volfile_server_transport) { + volfile_server_transport = "tcp"; + } + + /* host = NULL returns localhost */ + if (ctx && ctx->cmd_args.volfile_server && + (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set + use that information to push the events. */ + host = ctx->cmd_args.volfile_server; } memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + hints.ai_flags = AI_ADDRCONFIG; - /* Get Host name to send message */ - if (ctx && ctx->cmd_args.volfile_server) { - /* If it is client code then volfile_server is set - use that information to push the events. 
*/ - if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints, - &result)) != 0) { - ret = EVENT_ERROR_RESOLVE; - goto out; - } + if ((getaddrinfo(host, TOSTRING(EVENT_PORT), &hints, &result)) != 0) { + ret = EVENT_ERROR_RESOLVE; + goto out; + } - if (get_ip_from_addrinfo(result, &host) == NULL) { - ret = EVENT_ERROR_RESOLVE; - goto out; + // iterate over the result and break when socket creation is success. + for (iter_result_ptr = result; iter_result_ptr != NULL; + iter_result_ptr = iter_result_ptr->ai_next) { + sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype, + iter_result_ptr->ai_protocol); + if (sock != -1) { + break; } - } else { - /* Localhost, Use the defined IP for localhost */ - host = gf_strdup(EVENT_HOST); } - - /* Socket Configurations */ - server.sin_family = AF_INET; - server.sin_port = htons(EVENT_PORT); - server.sin_addr.s_addr = inet_addr(host); - memset(&server.sin_zero, '\0', sizeof(server.sin_zero)); + /* + * If none of the addrinfo structures lead to a successful socket + * creation, socket creation has failed. + */ + if (sock < 0) { + ret = EVENT_ERROR_SOCKET; + goto out; + } va_start(arguments, fmt); ret = gf_vasprintf(&msg, fmt, arguments); @@ -94,16 +104,16 @@ _gf_event(eventtypes_t event, const char *fmt, ...) 
goto out; } - ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)time(NULL), event, msg); - + ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)gf_time(), event, msg); + GF_FREE(msg); if (ret <= 0) { ret = EVENT_ERROR_MSG_FORMAT; goto out; } /* Send Message */ - if (sendto(sock, eventstr, strlen(eventstr), 0, (struct sockaddr *)&server, - sizeof(server)) <= 0) { + if (sendto(sock, eventstr, strlen(eventstr), 0, result->ai_addr, + result->ai_addrlen) <= 0) { ret = EVENT_ERROR_SEND; goto out; } @@ -115,17 +125,10 @@ out: sys_close(sock); } - /* Allocated by gf_vasprintf */ - if (msg) - GF_FREE(msg); - /* Allocated by gf_asprintf */ if (eventstr) GF_FREE(eventstr); - if (host) - GF_FREE(host); - if (result) freeaddrinfo(result); diff --git a/libglusterfs/src/fd-lk.c b/libglusterfs/src/fd-lk.c index a6680661be6..c2d34f81c9c 100644 --- a/libglusterfs/src/fd-lk.c +++ b/libglusterfs/src/fd-lk.c @@ -8,9 +8,9 @@ cases as published by the Free Software Foundation. */ -#include "fd-lk.h" -#include "common-utils.h" -#include "libglusterfs-messages.h" +#include "glusterfs/fd-lk.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/libglusterfs-messages.h" int32_t _fd_lk_delete_lock(fd_lk_ctx_node_t *lock) diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c index 6c521317110..62606e91164 100644 --- a/libglusterfs/src/fd.c +++ b/libglusterfs/src/fd.c @@ -8,11 +8,12 @@ cases as published by the Free Software Foundation. 
*/ -#include "fd.h" -#include "glusterfs.h" -#include "dict.h" -#include "statedump.h" -#include "libglusterfs-messages.h" +#include "glusterfs/fd.h" +#include <errno.h> // for EINVAL, errno, ENOMEM +#include <inttypes.h> // for PRIu64 +#include <stdint.h> // for UINT32_MAX +#include <string.h> // for NULL, memcpy, memset, size_t +#include "glusterfs/statedump.h" static int gf_fd_fdtable_expand(fdtable_t *fdtable, uint32_t nr); @@ -501,6 +502,32 @@ out: } void +fd_close(fd_t *fd) +{ + xlator_t *xl, *old_THIS; + + old_THIS = THIS; + + for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) { + if (!xl->call_cleanup) { + THIS = xl; + + if (IA_ISDIR(fd->inode->ia_type)) { + if (xl->cbks->fdclosedir != NULL) { + xl->cbks->fdclosedir(xl, fd); + } + } else { + if (xl->cbks->fdclose != NULL) { + xl->cbks->fdclose(xl, fd); + } + } + } + } + + THIS = old_THIS; +} + +void fd_unref(fd_t *fd) { int32_t refcount = 0; @@ -532,7 +559,7 @@ fd_unref(fd_t *fd) return; } -fd_t * +static fd_t * __fd_bind(fd_t *fd) { list_del_init(&fd->inode_list); @@ -562,9 +589,9 @@ fd_bind(fd_t *fd) } static fd_t * -__fd_create(inode_t *inode, uint64_t pid) +fd_allocate(inode_t *inode, uint64_t pid) { - fd_t *fd = NULL; + fd_t *fd; if (inode == NULL) { gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, @@ -573,64 +600,67 @@ __fd_create(inode_t *inode, uint64_t pid) } fd = mem_get0(inode->table->fd_mem_pool); - if (!fd) - goto out; + if (fd == NULL) { + return NULL; + } fd->xl_count = inode->table->xl->graph->xl_count + 1; fd->_ctx = GF_CALLOC(1, (sizeof(struct _fd_ctx) * fd->xl_count), gf_common_mt_fd_ctx); - if (!fd->_ctx) - goto free_fd; + if (fd->_ctx == NULL) { + goto failed; + } fd->lk_ctx = fd_lk_ctx_create(); - if (!fd->lk_ctx) - goto free_fd_ctx; - - fd->inode = inode_ref(inode); - fd->pid = pid; - INIT_LIST_HEAD(&fd->inode_list); - - LOCK_INIT(&fd->lock); -out: - return fd; + if (fd->lk_ctx != NULL) { + /* We need to take a reference from the inode, but we 
cannot do it + * here because this function can be called with the inode lock taken + * and inode_ref() takes the inode's table lock. This is the reverse + * of the logical lock acquisition order and can cause a deadlock. So + * we simply assign the inode here and we delefate the inode reference + * responsibility to the caller (when this function succeeds and the + * inode lock is released). This is safe because the caller must hold + * a reference of the inode to use it, so it's guaranteed that the + * number of references won't reach 0 before the caller finishes. + * + * TODO: minimize use of locks in favor of atomic operations to avoid + * these dependencies. */ + fd->inode = inode; + fd->pid = pid; + INIT_LIST_HEAD(&fd->inode_list); + LOCK_INIT(&fd->lock); + GF_ATOMIC_INIT(fd->refcount, 1); + return fd; + } -free_fd_ctx: GF_FREE(fd->_ctx); -free_fd: + +failed: mem_put(fd); return NULL; } fd_t * -fd_create(inode_t *inode, pid_t pid) +fd_create_uint64(inode_t *inode, uint64_t pid) { - fd_t *fd = NULL; - - fd = __fd_create(inode, (uint64_t)pid); - if (!fd) - goto out; + fd_t *fd; - fd = fd_ref(fd); + fd = fd_allocate(inode, pid); + if (fd != NULL) { + /* fd_allocate() doesn't get a reference from the inode. We need to + * take it here in case of success. 
*/ + inode_ref(inode); + } -out: return fd; } fd_t * -fd_create_uint64(inode_t *inode, uint64_t pid) +fd_create(inode_t *inode, pid_t pid) { - fd_t *fd = NULL; - - fd = __fd_create(inode, pid); - if (!fd) - goto out; - - fd = fd_ref(fd); - -out: - return fd; + return fd_create_uint64(inode, (uint64_t)pid); } static fd_t * @@ -719,10 +749,13 @@ __fd_lookup_anonymous(inode_t *inode, int32_t flags) return fd; } -static fd_t * -__fd_anonymous(inode_t *inode, int32_t flags) +fd_t * +fd_anonymous_with_flags(inode_t *inode, int32_t flags) { fd_t *fd = NULL; + bool ref = false; + + LOCK(&inode->lock); fd = __fd_lookup_anonymous(inode, flags); @@ -730,54 +763,33 @@ __fd_anonymous(inode_t *inode, int32_t flags) __fd_lookup_anonymous(), so no need of one more fd_ref(). if (!fd); then both create and bind won't bump up the ref count, so we have to call fd_ref() after bind. */ - if (!fd) { - fd = __fd_create(inode, 0); - - if (!fd) - return NULL; - - fd->anonymous = _gf_true; - fd->flags = GF_ANON_FD_FLAGS | flags; + if (fd == NULL) { + fd = fd_allocate(inode, 0); + if (fd != NULL) { + fd->anonymous = _gf_true; + fd->flags = GF_ANON_FD_FLAGS | (flags & O_DIRECT); - __fd_bind(fd); + __fd_bind(fd); - __fd_ref(fd); + ref = true; + } } - return fd; -} - -fd_t * -fd_anonymous(inode_t *inode) -{ - fd_t *fd = NULL; + UNLOCK(&inode->lock); - LOCK(&inode->lock); - { - fd = __fd_anonymous(inode, GF_ANON_FD_FLAGS); + if (ref) { + /* fd_allocate() doesn't get a reference from the inode. We need to + * take it here in case of success. 
*/ + inode_ref(inode); } - UNLOCK(&inode->lock); return fd; } fd_t * -fd_anonymous_with_flags(inode_t *inode, int32_t flags) +fd_anonymous(inode_t *inode) { - fd_t *fd = NULL; - - LOCK(&inode->lock); - { - if (flags & O_DIRECT) - flags = GF_ANON_FD_FLAGS | O_DIRECT; - else - flags = GF_ANON_FD_FLAGS; - - fd = __fd_anonymous(inode, flags); - } - UNLOCK(&inode->lock); - - return fd; + return fd_anonymous_with_flags(inode, 0); } fd_t * @@ -1131,6 +1143,8 @@ fdentry_dump_to_dict(fdentry_t *fdentry, char *prefix, dict_t *dict, snprintf(key, sizeof(key), "%s.flags", prefix); ret = dict_set_int32(dict, key, fdentry->fd->flags); + if (ret) + return; (*openfds)++; } @@ -1180,6 +1194,8 @@ fdtable_dump_to_dict(fdtable_t *fdtable, char *prefix, dict_t *dict) snprintf(key, sizeof(key), "%s.fdtable.openfds", prefix); ret = dict_set_int32(dict, key, openfds); + if (ret) + goto out; out: pthread_rwlock_unlock(&fdtable->lock); diff --git a/libglusterfs/src/gen-defaults.py b/libglusterfs/src/gen-defaults.py index e31d3a9fe8a..e31d3a9fe8a 100644..100755 --- a/libglusterfs/src/gen-defaults.py +++ b/libglusterfs/src/gen-defaults.py diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py index 026dfb6c75b..5b7aa4764a0 100755 --- a/libglusterfs/src/generator.py +++ b/libglusterfs/src/generator.py @@ -123,6 +123,8 @@ ops['fstat'] = ( ops['fsync'] = ( ('fop-arg', 'fd', 'fd_t *'), ('fop-arg', 'flags', 'int32_t'), + ('extra', 'preop', 'struct iatt', '&preop'), + ('extra', 'postop', 'struct iatt', '&postop'), ('fop-arg', 'xdata', 'dict_t *'), ('cbk-arg', 'prebuf', 'struct iatt *'), ('cbk-arg', 'postbuf', 'struct iatt *'), @@ -142,6 +144,8 @@ ops['writev'] = ( ('fop-arg', 'off', 'off_t', 'offset'), ('fop-arg', 'flags', 'uint32_t', 'flags'), ('fop-arg', 'iobref', 'struct iobref *'), + ('extra', 'preop', 'struct iatt', '&preop'), + ('extra', 'postop', 'struct iatt', '&postop'), ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'prebuf', 'struct iatt *'), ('cbk-arg', 
'postbuf', 'struct iatt *'), @@ -154,6 +158,7 @@ ops['readv'] = ( ('fop-arg', 'size', 'size_t'), ('fop-arg', 'offset', 'off_t'), ('fop-arg', 'flags', 'uint32_t'), + ('extra', 'iatt', 'struct iatt', '&iatt'), ('fop-arg', 'xdata', 'dict_t *'), ('cbk-arg', 'vector', 'struct iovec *'), ('cbk-arg', 'count', 'int32_t'), @@ -298,6 +303,8 @@ ops['access'] = ( ops['ftruncate'] = ( ('fop-arg', 'fd', 'fd_t *', 'fd'), ('fop-arg', 'offset', 'off_t', 'offset'), + ('extra', 'preop', 'struct iatt', '&preop'), + ('extra', 'postop', 'struct iatt', '&postop'), ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'prebuf', 'struct iatt *'), ('cbk-arg', 'postbuf', 'struct iatt *'), @@ -592,6 +599,19 @@ ops['namelink'] = ( ('cbk-arg', 'xdata', 'dict_t *'), ) +ops['copy_file_range'] = ( + ('fop-arg', 'fd_in', 'fd_t *'), + ('fop-arg', 'off_in', 'off64_t '), + ('fop-arg', 'fd_out', 'fd_t *'), + ('fop-arg', 'off_out', 'off64_t '), + ('fop-arg', 'len', 'size_t'), + ('fop-arg', 'flags', 'uint32_t'), + ('fop-arg', 'xdata', 'dict_t *'), + ('cbk-arg', 'stbuf', 'struct iatt *'), + ('cbk-arg', 'prebuf_dst', 'struct iatt *'), + ('cbk-arg', 'postbuf_dst', 'struct iatt *'), + ('cbk-arg', 'xdata', 'dict_t *'), +) ##################################################################### xlator_cbks['forget'] = ( ('fn-arg', 'this', 'xlator_t *'), diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c index c06e959aeba..a809efc97ef 100644 --- a/libglusterfs/src/gf-dirent.c +++ b/libglusterfs/src/gf-dirent.c @@ -11,9 +11,8 @@ #include <stdio.h> #include <string.h> #include <stdint.h> -#include "compat.h" -#include "xlator.h" -#include "syncop.h" +#include "glusterfs/compat.h" +#include "glusterfs/syncop.h" #define ONE 1ULL #define PRESENT_D_OFF_BITS 63 @@ -277,7 +276,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol) gf_uuid_copy(loc.pargfid, parent->gfid); loc.name = entry->d_name; loc.parent = inode_ref(parent); - ret = inode_path(loc.inode, 
entry->d_name, &path); + ret = inode_path(loc.parent, entry->d_name, &path); loc.path = path; if (ret < 0) goto out; diff --git a/libglusterfs/src/gfdb/Makefile.am b/libglusterfs/src/gfdb/Makefile.am deleted file mode 100644 index 3931e694c24..00000000000 --- a/libglusterfs/src/gfdb/Makefile.am +++ /dev/null @@ -1,37 +0,0 @@ -libgfdb_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \ - $(SQLITE_CFLAGS) -DDATADIR=\"$(localstatedir)\" - -libgfdb_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \ - -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src \ - -I$(top_builddir)/rpc/xdr/src \ - -DDATADIR=\"$(localstatedir)\" - -libgfdb_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(SQLITE_LIBS) $(UUID_LIBS) - -libgfdb_la_LDFLAGS = $(GF_LDFLAGS) -version-info $(LIBGLUSTERFS_LT_VERSION) - -libgfdbdir = $(includedir)/glusterfs/gfdb - -if BUILD_GFDB - lib_LTLIBRARIES = libgfdb.la -endif - -CONTRIB_BUILDDIR = $(top_builddir)/contrib - -libgfdb_la_SOURCES = gfdb_data_store.c gfdb_data_store_helper.c \ - gfdb_sqlite3_helper.c gfdb_sqlite3.c - -noinst_HEADERS = gfdb_data_store.h gfdb_data_store_types.h \ - gfdb_sqlite3_helper.h gfdb_sqlite3.h gfdb_mem-types.h \ - gfdb_data_store_helper.h - -libgfdb_HEADERS = gfdb_data_store.h gfdb_data_store_types.h \ - gfdb_data_store_helper.h gfdb_sqlite3.h gfdb_mem-types.h \ - gfdb_sqlite3_helper.h - -CLEANFILES = - -$(top_builddir)/libglusterfs/src/libglusterfs.la: - $(MAKE) -C $(top_builddir)/libglusterfs/src/ all diff --git a/libglusterfs/src/gfdb/gfdb_data_store.c b/libglusterfs/src/gfdb/gfdb_data_store.c deleted file mode 100644 index 426596c571e..00000000000 --- a/libglusterfs/src/gfdb/gfdb_data_store.c +++ /dev/null @@ -1,803 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. 
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include "gfdb_sqlite3.h" -#include "gfdb_data_store.h" -#include "list.h" -#include "libglusterfs-messages.h" - -/****************************************************************************** - * - * Database Connection utils/internals - * - * ****************************************************************************/ - -/* GFDB Connection Node: - * ~~~~~~~~~~~~~~~~~~~~ - * Represents the connection to the database while using libgfdb - * The connection node is not thread safe as far as fini_db is concerned. - * You can use a single connection node - * to do multithreaded db operations like insert/delete/find of records. - * But you need to wait for all the operating threads to complete i.e - * pthread_join() and then do fini_db() to kill the connection node. - * gfdb_conn_node_t is an opaque structure. 
- * */ -struct gfdb_conn_node_t { - gfdb_connection_t gfdb_connection; - struct list_head conn_list; -}; - -/* - * db_conn_list is the circular linked list which - * will have all the database connections for the process - * - * */ -static gfdb_conn_node_t *db_conn_list; - -/* - * db_conn_mutex is the mutex for db_conn_list - * - * */ -static pthread_mutex_t db_conn_mutex = PTHREAD_MUTEX_INITIALIZER; - -/*Checks the sanity of the connection node*/ -#define CHECK_CONN_NODE(_conn_node) \ - do { \ - GF_ASSERT(_conn_node); \ - GF_ASSERT(_conn_node->gfdb_connection.gf_db_connection); \ - } while (0) - -/* Checks the sanity of the connection node and goto */ -#define CHECK_CONN_NODE_GOTO(_conn_node, label) \ - do { \ - if (!_conn_node) { \ - goto label; \ - }; \ - if (!_conn_node->gfdb_connection.gf_db_connection) { \ - goto label; \ - }; \ - } while (0) - -/*Check if the conn node is first in the list*/ -#define IS_FIRST_NODE(db_conn_list, _conn_node) \ - ((_conn_node == db_conn_list) ? _gf_true : _gf_false) - -/*Check if the conn node is the only node in the list*/ -#define IS_THE_ONLY_NODE(_conn_node) \ - ((_conn_node->conn_list.next == _conn_node->conn_list.prev) ? 
_gf_true \ - : _gf_false) - -/*Internal Function: Adds connection node to the end of - * the db connection list.*/ -static int -add_connection_node(gfdb_conn_node_t *_conn_node) -{ - int ret = -1; - - GF_ASSERT(_conn_node); - - /*Lock the list*/ - ret = pthread_mutex_lock(&db_conn_mutex); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ret, LG_MSG_LOCK_LIST_FAILED, - "Failed lock db connection " - "list %s", - strerror(ret)); - ret = -1; - goto out; - } - - if (db_conn_list == NULL) { - db_conn_list = _conn_node; - } else { - list_add_tail(&_conn_node->conn_list, &db_conn_list->conn_list); - } - - /*unlock the list*/ - ret = pthread_mutex_unlock(&db_conn_mutex); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ret, LG_MSG_UNLOCK_LIST_FAILED, - "Failed unlock db " - "connection list %s", - strerror(ret)); - ret = -1; - goto out; - /*TODO What if the unlock fails. - * Will it lead to deadlock? - * Most of the gluster code - * no check for unlock or destroy of mutex!*/ - } - ret = 0; -out: - return ret; -} - -/*Internal Function: - * Delete connection node from the list*/ -static int -delete_conn_node(gfdb_conn_node_t *_conn_node) -{ - int ret = -1; - - GF_ASSERT(_conn_node); - - /*Lock of the list*/ - ret = pthread_mutex_lock(&db_conn_mutex); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ret, LG_MSG_LOCK_LIST_FAILED, - "Failed lock on db connection" - " list %s", - strerror(ret)); - goto out; - } - - /*Remove the connection object from list*/ - if (IS_THE_ONLY_NODE(_conn_node)) { - db_conn_list = NULL; - GF_FREE(_conn_node); - } else { - if (IS_FIRST_NODE(db_conn_list, _conn_node)) { - db_conn_list = list_entry(db_conn_list->conn_list.next, - gfdb_conn_node_t, conn_list); - } - list_del(&_conn_node->conn_list); - GF_FREE(_conn_node); - } - - /*Release the list lock*/ - ret = pthread_mutex_unlock(&db_conn_mutex); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_WARNING, ret, LG_MSG_UNLOCK_LIST_FAILED, - "Failed unlock on db " - "connection list %s", - 
strerror(ret)); - /*TODO What if the unlock fails. - * Will it lead to deadlock? - * Most of the gluster code - * no check for unlock or destroy of mutex!*/ - ret = -1; - goto out; - } - ret = 0; -out: - return ret; -} - -/*Internal function: Used initialize/map db operation of - * specified type of db plugin*/ -static int -init_db_operations(gfdb_db_type_t gfdb_db_type, - gfdb_db_operations_t *gfdb_db_operations) -{ - int ret = -1; - - GF_ASSERT(gfdb_db_operations); - - /*Clear the gfdb_db_operations*/ - gfdb_db_operations = memset(gfdb_db_operations, 0, - sizeof(*gfdb_db_operations)); - switch (gfdb_db_type) { - case GFDB_SQLITE3: - gf_sqlite3_fill_db_operations(gfdb_db_operations); - ret = 0; - break; - case GFDB_HYPERDEX: - case GFDB_HASH_FILE_STORE: - case GFDB_ROCKS_DB: - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_UNSUPPORTED_PLUGIN, - "Plugin not supported"); - break; - case GFDB_INVALID_DB: - case GFDB_DB_END: - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_INVALID_DB_TYPE, - "Invalid DB Type"); - break; - } - return ret; -} - -/****************************************************************************** - * - * LIBGFDB API Functions - * - * ****************************************************************************/ - -/*Libgfdb API Function: Used to initialize a db connection - * (Constructor function for db connection object) - * Arguments: - * args : Dictionary containing database specific parameters - * eg: For sqlite3, pagesize, cachesize, db name, db path - etc - * gfdb_db_type : Type of data base used i.e sqlite or hyperdex etc - * Returns : if successful return the GFDB Connection node to the caller or - * NULL in case of failure*/ -gfdb_conn_node_t * -init_db(dict_t *args, gfdb_db_type_t gfdb_db_type) -{ - int ret = -1; - gfdb_conn_node_t *_conn_node = NULL; - gfdb_db_operations_t *db_operations_t = NULL; - - /*Create data base connection object*/ - _conn_node = GF_CALLOC(1, sizeof(gfdb_conn_node_t), gf_mt_db_conn_node_t); - if 
(!_conn_node) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "Failed mem alloc for " - "gfdb_conn_node_t"); - goto alloc_failed; - } - - /*Init the list component of db connection object*/ - INIT_LIST_HEAD(&_conn_node->conn_list); - - /*Add created connection node to the list*/ - ret = add_connection_node(_conn_node); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_ADD_TO_LIST_FAILED, - "Failed to add connection " - "node to list"); - goto _conn_failed; - } - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - - /*init the db ops object of db connection object*/ - ret = init_db_operations(gfdb_db_type, db_operations_t); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_INIT_DB_FAILED, - "Failed initializing database " - "operation failed."); - goto init_db_failed; - } - - /*Calling the init_db_op of the respected db type*/ - GF_ASSERT(db_operations_t->init_db_op); - ret = db_operations_t->init_db_op( - args, &_conn_node->gfdb_connection.gf_db_connection); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_INIT_DB_FAILED, - "Failed initializing database"); - goto init_db_failed; - } - _conn_node->gfdb_connection.gfdb_db_type = gfdb_db_type; - ret = 0; - - return _conn_node; - - /*****Error Handling********/ - /* If init_db_operations or init_db of plugin failed delete - * conn node from the list. 
- * connection node will be free by delete_conn_node*/ -init_db_failed: - ret = delete_conn_node(_conn_node); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DELETE_FROM_LIST_FAILED, - "Failed deleting " - "connection node from list"); - } - return NULL; - /*if adding to the list failed free connection node*/ -_conn_failed: - GF_FREE(_conn_node); - /*if allocation failed*/ -alloc_failed: - return NULL; - /*****Error Handling********/ -} - -/*Libgfdb API Function: Used to terminate/de-initialize db connection - * (Destructor function for db connection object) - * Arguments: - * _conn_node : GFDB Connection node - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -fini_db(gfdb_conn_node_t *_conn_node) -{ - int ret = -1; - gfdb_db_operations_t *db_operations_t = NULL; - - CHECK_CONN_NODE_GOTO(_conn_node, empty); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - - GF_ASSERT(db_operations_t->fini_db_op); - - ret = db_operations_t->fini_db_op( - &_conn_node->gfdb_connection.gf_db_connection); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_CLOSE_CONNECTION_FAILED, - "Failed close the db " - "connection"); - goto out; - } - - ret = delete_conn_node(_conn_node); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DELETE_FROM_LIST_FAILED, - "Failed deleting " - "connection node from list"); - } -empty: - ret = 0; -out: - return ret; -} - -/*Libgfdb API Function: Used to insert/update records in the database - * NOTE: In current gfdb_sqlite plugin we use that - * same function to delete the record. Set the - * gfdb_fop_path to GFDB_FOP_UNDEL to delete the - * link of inode from GF_FLINK_TB and - * GFDB_FOP_UNDEL_ALL to delete all the records from - * GF_FLINK_TB and GF_FILE_TB. 
- * TODO: Should separate this function into the - * delete_record function - * Refer CTR Xlator features/changetimerecorder for usage - * Arguments: - * _conn_node : GFDB Connection node - * gfdb_db_record : Record to be inserted/updated - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -insert_record(gfdb_conn_node_t *_conn_node, gfdb_db_record_t *gfdb_db_record) -{ - int ret = 0; - gfdb_db_operations_t *db_operations_t = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(_conn_node); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = _conn_node->gfdb_connection.gf_db_connection; - - if (db_operations_t->insert_record_op) { - ret = db_operations_t->insert_record_op(gf_db_connection, - gfdb_db_record); - if (ret) { - gf_msg(GFDB_DATA_STORE, - _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors), - 0, LG_MSG_INSERT_OR_UPDATE_FAILED, - "Insert/Update" - " operation failed"); - } - } - - return ret; -} - -/*Libgfdb API Function: Used to delete record from the database - * NOTE: In the current gfdb_sqlite3 plugin - * implementation this function is dummy. - * Use the insert_record function. 
- * Refer CTR Xlator features/changetimerecorder for usage - * Arguments: - * _conn_node : GFDB Connection node - * gfdb_db_record : Record to be deleted - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -delete_record(gfdb_conn_node_t *_conn_node, gfdb_db_record_t *gfdb_db_record) -{ - int ret = 0; - gfdb_db_operations_t *db_operations_t = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(_conn_node); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = _conn_node->gfdb_connection.gf_db_connection; - - if (db_operations_t->delete_record_op) { - ret = db_operations_t->delete_record_op(gf_db_connection, - gfdb_db_record); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DELETE_FAILED, - "Delete operation " - "failed"); - } - } - - return ret; -} - -/*Libgfdb API Function: Compact the database. - * - * Arguments: - * _conn_node : GFDB Connection node - * _compact_active : Is compaction currently on? - * _compact_mode_switched : Was the compaction switch flipped? 
- * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -compact_db(gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active, - gf_boolean_t _compact_mode_switched) -{ - int ret = 0; - gfdb_db_operations_t *db_operations_t = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(_conn_node); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = _conn_node->gfdb_connection.gf_db_connection; - - if (db_operations_t->compact_db_op) { - ret = db_operations_t->compact_db_op(gf_db_connection, _compact_active, - _compact_mode_switched); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_COMPACT_FAILED, - "Compaction operation " - "failed"); - } - } - - return ret; -} - -/*Libgfdb API Function: Query all the records from the database - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * query_limit : number to limit number of rows returned by the query - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_all(gfdb_conn_node_t *_conn_node, gf_query_callback_t query_callback, - void *_query_cbk_args, int query_limit) -{ - int ret = 0; - gfdb_db_operations_t *db_operations_t = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(_conn_node); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = _conn_node->gfdb_connection.gf_db_connection; - - if (db_operations_t->find_all_op) { - ret = db_operations_t->find_all_op(gf_db_connection, query_callback, - _query_cbk_args, query_limit); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED, - "Find all operation " - "failed"); - } - } - - return ret; -} - -/*Libgfdb API Function: Query records/files that have not changed/accessed - * from a time in past to current time - * 
Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are not - * changed/accessed - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_unchanged_for_time(gfdb_conn_node_t *_conn_node, - gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *for_time) -{ - int ret = 0; - gfdb_db_operations_t *db_operations_t = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(_conn_node); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = _conn_node->gfdb_connection.gf_db_connection; - - if (db_operations_t->find_unchanged_for_time_op) { - ret = db_operations_t->find_unchanged_for_time_op( - gf_db_connection, query_callback, _query_cbk_args, for_time); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED, - "Find unchanged " - "operation failed"); - } - } - - return ret; -} - -/*Libgfdb API Function: Query records/files that have changed/accessed from a - * time in past to current time - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are - * changed/accessed - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_recently_changed_files(gfdb_conn_node_t *_conn_node, - gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *from_time) -{ - int ret = 0; - gfdb_db_operations_t *db_operations_t = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(_conn_node); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = 
_conn_node->gfdb_connection.gf_db_connection; - - if (db_operations_t->find_recently_changed_files_op) { - ret = db_operations_t->find_recently_changed_files_op( - gf_db_connection, query_callback, _query_cbk_args, from_time); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED, - "Find changed operation failed"); - } - } - - return ret; -} - -/*Libgfdb API Function: Query records/files that have not changed/accessed - * from a time in past to current time, with - * a desired frequency - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are not - * changed/accessed - * write_freq_thresold : Desired Write Frequency lower limit - * read_freq_thresold : Desired Read Frequency lower limit - * _clear_counters : If true, Clears all the frequency counters of - * all files. 
- * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_unchanged_for_time_freq(gfdb_conn_node_t *_conn_node, - gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *for_time, - int write_freq_thresold, int read_freq_thresold, - gf_boolean_t _clear_counters) -{ - int ret = 0; - gfdb_db_operations_t *db_operations_t = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(_conn_node); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = _conn_node->gfdb_connection.gf_db_connection; - - if (db_operations_t->find_unchanged_for_time_freq_op) { - ret = db_operations_t->find_unchanged_for_time_freq_op( - gf_db_connection, query_callback, _query_cbk_args, for_time, - write_freq_thresold, read_freq_thresold, _clear_counters); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED, - "Find unchanged with freq operation failed"); - } - } - - return ret; -} - -/*Libgfdb API Function: Query records/files that have changed/accessed from a - * time in past to current time, with - * a desired frequency - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are - * changed/accessed - * write_freq_thresold : Desired Write Frequency lower limit - * read_freq_thresold : Desired Read Frequency lower limit - * _clear_counters : If true, Clears all the frequency counters of - * all files. 
- * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_recently_changed_files_freq(gfdb_conn_node_t *_conn_node, - gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *from_time, - int write_freq_thresold, - int read_freq_thresold, - gf_boolean_t _clear_counters) -{ - int ret = 0; - gfdb_db_operations_t *db_operations_t = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(_conn_node); - - db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = _conn_node->gfdb_connection.gf_db_connection; - - if (db_operations_t->find_recently_changed_files_freq_op) { - ret = db_operations_t->find_recently_changed_files_freq_op( - gf_db_connection, query_callback, _query_cbk_args, from_time, - write_freq_thresold, read_freq_thresold, _clear_counters); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED, - "Find changed with freq operation failed"); - } - } - - return ret; -} - -/*Libgfdb API Function: Clear the heat for all the files - * - * Arguments: - * conn_node : GFDB Connection node - * - * Returns : if successful return 0 or - * -ve value in case of failure - **/ - -int -clear_files_heat(gfdb_conn_node_t *conn_node) -{ - int ret = 0; - gfdb_db_operations_t *db_operations = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(conn_node); - - db_operations = &conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = conn_node->gfdb_connection.gf_db_connection; - - if (db_operations->clear_files_heat_op) { - ret = db_operations->clear_files_heat_op(gf_db_connection); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, - LG_MSG_INSERT_OR_UPDATE_FAILED, - "Clear files heat operation failed"); - } - } - - return ret; -} - -/* Libgfdb API Function: Function to extract version of the db - * Input: - * gfdb_conn_node_t *conn_node : GFDB Connection node - * char **version : the version is extracted as a string and will be stored in - * this 
variable. The freeing of the memory should be done by - * the caller. - * Return: - * On success return the length of the version string that is - * extracted. - * On failure return -1 - * */ -int -get_db_version(gfdb_conn_node_t *conn_node, char **version) -{ - int ret = 0; - gfdb_db_operations_t *db_operations = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(conn_node); - - db_operations = &conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = conn_node->gfdb_connection.gf_db_connection; - - if (db_operations->get_db_version) { - ret = db_operations->get_db_version(gf_db_connection, version); - if (ret < 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED, - "Get version failed"); - } - } - - return ret; -} - -int -get_db_params(gfdb_conn_node_t *conn_node, char *param_key, char **param_value) -{ - int ret = -1; - gfdb_db_operations_t *db_operations = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(conn_node); - - db_operations = &conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = conn_node->gfdb_connection.gf_db_connection; - - if (db_operations->get_db_params) { - ret = db_operations->get_db_params(gf_db_connection, param_key, - param_value); - if (ret < 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED, - "Get setting failed"); - } - } - - return ret; -} - -int -set_db_params(gfdb_conn_node_t *conn_node, char *param_key, char *param_value) -{ - int ret = -1; - gfdb_db_operations_t *db_operations = NULL; - void *gf_db_connection = NULL; - - CHECK_CONN_NODE(conn_node); - - db_operations = &conn_node->gfdb_connection.gfdb_db_operations; - gf_db_connection = conn_node->gfdb_connection.gf_db_connection; - - if (db_operations->set_db_params) { - ret = db_operations->set_db_params(gf_db_connection, param_key, - param_value); - if (ret < 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, - LG_MSG_INSERT_OR_UPDATE_FAILED, - "Failed to set database setting"); - } - } - - return ret; 
-} - -static const char * -get_db_path_key() -{ - return GFDB_SQL_PARAM_DBPATH; -} - -void -get_gfdb_methods(gfdb_methods_t *methods) -{ - methods->init_db = init_db; - methods->fini_db = fini_db; - methods->find_all = find_all; - methods->find_unchanged_for_time = find_unchanged_for_time; - methods->find_recently_changed_files = find_recently_changed_files; - methods->find_unchanged_for_time_freq = find_unchanged_for_time_freq; - methods - ->find_recently_changed_files_freq = find_recently_changed_files_freq; - methods->clear_files_heat = clear_files_heat; - methods->get_db_version = get_db_version; - methods->get_db_params = get_db_params; - methods->set_db_params = set_db_params; - methods->get_db_path_key = get_db_path_key; - - /* Query Record related functions */ - methods->gfdb_query_record_new = gfdb_query_record_new; - methods->gfdb_query_record_free = gfdb_query_record_free; - methods->gfdb_add_link_to_query_record = gfdb_add_link_to_query_record; - methods->gfdb_write_query_record = gfdb_write_query_record; - methods->gfdb_read_query_record = gfdb_read_query_record; - - /* Link info related functions */ - methods->gfdb_link_info_new = gfdb_link_info_new; - methods->gfdb_link_info_free = gfdb_link_info_free; - - /* Compaction related functions */ - methods->compact_db = compact_db; -} diff --git a/libglusterfs/src/gfdb/gfdb_data_store.h b/libglusterfs/src/gfdb/gfdb_data_store.h deleted file mode 100644 index 59f7bd01ab3..00000000000 --- a/libglusterfs/src/gfdb/gfdb_data_store.h +++ /dev/null @@ -1,331 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ -#ifndef __GFDB_DATA_STORE_H -#define __GFDB_DATA_STORE_H - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "common-utils.h" -#include <time.h> -#include <sys/time.h> - -#include "gfdb_data_store_types.h" - -/* GFDB Connection Node: - * ~~~~~~~~~~~~~~~~~~~~ - * Represents the connection to the database while using libgfdb - * The connection node is not thread safe as far as fini_db is concerned. - * You can use a single connection node - * to do multithreaded db operations like insert/delete/find of records. - * But you need to wait for all the operating threads to complete i.e - * pthread_join() and then do fini_db() to kill the connection node. - * gfdb_conn_node_t is an opaque structure. - * */ -typedef struct gfdb_conn_node_t gfdb_conn_node_t; - -/*Libgfdb API Function: Used to initialize db connection - * Arguments: - * args : Dictionary containing database specific parameters - * eg: For sqlite3, pagesize, cachesize, db name, db path - etc - * gfdb_db_type : Type of data base used i.e sqlite or hyperdex etc - * Returns : if successful return the GFDB Connection Node to the caller or - * NULL value in case of failure*/ -gfdb_conn_node_t * -init_db(dict_t *arg, gfdb_db_type_t db_type); - -typedef gfdb_conn_node_t *(*init_db_t)(dict_t *args, - gfdb_db_type_t gfdb_db_type); - -/*Libgfdb API Function: Used to terminate/de-initialize db connection - * (Destructor function for db connection object) - * Arguments: - * _conn_node : DB Connection Index of the DB Connection - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -fini_db(gfdb_conn_node_t *); - -typedef int (*fini_db_t)(gfdb_conn_node_t *_conn_node); - -/*Libgfdb API Function: Used to insert/updated records in the database - * NOTE: In current gfdb_sqlite plugin we use that - * same function to delete the record. 
Set the - * gfdb_fop_path to GFDB_FOP_UNDEL to delete the - * link of inode from GF_FLINK_TB and - * GFDB_FOP_UNDEL_ALL to delete all the records from - * GF_FLINK_TB and GF_FILE_TB. - * TODO: Should separate this function into the - * delete_record function - * Refer CTR Xlator features/changetimerecorder for usage - * Arguments: - * _conn_node : GFDB Connection node - * gfdb_db_record : Record to be inserted/updated - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -insert_record(gfdb_conn_node_t *, gfdb_db_record_t *gfdb_db_record); - -/*Libgfdb API Function: Used to delete record from the database - * NOTE: In the current gfdb_sqlite3 plugin - * implementation this function is dummy. - * Use the insert_record function. - * Refer CTR Xlator features/changetimerecorder for usage - * Arguments: - * _conn_node : GFDB Connection node - * gfdb_db_record : Record to be deleted - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -delete_record(gfdb_conn_node_t *, gfdb_db_record_t *gfdb_db_record); - -/*Libgfdb API Function: Query all the records from the database - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * query_limit : 0 - umlimited, - * any positive value - adds the LIMIT clause - * to the SQL query - * - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_all(gfdb_conn_node_t *, gf_query_callback_t query_callback, - void *_query_cbk_args, int query_limit); - -typedef int (*find_all_t)(gfdb_conn_node_t *, - gf_query_callback_t query_callback, - void *_query_cbk_args, int query_limit); - -/*Libgfdb API Function: Query records/files that have not changed/accessed - * from a time in past to current time - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function 
that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are not - * changed/accessed - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_unchanged_for_time(gfdb_conn_node_t *, gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *for_time); - -typedef int (*find_unchanged_for_time_t)(gfdb_conn_node_t *_conn_node, - gf_query_callback_t query_callback, - void *_query_cbk_args, - gfdb_time_t *for_time); - -/*Libgfdb API Function: Query records/files that have changed/accessed from a - * time in past to current time - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are - * changed/accessed - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_recently_changed_files(gfdb_conn_node_t *_conn, - gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *from_time); - -typedef int (*find_recently_changed_files_t)(gfdb_conn_node_t *_conn_node, - gf_query_callback_t query_callback, - void *_query_cbk_args, - gfdb_time_t *from_time); - -/*Libgfdb API Function: Query records/files that have not changed/accessed - * from a time in past to current time, with - * a desired frequency - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are not - * changed/accessed - * write_freq_thresold : Desired Write Frequency lower limit - * read_freq_thresold : Desired Read Frequency lower limit - * _clear_counters : If true, Clears all 
the frequency counters of - * all files. - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_unchanged_for_time_freq(gfdb_conn_node_t *_conn, - gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *for_time, - int write_freq_thresold, int read_freq_thresold, - gf_boolean_t _clear_counters); - -typedef int (*find_unchanged_for_time_freq_t)( - gfdb_conn_node_t *_conn_node, gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *for_time, int write_freq_thresold, - int read_freq_thresold, gf_boolean_t _clear_counters); - -/*Libgfdb API Function: Query records/files that have changed/accessed from a - * time in past to current time, with - * a desired frequency - * Arguments: - * _conn_node : GFDB Connection node - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are - * changed/accessed - * write_freq_thresold : Desired Write Frequency lower limit - * read_freq_thresold : Desired Read Frequency lower limit - * _clear_counters : If true, Clears all the frequency counters of - * all files. 
- * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -find_recently_changed_files_freq(gfdb_conn_node_t *_conn, - gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *from_time, - int write_freq_thresold, - int read_freq_thresold, - gf_boolean_t _clear_counters); - -typedef int (*find_recently_changed_files_freq_t)( - gfdb_conn_node_t *_conn_node, gf_query_callback_t query_callback, - void *_query_cbk_args, gfdb_time_t *from_time, int write_freq_thresold, - int read_freq_thresold, gf_boolean_t _clear_counters); - -typedef const char *(*get_db_path_key_t)(); - -/*Libgfdb API Function: Clear the heat for all the files - * - * Arguments: - * _conn_node : GFDB Connection node - * - * Returns : if successful return 0 or - * -ve value in case of failure - **/ -int -clear_files_heat(gfdb_conn_node_t *_conn_node); - -typedef int (*clear_files_heat_t)(gfdb_conn_node_t *_conn_node); - -/* Libgfdb API Function: Function to extract version of the db - * Arguments: - * gfdb_conn_node_t *_conn_node : GFDB Connection node - * char **version : the version is extracted as a string - * and will be stored in this variable. - * The freeing of the memory should be done by the caller. - * Return: - * On success return the length of the version string that is - * extracted. - * On failure return -1 - * */ -int -get_db_version(gfdb_conn_node_t *_conn_node, char **version); - -typedef int (*get_db_version_t)(gfdb_conn_node_t *_conn_node, char **version); - -/* Libgfdb API Function: Function to extract param from the db - * Arguments: - * gfdb_conn_node_t *_conn_node : GFDB Connection node - * char *param_key : param to be extracted - * char **param_value : the value of the param that is - * extracted. This function will allocate memory - * to pragma_value. The caller should free the memory. - * Return: - * On success return the length of the param value that is - * extracted. 
- * On failure return -1 - * */ -int -get_db_params(gfdb_conn_node_t *_conn_node, char *param_key, - char **param_value); - -typedef int (*get_db_params_t)(gfdb_conn_node_t *db_conn, char *param_key, - char **param_value); - -/* Libgfdb API Function: Function to set db params - * Arguments: - * gfdb_conn_node_t *_conn_node : GFDB Connection node - * char *param_key : param to be set - * char *param_value : param value - * Return: - * On success return 0 - * On failure return -1 - * */ -int -set_db_params(gfdb_conn_node_t *_conn_node, char *param_key, char *param_value); - -typedef int (*set_db_params_t)(gfdb_conn_node_t *db_conn, char *param_key, - char *param_value); - -/*Libgfdb API Function: Compact the database. - * - * Arguments: - * _conn_node : GFDB Connection node - * _compact_active : Is compaction currently on? - * _compact_mode_switched : Was the compaction switch flipped? - * Returns : if successful return 0 or - * -ve value in case of failure*/ -int -compact_db(gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active, - gf_boolean_t _compact_mode_switched); - -typedef int (*compact_db_t)(gfdb_conn_node_t *db_conn, - gf_boolean_t compact_active, - gf_boolean_t compact_mode_switched); - -typedef struct gfdb_methods_s { - init_db_t init_db; - fini_db_t fini_db; - find_all_t find_all; - find_unchanged_for_time_t find_unchanged_for_time; - find_recently_changed_files_t find_recently_changed_files; - find_unchanged_for_time_freq_t find_unchanged_for_time_freq; - find_recently_changed_files_freq_t find_recently_changed_files_freq; - clear_files_heat_t clear_files_heat; - get_db_version_t get_db_version; - get_db_params_t get_db_params; - set_db_params_t set_db_params; - /* Do not expose dbpath directly. Expose it via an */ - /* access function: get_db_path_key(). 
*/ - char *dbpath; - get_db_path_key_t get_db_path_key; - - /* Query Record related functions */ - gfdb_query_record_new_t gfdb_query_record_new; - gfdb_query_record_free_t gfdb_query_record_free; - gfdb_add_link_to_query_record_t gfdb_add_link_to_query_record; - gfdb_write_query_record_t gfdb_write_query_record; - gfdb_read_query_record_t gfdb_read_query_record; - - /* Link info related functions */ - gfdb_link_info_new_t gfdb_link_info_new; - gfdb_link_info_free_t gfdb_link_info_free; - - /* Compaction related functions */ - compact_db_t compact_db; -} gfdb_methods_t; - -void -get_gfdb_methods(gfdb_methods_t *methods); - -typedef void (*get_gfdb_methods_t)(gfdb_methods_t *methods); - -#endif diff --git a/libglusterfs/src/gfdb/gfdb_data_store_helper.c b/libglusterfs/src/gfdb/gfdb_data_store_helper.c deleted file mode 100644 index fb01a6c699b..00000000000 --- a/libglusterfs/src/gfdb/gfdb_data_store_helper.c +++ /dev/null @@ -1,589 +0,0 @@ - -#include "gfdb_data_store_helper.h" -#include "syscall.h" - -/****************************************************************************** - * - * Query record related functions - * - * ****************************************************************************/ - -/*Create a single link info structure*/ -gfdb_link_info_t * -gfdb_link_info_new() -{ - gfdb_link_info_t *link_info = NULL; - - link_info = GF_CALLOC(1, sizeof(gfdb_link_info_t), gf_mt_gfdb_link_info_t); - if (!link_info) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "Memory allocation failed for " - "link_info "); - goto out; - } - - INIT_LIST_HEAD(&link_info->list); - -out: - - return link_info; -} - -/*Destroy a link info structure*/ -void -gfdb_link_info_free(gfdb_link_info_t *link_info) -{ - GF_FREE(link_info); -} - -/*Function to create the query_record*/ -gfdb_query_record_t * -gfdb_query_record_new() -{ - int ret = -1; - gfdb_query_record_t *query_record = NULL; - - query_record = GF_CALLOC(1, sizeof(gfdb_query_record_t), - 
gf_mt_gfdb_query_record_t); - if (!query_record) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "Memory allocation failed for " - "query_record "); - goto out; - } - - INIT_LIST_HEAD(&query_record->link_list); - - ret = 0; -out: - if (ret == -1) { - GF_FREE(query_record); - } - return query_record; -} - -/*Function to delete a single linkinfo from list*/ -static void -gfdb_delete_linkinfo_from_list(gfdb_link_info_t **link_info) -{ - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, link_info, out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, *link_info, out); - - /*Remove hard link from list*/ - list_del(&(*link_info)->list); - gfdb_link_info_free(*link_info); - link_info = NULL; -out: - return; -} - -/*Function to destroy link_info list*/ -void -gfdb_free_link_info_list(gfdb_query_record_t *query_record) -{ - gfdb_link_info_t *link_info = NULL; - gfdb_link_info_t *temp = NULL; - - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out); - - list_for_each_entry_safe(link_info, temp, &query_record->link_list, list) - { - gfdb_delete_linkinfo_from_list(&link_info); - link_info = NULL; - } - -out: - return; -} - -/* Function to add linkinfo to the query record */ -int -gfdb_add_link_to_query_record(gfdb_query_record_t *query_record, uuid_t pgfid, - char *base_name) -{ - int ret = -1; - gfdb_link_info_t *link_info = NULL; - int base_name_len = 0; - - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, pgfid, out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, base_name, out); - - link_info = gfdb_link_info_new(); - if (!link_info) { - goto out; - } - - gf_uuid_copy(link_info->pargfid, pgfid); - base_name_len = strlen(base_name); - memcpy(link_info->file_name, base_name, base_name_len); - link_info->file_name[base_name_len] = '\0'; - - list_add_tail(&link_info->list, &query_record->link_list); - - query_record->link_count++; - - ret = 0; -out: - if (ret) { - gfdb_link_info_free(link_info); - link_info = NULL; - } - return ret; 
-} - -/*Function to destroy query record*/ -void -gfdb_query_record_free(gfdb_query_record_t *query_record) -{ - if (query_record) { - gfdb_free_link_info_list(query_record); - GF_FREE(query_record); - } -} - -/****************************************************************************** - SERIALIZATION/DE-SERIALIZATION OF QUERY RECORD -*******************************************************************************/ -/****************************************************************************** - The on disk format of query record is as follows, - -+---------------------------------------------------------------------------+ -| Length of serialized query record | Serialized Query Record | -+---------------------------------------------------------------------------+ - 4 bytes Length of serialized query record - | - | - -------------------------------------------------| - | - | - V - Serialized Query Record Format: - +---------------------------------------------------------------------------+ - | GFID | Link count | <LINK INFO> |..... | FOOTER | - +---------------------------------------------------------------------------+ - 16 B 4 B Link Length 4 B - | | - | | - -----------------------------| | - | | - | | - V | - Each <Link Info> will be serialized as | - +-----------------------------------------------+ | - | PGID | BASE_NAME_LENGTH | BASE_NAME | | - +-----------------------------------------------+ | - 16 B 4 B BASE_NAME_LENGTH | - | - | - ------------------------------------------------------------------------| - | - | - V - FOOTER is a magic number 0xBAADF00D indicating the end of the record. - This also serves as a serialized schema validator. 
- * ****************************************************************************/ - -#define GFDB_QUERY_RECORD_FOOTER 0xBAADF00D -#define UUID_LEN 16 - -/*Function to get the potential length of the serialized buffer*/ -static int32_t -gfdb_query_record_serialized_length(gfdb_query_record_t *query_record) -{ - int32_t len = -1; - gfdb_link_info_t *link_info = NULL; - - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out); - - /* Length of GFID */ - len = UUID_LEN; - - /* length of number of links*/ - len += sizeof(int32_t); - - list_for_each_entry(link_info, &query_record->link_list, list) - { - /* length of PFID */ - len += UUID_LEN; - - /* Add size of base name length*/ - len += sizeof(int32_t); - - /* Length of base_name */ - len += strlen(link_info->file_name); - } - - /* length of footer */ - len += sizeof(int32_t); -out: - return len; -} - -/* Function for serializing query record. - * - * Query Record Serialization Format - * +---------------------------------------------------------------------------+ - * | GFID | Link count | <LINK INFO> |..... | FOOTER | - * +---------------------------------------------------------------------------+ - * 16 B 4 B Link Length 4 B - * - * - * Each <Link Info> will be serialized as - * +-----------------------------------------------+ - * | PGID | BASE_NAME_LENGTH | BASE_NAME | - * +-----------------------------------------------+ - * 16 B 4 B BASE_NAME_LENGTH - * - * - * FOOTER is a magic number 0xBAADF00D indicating the end of the record. - * This also serves as a serialized schema validator. - * - * The function will allocate memory to the serialized buffer, - * the caller needs to free it. - * Returns the length of the serialized buffer on success - * or -1 on failure. 
- * - * */ -static int -gfdb_query_record_serialize(gfdb_query_record_t *query_record, char **in_buffer) -{ - gfdb_link_info_t *link_info = NULL; - int count = -1; - int base_name_len = 0; - int buffer_length = 0; - int footer = GFDB_QUERY_RECORD_FOOTER; - char *buffer = NULL; - char *ret_buffer = NULL; - - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, (query_record->link_count > 0), out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, in_buffer, out); - - /* Calculate the total length of the serialized buffer */ - buffer_length = gfdb_query_record_serialized_length(query_record); - if (buffer_length <= 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Failed to calculate the length of " - "serialized buffer"); - goto out; - } - - /* Allocate memory to the serialized buffer */ - ret_buffer = GF_CALLOC(1, buffer_length, gf_common_mt_char); - if (!ret_buffer) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Memory allocation failed for " - "serialized buffer."); - goto out; - } - - buffer = ret_buffer; - - count = 0; - - /* Copying the GFID */ - memcpy(buffer, query_record->gfid, UUID_LEN); - buffer += UUID_LEN; - count += UUID_LEN; - - /* Copying the number of links */ - memcpy(buffer, &query_record->link_count, sizeof(int32_t)); - buffer += sizeof(int32_t); - count += sizeof(int32_t); - - list_for_each_entry(link_info, &query_record->link_list, list) - { - /* Copying the PFID */ - memcpy(buffer, link_info->pargfid, UUID_LEN); - buffer += UUID_LEN; - count += UUID_LEN; - - /* Copying base name length*/ - base_name_len = strlen(link_info->file_name); - memcpy(buffer, &base_name_len, sizeof(int32_t)); - buffer += sizeof(int32_t); - count += sizeof(int32_t); - - /* Length of base_name */ - memcpy(buffer, link_info->file_name, base_name_len); - buffer += base_name_len; - count += base_name_len; - } - - /* Copying the Footer of the record */ - memcpy(buffer, &footer, sizeof(int32_t)); - buffer += 
sizeof(int32_t); - count += sizeof(int32_t); - -out: - if (count < 0) { - GF_FREE(ret_buffer); - ret_buffer = NULL; - } - *in_buffer = ret_buffer; - return count; -} - -static gf_boolean_t -is_serialized_buffer_valid(char *in_buffer, int buffer_length) -{ - gf_boolean_t ret = _gf_false; - int footer = 0; - - /* Read the footer */ - in_buffer += (buffer_length - sizeof(int32_t)); - memcpy(&footer, in_buffer, sizeof(int32_t)); - - /* - * if the footer is not GFDB_QUERY_RECORD_FOOTER - * then the serialized record is invalid - * - * */ - if (footer != GFDB_QUERY_RECORD_FOOTER) { - goto out; - } - - ret = _gf_true; -out: - return ret; -} - -static int -gfdb_query_record_deserialize(char *in_buffer, int buffer_length, - gfdb_query_record_t **query_record) -{ - int ret = -1; - char *buffer = NULL; - int i = 0; - gfdb_link_info_t *link_info = NULL; - int count = 0; - int base_name_len = 0; - gfdb_query_record_t *ret_qrecord = NULL; - - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, in_buffer, out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, (buffer_length > 0), out); - - if (!is_serialized_buffer_valid(in_buffer, buffer_length)) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Invalid serialized query record"); - goto out; - } - - buffer = in_buffer; - - ret_qrecord = gfdb_query_record_new(); - if (!ret_qrecord) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Failed to allocate space to " - "gfdb_query_record_t"); - goto out; - } - - /* READ GFID */ - memcpy((ret_qrecord)->gfid, buffer, UUID_LEN); - buffer += UUID_LEN; - count += UUID_LEN; - - /* Read the number of link */ - memcpy(&(ret_qrecord->link_count), buffer, sizeof(int32_t)); - buffer += sizeof(int32_t); - count += sizeof(int32_t); - - /* Read all the links */ - for (i = 0; i < ret_qrecord->link_count; i++) { - if (count >= buffer_length) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Invalid serialized " - "query 
record"); - ret = -1; - goto out; - } - - link_info = gfdb_link_info_new(); - if (!link_info) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Failed to create link_info"); - goto out; - } - - /* READ PGFID */ - memcpy(link_info->pargfid, buffer, UUID_LEN); - buffer += UUID_LEN; - count += UUID_LEN; - - /* Read base name length */ - memcpy(&base_name_len, buffer, sizeof(int32_t)); - buffer += sizeof(int32_t); - count += sizeof(int32_t); - - /* READ basename */ - memcpy(link_info->file_name, buffer, base_name_len); - buffer += base_name_len; - count += base_name_len; - link_info->file_name[base_name_len] = '\0'; - - /* Add link_info to the list */ - list_add_tail(&link_info->list, &(ret_qrecord->link_list)); - - /* Resetting link_info */ - link_info = NULL; - } - - ret = 0; -out: - if (ret) { - gfdb_query_record_free(ret_qrecord); - ret_qrecord = NULL; - } - *query_record = ret_qrecord; - return ret; -} - -/* Function to write query record to file - * - * Disk format - * +---------------------------------------------------------------------------+ - * | Length of serialized query record | Serialized Query Record | - * +---------------------------------------------------------------------------+ - * 4 bytes Length of serialized query record - * - * Please refer gfdb_query_record_serialize () for format of - * Serialized Query Record - * - * */ -int -gfdb_write_query_record(int fd, gfdb_query_record_t *query_record) -{ - int ret = -1; - int buffer_len = 0; - char *buffer = NULL; - int write_len = 0; - char *write_buffer = NULL; - - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, (fd >= 0), out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out); - - buffer_len = gfdb_query_record_serialize(query_record, &buffer); - if (buffer_len < 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Failed to serialize query record"); - goto out; - } - - /* Serialize the buffer length and write to file */ - ret = write(fd, &buffer_len, sizeof(int32_t)); - 
if (ret < 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Failed to write buffer length" - " to file"); - goto out; - } - - /* Write the serialized query record to file */ - write_len = buffer_len; - write_buffer = buffer; - while ((ret = write(fd, write_buffer, write_len)) < write_len) { - if (ret < 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, errno, LG_MSG_DB_ERROR, - "Failed to write serialized " - "query record to file"); - goto out; - } - - write_buffer += ret; - write_len -= ret; - } - - ret = 0; -out: - GF_FREE(buffer); - return ret; -} - -/* Function to read query record from file. - * Allocates memory to query record and - * returns length of serialized query record when successful - * Return -1 when failed. - * Return 0 when reached EOF. - * */ -int -gfdb_read_query_record(int fd, gfdb_query_record_t **query_record) -{ - int ret = -1; - int buffer_len = 0; - int read_len = 0; - char *buffer = NULL; - char *read_buffer = NULL; - - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, (fd >= 0), out); - GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out); - - /* Read serialized query record length from the file*/ - ret = sys_read(fd, &buffer_len, sizeof(int32_t)); - if (ret < 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Failed reading buffer length" - " from file"); - goto out; - } - /* EOF */ - else if (ret == 0) { - ret = 0; - goto out; - } - - /* Assumed sane range is 1B - 10MB */ - if ((buffer_len <= 0) || (buffer_len > (10 * 1024 * 1024))) { - ret = -1; - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "buffer length range is out of bound %d", buffer_len); - goto out; - } - - /* Allocating memory to the serialization buffer */ - buffer = GF_CALLOC(1, buffer_len, gf_common_mt_char); - if (!buffer) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Failed to allocate space to " - "serialized buffer"); - goto out; - } - - /* Read the serialized query record from file */ - read_len = buffer_len; - 
read_buffer = buffer; - while ((ret = sys_read(fd, read_buffer, read_len)) < read_len) { - /*Any error */ - if (ret < 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, errno, LG_MSG_DB_ERROR, - "Failed to read serialized " - "query record from file"); - goto out; - } - /* EOF */ - else if (ret == 0) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Invalid query record or " - "corrupted query file"); - ret = -1; - goto out; - } - - read_buffer += ret; - read_len -= ret; - } - - ret = gfdb_query_record_deserialize(buffer, buffer_len, query_record); - if (ret) { - gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "Failed to de-serialize query record"); - goto out; - } - - ret = buffer_len; -out: - GF_FREE(buffer); - return ret; -} diff --git a/libglusterfs/src/gfdb/gfdb_data_store_helper.h b/libglusterfs/src/gfdb/gfdb_data_store_helper.h deleted file mode 100644 index 7b4b0ae6aa1..00000000000 --- a/libglusterfs/src/gfdb/gfdb_data_store_helper.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ -#ifndef __GFDB_DATA_STORE_HELPER_H -#define __GFDB_DATA_STORE_HELPER_H - -#include <time.h> -#include <sys/time.h> -#include <string.h> -#include <fcntl.h> - -#include "common-utils.h" -#include "compat-uuid.h" -#include "gfdb_mem-types.h" -#include "dict.h" -#include "byte-order.h" -#include "libglusterfs-messages.h" - -#define GFDB_DATA_STORE "gfdbdatastore" - -/******************************************************************************* - * - * Query related data structure and functions - * - * ****************************************************************************/ - -#ifdef NAME_MAX -#define GF_NAME_MAX NAME_MAX -#else -#define GF_NAME_MAX 255 -#endif - -/*Structure to hold the link information*/ -typedef struct gfdb_link_info { - uuid_t pargfid; - char file_name[GF_NAME_MAX]; - struct list_head list; -} gfdb_link_info_t; - -/*Structure used for querying purpose*/ -typedef struct gfdb_query_record { - uuid_t gfid; - /*This is the hardlink list*/ - struct list_head link_list; - int link_count; -} gfdb_query_record_t; - -/*Create a single link info structure*/ -gfdb_link_info_t * -gfdb_link_info_new(); -typedef gfdb_link_info_t *(*gfdb_link_info_new_t)(); - -/*Destroy a link info structure*/ -void -gfdb_link_info_free(gfdb_link_info_t *gfdb_link_info); -typedef void (*gfdb_link_info_free_t)(gfdb_link_info_t *gfdb_link_info); - -/* Function to create the query_record */ -gfdb_query_record_t * -gfdb_query_record_new(); -typedef gfdb_query_record_t *(*gfdb_query_record_new_t)(); - -/* Function to add linkinfo to query record */ -int -gfdb_add_link_to_query_record(gfdb_query_record_t *gfdb_query_record, - uuid_t pgfid, char *base_name); -typedef int (*gfdb_add_link_to_query_record_t)(gfdb_query_record_t *, uuid_t, - char *); - -/*Function to destroy query record*/ -void -gfdb_query_record_free(gfdb_query_record_t *gfdb_query_record); -typedef void (*gfdb_query_record_free_t)(gfdb_query_record_t *); - -/* Function to write query record to file */ -int 
-gfdb_write_query_record(int fd, gfdb_query_record_t *gfdb_query_record); -typedef int (*gfdb_write_query_record_t)(int, gfdb_query_record_t *); - -/* Function to read query record from file. - * Allocates memory to query record and return 0 when successful - * Return -1 when failed. - * Return 0 when EOF. - * */ -int -gfdb_read_query_record(int fd, gfdb_query_record_t **gfdb_query_record); -typedef int (*gfdb_read_query_record_t)(int, gfdb_query_record_t **); - -#endif
\ No newline at end of file diff --git a/libglusterfs/src/gfdb/gfdb_data_store_types.h b/libglusterfs/src/gfdb/gfdb_data_store_types.h deleted file mode 100644 index 5ee050d4fab..00000000000 --- a/libglusterfs/src/gfdb/gfdb_data_store_types.h +++ /dev/null @@ -1,532 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ -#ifndef __GFDB_DATA_STORE_TYPE_H -#define __GFDB_DATA_STORE_TYPE_H - -#include "gfdb_data_store_helper.h" - -/* - * Helps in dynamically choosing log level - * */ -static inline gf_loglevel_t -_gfdb_log_level(gf_loglevel_t given_level, gf_boolean_t ignore_level) -{ - return (ignore_level) ? GF_LOG_DEBUG : given_level; -} - -typedef enum gf_db_operation { - GFDB_INVALID_DB_OP = -1, - /* Query DB OPS : All the Query DB_OP should be added */ - /* in between START and END */ - GFDB_QUERY_DB_OP_START, /* Start of Query DB_OP */ - GFDB_QUERY_DB_OP, - GF_FTABLE_EXISTS_DB_OP, - GFDB_QUERY_DB_OP_END, /* End of Query DB_OP */ - /* Non-Query DB OPS */ - GFDB_DB_CREATE_DB_OP, - GFDB_GFID_EXIST_DB_OP, - GFDB_W_INSERT_DB_OP, - GFDB_WU_INSERT_DB_OP, - GFDB_W_UPDATE_DB_OP, - GFDB_WU_UPDATE_DB_OP, - GFDB_W_DELETE_DB_OP, - GFDB_UW_DELETE_DB_OP, - GFDB_WFC_UPDATE_DB_OP, - GFDB_RFC_UPDATE_DB_OP, - GFDB_DB_COMPACT_DB_OP /* Added for VACUUM/manual compaction support */ -} gf_db_operation_t; - -#define GF_COL_MAX_NUM 2 -#define GF_COL_ALL " * " - -/* Column/fields names used in the DB. 
- * If any new field is added should be updated here*/ -#define GF_COL_GF_ID "GF_ID" -#define GF_COL_GF_PID "GF_PID" -#define GF_COL_FILE_NAME "FNAME" -#define GF_COL_WSEC "W_SEC" -#define GF_COL_WMSEC "W_MSEC" -#define GF_COL_UWSEC "UW_SEC" -#define GF_COL_UWMSEC "UW_MSEC" -#define GF_COL_WSEC_READ "W_READ_SEC" -#define GF_COL_WMSEC_READ "W_READ_MSEC" -#define GF_COL_UWSEC_READ "UW_READ_SEC" -#define GF_COL_UWMSEC_READ "UW_READ_MSEC" -#define GF_COL_WDEL_FLAG "W_DEL_FLAG" -#define GF_COL_WRITE_FREQ_CNTR "WRITE_FREQ_CNTR" -#define GF_COL_READ_FREQ_CNTR "READ_FREQ_CNTR" -#define GF_COL_LINK_UPDATE "LINK_UPDATE" - -/***********************Time related********************************/ -/*1 sec = 1000000 microsec*/ -#define GFDB_MICROSEC 1000000 - -/*All the gfdb times are represented using this structure*/ -typedef struct timeval gfdb_time_t; - -/*Convert time into seconds*/ -static inline uint64_t -gfdb_time_2_usec(gfdb_time_t *gfdb_time) -{ - GF_ASSERT(gfdb_time); - return ((uint64_t)gfdb_time->tv_sec * GFDB_MICROSEC) + gfdb_time->tv_usec; -} - -/****************************************************************************** - * - * Insert/Update Record related data structures/functions - * - * ****************************************************************************/ - -/*Indicated a generic synchronous write to the db - * This may or may not be implemented*/ -typedef enum gfdb_sync_type { - GFDB_INVALID_SYNC = -1, - GFDB_DB_ASYNC, - GFDB_DB_SYNC -} gfdb_sync_type_t; - -/*Strings related to the abvove sync type*/ -#define GFDB_STR_DB_ASYNC "async" -#define GFDB_STR_DB_SYNC "sync" - -/*To convert sync type from string to gfdb_sync_type_t*/ -static inline int -gf_string2gfdbdbsync(char *sync_option) -{ - int ret = -1; - - if (!sync_option) - goto out; - if (strcmp(sync_option, GFDB_STR_DB_ASYNC) == 0) { - ret = GFDB_DB_ASYNC; - } else if (strcmp(sync_option, GFDB_STR_DB_SYNC) == 0) { - ret = GFDB_DB_SYNC; - } -out: - return ret; -} - -/*Indicated different types of 
db*/ -typedef enum gfdb_db_type { - GFDB_INVALID_DB = -1, - GFDB_HASH_FILE_STORE, - GFDB_ROCKS_DB, - GFDB_SQLITE3, - GFDB_HYPERDEX, - GFDB_DB_END /*Add DB type Entries above this only*/ -} gfdb_db_type_t; - -/*String related to the db types*/ -#define GFDB_STR_HASH_FILE_STORE "hashfile" -#define GFDB_STR_ROCKS_DB "rocksdb" -#define GFDB_STR_SQLITE3 "sqlite3" -#define GFDB_STR_HYPERDEX "hyperdex" - -/*Convert db type in string to gfdb_db_type_t*/ -static inline int -gf_string2gfdbdbtype(char *db_option) -{ - int ret = -1; - - if (!db_option) - goto out; - if (strcmp(db_option, GFDB_STR_HASH_FILE_STORE) == 0) { - ret = GFDB_HASH_FILE_STORE; - } else if (strcmp(db_option, GFDB_STR_ROCKS_DB) == 0) { - ret = GFDB_ROCKS_DB; - } else if (strcmp(db_option, GFDB_STR_SQLITE3) == 0) { - ret = GFDB_SQLITE3; - } else if (strcmp(db_option, GFDB_STR_HYPERDEX) == 0) { - ret = GFDB_HYPERDEX; - } -out: - return ret; -} - -/*Tells the path of the fop*/ -typedef enum gfdb_fop_path { - GFDB_FOP_INVALID = -1, - /*Filler value for zero*/ - GFDB_FOP_PATH_ZERO = 0, - /*have wind path below this*/ - GFDB_FOP_WIND = 1, - GFDB_FOP_WDEL = 2, - /*have unwind path below this*/ - GFDB_FOP_UNWIND = 4, - /*Delete unwind path*/ - GFDB_FOP_UNDEL = 8, - GFDB_FOP_UNDEL_ALL = 16 -} gfdb_fop_path_t; -/*Strings related to the above fop path*/ -#define GFDB_STR_FOP_INVALID "INVALID" -#define GFDB_STR_FOP_WIND "ENTRY" -#define GFDB_STR_FOP_UNWIND "EXIT" -#define GFDB_STR_FOP_WDEL "WDEL" -#define GFDB_STR_FOP_UNDEL "UNDEL" - -static inline gf_boolean_t -iswindpath(gfdb_fop_path_t gfdb_fop_path) -{ - return ((gfdb_fop_path == GFDB_FOP_WIND) || - (gfdb_fop_path == GFDB_FOP_WDEL)) - ? _gf_true - : _gf_false; -} - -static inline gf_boolean_t -isunwindpath(gfdb_fop_path_t gfdb_fop_path) -{ - return (gfdb_fop_path >= GFDB_FOP_UNWIND) ? 
_gf_true : _gf_false; -} - -/*Tell what type of fop it was - * Like whether a dentry fop or a inode fop - * Read fop or a write fop etc*/ -typedef enum gfdb_fop_type { - GFDB_FOP_INVALID_OP = -1, - /*Filler value for zero*/ - GFDB_FOP_TYPE_ZERO = 0, - GFDB_FOP_DENTRY_OP = 1, - GFDB_FOP_DENTRY_CREATE_OP = 2, - GFDB_FOP_INODE_OP = 4, - GFDB_FOP_WRITE_OP = 8, - GFDB_FOP_READ_OP = 16 -} gfdb_fop_type_t; - -#define GFDB_FOP_INODE_WRITE (GFDB_FOP_INODE_OP | GFDB_FOP_WRITE_OP) - -#define GFDB_FOP_DENTRY_WRITE (GFDB_FOP_DENTRY_OP | GFDB_FOP_WRITE_OP) - -#define GFDB_FOP_CREATE_WRITE (GFDB_FOP_DENTRY_CREATE_OP | GFDB_FOP_WRITE_OP) - -#define GFDB_FOP_INODE_READ (GFDB_FOP_INODE_OP | GFDB_FOP_READ_OP) - -static inline gf_boolean_t -isreadfop(gfdb_fop_type_t fop_type) -{ - return (fop_type & GFDB_FOP_READ_OP) ? _gf_true : _gf_false; -} - -static inline gf_boolean_t -isdentryfop(gfdb_fop_type_t fop_type) -{ - return ((fop_type & GFDB_FOP_DENTRY_OP) || - (fop_type & GFDB_FOP_DENTRY_CREATE_OP)) - ? _gf_true - : _gf_false; -} - -static inline gf_boolean_t -isdentrycreatefop(gfdb_fop_type_t fop_type) -{ - return (fop_type & GFDB_FOP_DENTRY_CREATE_OP) ? 
_gf_true : _gf_false; -} - -/*The structure that is used to send insert/update the databases - * using insert_db api*/ -typedef struct gfdb_db_record { - /* GFID */ - uuid_t gfid; - /* Used during a rename refer ctr_rename() in changetimerecorder - * xlator*/ - uuid_t old_gfid; - /* Parent GFID */ - uuid_t pargfid; - uuid_t old_pargfid; - /* File names */ - char file_name[GF_NAME_MAX + 1]; - char old_file_name[GF_NAME_MAX + 1]; - /* FOP type and FOP path*/ - gfdb_fop_type_t gfdb_fop_type; - gfdb_fop_path_t gfdb_fop_path; - /*Time of change or access*/ - gfdb_time_t gfdb_wind_change_time; - gfdb_time_t gfdb_unwind_change_time; - /* For crash consistency while inserting/updating hard links */ - gf_boolean_t islinkupdate; - /* For link consistency we do a double update i.e mark the link - * during the wind and during the unwind we update/delete the link. - * This has a performance hit. We give a choice here whether we need - * link consistency to be spoton or not using link_consistency flag. - * This will have only one link update */ - gf_boolean_t link_consistency; - /* For dentry fops we can choose to ignore recording of unwind time */ - /* For inode fops "record_exit" volume option does the trick, */ - /* but for dentry fops we update the LINK_UPDATE, so an extra */ - /* flag is provided to ignore the recording of the unwind time. */ - gf_boolean_t do_record_uwind_time; - /* Global flag to record or not record counters */ - gf_boolean_t do_record_counters; - /* Global flag to Record/Not Record wind or wind time. - * This flag will overrule do_record_uwind_time*/ - gf_boolean_t do_record_times; - /* Ignoring errors while inserting. - * */ - gf_boolean_t ignore_errors; -} gfdb_db_record_t; - -/******************************************************************************* - * - * Signatures for the plugin functions - * i.e Any plugin should implementment - * these functions to integrate with - * libgfdb. 
- * - * ****************************************************************************/ - -/*Call back function for querying the database*/ -typedef int (*gf_query_callback_t)(gfdb_query_record_t *, void *); - -/* Used to initialize db connection - * Arguments: - * args : Dictionary containing database specific parameters - * db_conn : pointer to plugin specific data base connection - * that will be created. If the call is successful - * db_conn will contain the plugin specific connection - * If call is unsuccessful will have NULL. - * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_init_db_t)(dict_t *args, void **db_conn); - -/* Used to terminate/de-initialize db connection - * (Destructor function for db connection object) - * Arguments: - * db_conn : plugin specific data base connection - * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_fini_db_t)(void **db_conn); - -/*Used to insert/updated records in the database - * Arguments: - * db_conn : plugin specific data base connection - * gfdb_db_record : Record to be inserted/updated - * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_insert_record_t)(void *db_conn, gfdb_db_record_t *db_record); - -/*Used to delete record from the database - * Arguments: - * db_conn : plugin specific data base connection - * gfdb_db_record : Record to be deleted - * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_delete_record_t)(void *db_conn, gfdb_db_record_t *db_record); - -/*Used to compact the database - * Arguments: - * db_conn : GFDB Connection node - * compact_active : Is compaction currently on? - * compact_mode_switched : Was the compaction switch flipped? 
- * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_compact_db_t)(void *db_conn, gf_boolean_t compact_active, - gf_boolean_t compact_mode_switched); - -/* Query all the records from the database - * Arguments: - * db_conn : plugin specific data base connection - * query_callback : Call back function that will be called - * for every record found - * _query_cbk_args : Custom argument passed for the call back - * function query_callback - * query_limit : 0 - list all files - * positive value - add the LIMIT clause to - * the SQL query to limit the number of records - * returned - * - * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_find_all_t)(void *db_conn, - gf_query_callback_t query_callback, - void *_cbk_args, int query_limit); - -/* Query records/files that have not changed/accessed - * from a time in past to current time - * Arguments: - * db_conn : plugin specific data base connection - * query_callback : Call back function that will be called - * for every record found - * _cbk_args : Custom argument passed for the call back - * function query_callback - * for_time : Time from where the file/s are not - * changed/accessed - * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_find_unchanged_for_time_t)( - void *db_conn, gf_query_callback_t query_callback, void *_cbk_args, - gfdb_time_t *_time); - -/* Query records/files that have changed/accessed from a - * time in past to current time - * Arguments: - * db_conn : plugin specific data base connection - * query_callback : Call back function that will be called - * for every record found - * _cbk_args : Custom argument passed for the call back - * function query_callback - * _time : Time from where the file/s are - * changed/accessed - * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_find_recently_changed_files_t)( - void *db_conn, 
gf_query_callback_t query_callback, void *_cbk_args, - gfdb_time_t *_time); - -/* Query records/files that have not changed/accessed - * from a time in past to current time, with - * a desired frequency - * - * Arguments: - * db_conn : plugin specific data base connection - * query_callback : Call back function that will be called - * for every record found - * _cbk_args : Custom argument passed for the call back - * function query_callback - * _time : Time from where the file/s are not - * changed/accessed - * _write_freq : Desired Write Frequency lower limit - * _read_freq : Desired Read Frequency lower limit - * _clear_counters : If true, Clears all the frequency counters of - * all files. - * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_find_unchanged_for_time_freq_t)( - void *db_conn, gf_query_callback_t query_callback, void *_cbk_args, - gfdb_time_t *_time, int _write_freq, int _read_freq, - gf_boolean_t _clear_counters); - -/* Query records/files that have changed/accessed from a - * time in past to current time, with a desired frequency - * Arguments: - * db_conn : plugin specific data base connection - * query_callback : Call back function that will be called - * for every record found - * _cbk_args : Custom argument passed for the call back - * function query_callback - * _time : Time from where the file/s are - * changed/accessed - * _write_freq : Desired Write Frequency lower limit - * _read_freq : Desired Read Frequency lower limit - * _clear_counters : If true, Clears all the frequency counters of - * all files. 
- * Returns : if successful return 0 or - * -ve value in case of failure*/ -typedef int (*gfdb_find_recently_changed_files_freq_t)( - void *db_conn, gf_query_callback_t query_callback, void *_cbk_args, - gfdb_time_t *_time, int _write_freq, int _read_freq, - gf_boolean_t _clear_counters); - -typedef int (*gfdb_clear_files_heat_t)(void *db_conn); - -typedef int (*gfdb_get_db_version_t)(void *db_conn, char **version); - -typedef int (*gfdb_get_db_params_t)(void *db_conn, char *param_key, - char **param_value); - -typedef int (*gfdb_set_db_params_t)(void *db_conn, char *param_key, - char *param_value); - -/*Data structure holding all the above plugin function pointers*/ -typedef struct gfdb_db_operations { - gfdb_init_db_t init_db_op; - gfdb_fini_db_t fini_db_op; - gfdb_insert_record_t insert_record_op; - gfdb_delete_record_t delete_record_op; - gfdb_compact_db_t compact_db_op; - gfdb_find_all_t find_all_op; - gfdb_find_unchanged_for_time_t find_unchanged_for_time_op; - gfdb_find_recently_changed_files_t find_recently_changed_files_op; - gfdb_find_unchanged_for_time_freq_t find_unchanged_for_time_freq_op; - gfdb_find_recently_changed_files_freq_t find_recently_changed_files_freq_op; - gfdb_clear_files_heat_t clear_files_heat_op; - gfdb_get_db_version_t get_db_version; - gfdb_get_db_params_t get_db_params; - gfdb_set_db_params_t set_db_params; -} gfdb_db_operations_t; - -/******************************************************************************* - * - * Database connection object: This objected is maitained by libgfdb for each - * database connection created. - * gf_db_connection : DB connection specific to the plugin - * gfdb_db_operations : Contains all the libgfdb API implementation - * from the plugin. 
- * gfdb_db_type : Type of database - * - * ****************************************************************************/ - -typedef struct gfdb_connection { - void *gf_db_connection; - gfdb_db_operations_t gfdb_db_operations; - gfdb_db_type_t gfdb_db_type; -} gfdb_connection_t; - -/******************************************************************************* - * - * Macros for get and set db options - * - * ****************************************************************************/ - -/*Set param_key : str_value into param_dict*/ -#define SET_DB_PARAM_TO_DICT(comp_name, params_dict, param_key, str_value, \ - ret, error) \ - do { \ - data_t *data = NULL; \ - data = str_to_data(str_value); \ - if (!data) \ - goto error; \ - ret = dict_add(params_dict, param_key, data); \ - if (ret) { \ - gf_msg(comp_name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, \ - "Failed setting %s " \ - "to params dictionary", \ - param_key); \ - data_destroy(data); \ - goto error; \ - }; \ - } while (0) - -/*get str_value of param_key from param_dict*/ -#define GET_DB_PARAM_FROM_DICT(comp_name, params_dict, param_key, str_value, \ - error) \ - do { \ - data_t *data = NULL; \ - data = dict_get(params_dict, param_key); \ - if (!data) { \ - gf_msg(comp_name, GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED, \ - "Failed to retrieve " \ - "%s from params", \ - param_key); \ - goto error; \ - } else { \ - str_value = data->data; \ - }; \ - } while (0) - -/*get str_value of param_key from param_dict. 
if param_key is not present - * set _default_v to str_value */ -#define GET_DB_PARAM_FROM_DICT_DEFAULT(comp_name, params_dict, param_key, \ - str_value, _default_v) \ - do { \ - data_t *data = NULL; \ - data = dict_get(params_dict, param_key); \ - if (!data) { \ - str_value = _default_v; \ - gf_msg(comp_name, GF_LOG_TRACE, 0, LG_MSG_GET_PARAM_FAILED, \ - "Failed to retrieve " \ - "%s from params.Assigning default value: %s", \ - param_key, _default_v); \ - } else { \ - str_value = data->data; \ - }; \ - } while (0) - -#endif diff --git a/libglusterfs/src/gfdb/gfdb_mem-types.h b/libglusterfs/src/gfdb/gfdb_mem-types.h deleted file mode 100644 index 0ea543b7ce1..00000000000 --- a/libglusterfs/src/gfdb/gfdb_mem-types.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __GFDB_MEM_TYPES_H__ -#define __GFDB_MEM_TYPES_H__ - -#include "mem-types.h" - -enum gfdb_mem_types_ { gfdb_mtstart = gf_common_mt_end + 1, gfdb_mt_end }; -#endif diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.c b/libglusterfs/src/gfdb/gfdb_sqlite3.c deleted file mode 100644 index 63957278e8a..00000000000 --- a/libglusterfs/src/gfdb/gfdb_sqlite3.c +++ /dev/null @@ -1,1542 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#include "gfdb_sqlite3.h" -#include "gfdb_sqlite3_helper.h" -#include "libglusterfs-messages.h" -#include "syscall.h" - -/****************************************************************************** - * - * Util functions - * - * ***************************************************************************/ -gf_sql_connection_t * -gf_sql_connection_init() -{ - gf_sql_connection_t *gf_sql_conn = NULL; - - gf_sql_conn = GF_CALLOC(1, sizeof(gf_sql_connection_t), - gf_mt_sql_connection_t); - if (gf_sql_conn == NULL) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "Error allocating memory to " - "gf_sql_connection_t "); - } - - return gf_sql_conn; -} - -void -gf_sql_connection_fini(gf_sql_connection_t **sql_connection) -{ - if (!sql_connection) - return; - GF_FREE(*sql_connection); - *sql_connection = NULL; -} - -const char * -gf_sql_jm2str(gf_sql_journal_mode_t jm) -{ - switch (jm) { - case gf_sql_jm_delete: - return GF_SQL_JM_DELETE; - case gf_sql_jm_truncate: - return GF_SQL_JM_TRUNCATE; - case gf_sql_jm_persist: - return GF_SQL_JM_PERSIST; - case gf_sql_jm_memory: - return GF_SQL_JM_MEMORY; - case gf_sql_jm_wal: - return GF_SQL_JM_WAL; - case gf_sql_jm_off: - return GF_SQL_JM_OFF; - case gf_sql_jm_invalid: - break; - } - return NULL; -} - -gf_sql_journal_mode_t -gf_sql_str2jm(const char *jm_str) -{ - if (!jm_str) { - return gf_sql_jm_invalid; - } else if (strcmp(jm_str, GF_SQL_JM_DELETE) == 0) { - return gf_sql_jm_delete; - } else if (strcmp(jm_str, GF_SQL_JM_TRUNCATE) == 0) { - return gf_sql_jm_truncate; - } else if (strcmp(jm_str, GF_SQL_JM_PERSIST) == 0) { - return gf_sql_jm_persist; - } else if (strcmp(jm_str, GF_SQL_JM_MEMORY) == 0) { - return gf_sql_jm_memory; - } else if (strcmp(jm_str, GF_SQL_JM_WAL) == 0) { - return gf_sql_jm_wal; - } else if (strcmp(jm_str, GF_SQL_JM_OFF) == 0) { - return gf_sql_jm_off; - } - return gf_sql_jm_invalid; -} - -const char * -gf_sql_av_t2str(gf_sql_auto_vacuum_t sql_av) -{ - switch (sql_av) { - 
case gf_sql_av_none: - return GF_SQL_AV_NONE; - case gf_sql_av_full: - return GF_SQL_AV_FULL; - case gf_sql_av_incr: - return GF_SQL_AV_INCR; - case gf_sql_av_invalid: - break; - } - return NULL; -} - -gf_sql_auto_vacuum_t -gf_sql_str2av_t(const char *av_str) -{ - if (!av_str) { - return gf_sql_av_invalid; - } else if (strcmp(av_str, GF_SQL_AV_NONE) == 0) { - return gf_sql_av_none; - } else if (strcmp(av_str, GF_SQL_AV_FULL) == 0) { - return gf_sql_av_full; - } else if (strcmp(av_str, GF_SQL_AV_INCR) == 0) { - return gf_sql_av_incr; - } - return gf_sql_av_invalid; -} - -const char * -gf_sync_t2str(gf_sql_sync_t sql_sync) -{ - switch (sql_sync) { - case gf_sql_sync_off: - return GF_SQL_SYNC_OFF; - case gf_sql_sync_normal: - return GF_SQL_SYNC_NORMAL; - case gf_sql_sync_full: - return GF_SQL_SYNC_FULL; - case gf_sql_sync_invalid: - break; - } - return NULL; -} - -gf_sql_sync_t -gf_sql_str2sync_t(const char *sync_str) -{ - if (!sync_str) { - return gf_sql_sync_invalid; - } else if (strcmp(sync_str, GF_SQL_SYNC_OFF) == 0) { - return gf_sql_sync_off; - } else if (strcmp(sync_str, GF_SQL_SYNC_NORMAL) == 0) { - return gf_sql_sync_normal; - } else if (strcmp(sync_str, GF_SQL_SYNC_FULL) == 0) { - return gf_sql_sync_full; - } - return gf_sql_sync_invalid; -} - -/*TODO replace GF_CALLOC by mem_pool or iobuff if required for performance */ -static char * -sql_stmt_init() -{ - char *sql_stmt = NULL; - - sql_stmt = GF_CALLOC(GF_STMT_SIZE_MAX, sizeof(char), gf_common_mt_char); - - if (!sql_stmt) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, - "Error allocating memory to SQL " - "Statement "); - goto out; - } -out: - return sql_stmt; -} - -/*TODO replace GF_FREE by mem_pool or iobuff if required for performance */ -static void -sql_stmt_fini(char **sql_stmt) -{ - GF_FREE(*sql_stmt); -} - -/****************************************************************************** - * DB Essential functions used by - * > gf_open_sqlite3_conn () - * > gf_close_sqlite3_conn 
() - * ***************************************************************************/ -static sqlite3 * -gf_open_sqlite3_conn(char *sqlite3_db_path, int flags) -{ - sqlite3 *sqlite3_db_conn = NULL; - int ret = -1; - - GF_ASSERT(sqlite3_db_path); - - /*Creates DB if not created*/ - ret = sqlite3_open_v2(sqlite3_db_path, &sqlite3_db_conn, flags, NULL); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR, - "FATAL: Could open %s : %s", sqlite3_db_path, - sqlite3_errmsg(sqlite3_db_conn)); - } - return sqlite3_db_conn; -} - -static int -gf_close_sqlite3_conn(sqlite3 *sqlite3_db_conn) -{ - int ret = 0; - - GF_ASSERT(sqlite3_db_conn); - - if (sqlite3_db_conn) { - ret = sqlite3_close(sqlite3_db_conn); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CONNECTION_ERROR, - "FATAL: sqlite3 close" - " connection failed %s", - sqlite3_errmsg(sqlite3_db_conn)); - ret = -1; - goto out; - } - } - ret = 0; -out: - return ret; -} - -/****************************************************************************** - * - * Database init / fini / create table - * - * ***************************************************************************/ - -/*Function to fill db operations*/ -void -gf_sqlite3_fill_db_operations(gfdb_db_operations_t *gfdb_db_ops) -{ - GF_ASSERT(gfdb_db_ops); - - gfdb_db_ops->init_db_op = gf_sqlite3_init; - gfdb_db_ops->fini_db_op = gf_sqlite3_fini; - - gfdb_db_ops->insert_record_op = gf_sqlite3_insert; - gfdb_db_ops->delete_record_op = gf_sqlite3_delete; - gfdb_db_ops->compact_db_op = gf_sqlite3_vacuum; - - gfdb_db_ops->find_all_op = gf_sqlite3_find_all; - gfdb_db_ops - ->find_unchanged_for_time_op = gf_sqlite3_find_unchanged_for_time; - gfdb_db_ops->find_recently_changed_files_op = - gf_sqlite3_find_recently_changed_files; - gfdb_db_ops->find_unchanged_for_time_freq_op = - gf_sqlite3_find_unchanged_for_time_freq; - gfdb_db_ops->find_recently_changed_files_freq_op = - gf_sqlite3_find_recently_changed_files_freq; - - 
gfdb_db_ops->clear_files_heat_op = gf_sqlite3_clear_files_heat; - - gfdb_db_ops->get_db_version = gf_sqlite3_version; - - gfdb_db_ops->get_db_params = gf_sqlite3_pragma; - - gfdb_db_ops->set_db_params = gf_sqlite3_set_pragma; -} - -static int -create_filetable(sqlite3 *sqlite3_db_conn) -{ - int ret = -1; - char *sql_stmt = NULL; - char *sql_strerror = NULL; - - GF_ASSERT(sqlite3_db_conn); - - sql_stmt = sql_stmt_init(); - if (!sql_stmt) { - ret = ENOMEM; - goto out; - } - - GF_CREATE_STMT(sql_stmt); - - ret = sqlite3_exec(sqlite3_db_conn, sql_stmt, NULL, NULL, &sql_strerror); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_EXEC_FAILED, - "Failed executing: %s : %s", sql_stmt, sql_strerror); - sqlite3_free(sql_strerror); - ret = -1; - goto out; - } - - ret = 0; -out: - sql_stmt_fini(&sql_stmt); - return ret; -} - -static int -apply_sql_params_db(gf_sql_connection_t *sql_conn, dict_t *param_dict) -{ - int ret = -1; - char *temp_str = NULL; - char sqlite3_config_str[GF_NAME_MAX] = ""; - - GF_ASSERT(sql_conn); - GF_ASSERT(param_dict); - - /*Extract sql page_size from param_dict, - * if not specified default value will be GF_SQL_DEFAULT_PAGE_SIZE*/ - temp_str = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict, - GFDB_SQL_PARAM_PAGE_SIZE, temp_str, - GF_SQL_DEFAULT_PAGE_SIZE); - sql_conn->page_size = atoi(temp_str); - /*Apply page_size on the sqlite db*/ - GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "page_size", "%zd", - sql_conn->page_size, ret, out); - - /*Extract sql cache size from param_dict, - * if not specified default value will be - * GF_SQL_DEFAULT_CACHE_SIZE pages*/ - temp_str = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict, - GFDB_SQL_PARAM_CACHE_SIZE, temp_str, - GF_SQL_DEFAULT_CACHE_SIZE); - sql_conn->cache_size = atoi(temp_str); - /*Apply cache size on the sqlite db*/ - GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "cache_size", "%zd", - sql_conn->cache_size, ret, out); - - /*Extract sql 
journal mode from param_dict, - * if not specified default value will be - * GF_SQL_DEFAULT_JOURNAL_MODE i.e "wal"*/ - temp_str = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict, - GFDB_SQL_PARAM_JOURNAL_MODE, temp_str, - GF_SQL_DEFAULT_JOURNAL_MODE); - sql_conn->journal_mode = gf_sql_str2jm(temp_str); - /*Apply journal mode to the sqlite db*/ - GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "journal_mode", "%s", temp_str, - ret, out); - - /*Only when the journal mode is WAL, wal_autocheckpoint makes sense*/ - if (sql_conn->journal_mode == gf_sql_jm_wal) { - /*Extract sql wal auto check point from param_dict - * if not specified default value will be - * GF_SQL_DEFAULT_WAL_AUTOCHECKPOINT pages*/ - temp_str = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict, - GFDB_SQL_PARAM_WAL_AUTOCHECK, temp_str, - GF_SQL_DEFAULT_WAL_AUTOCHECKPOINT); - sql_conn->wal_autocheckpoint = atoi(temp_str); - /*Apply wal auto check point to the sqlite db*/ - GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "wal_autocheckpoint", "%zd", - sql_conn->wal_autocheckpoint, ret, out); - } - - /*Extract sql synchronous from param_dict - * if not specified default value will be GF_SQL_DEFAULT_SYNC*/ - temp_str = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict, - GFDB_SQL_PARAM_SYNC, temp_str, - GF_SQL_DEFAULT_SYNC); - sql_conn->synchronous = gf_sql_str2sync_t(temp_str); - /*Apply synchronous to the sqlite db*/ - GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "synchronous", "%d", - sql_conn->synchronous, ret, out); - - /*Extract sql auto_vacuum from param_dict - * if not specified default value will be GF_SQL_DEFAULT_AUTO_VACUUM*/ - temp_str = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict, - GFDB_SQL_PARAM_AUTO_VACUUM, temp_str, - GF_SQL_DEFAULT_AUTO_VACUUM); - sql_conn->auto_vacuum = gf_sql_str2av_t(temp_str); - /*Apply auto_vacuum to the sqlite db*/ - GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "auto_vacuum", "%d", - 
sql_conn->auto_vacuum, ret, out); - - ret = 0; -out: - return ret; -} - -int -gf_sqlite3_init(dict_t *args, void **db_conn) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = NULL; - struct stat stbuf = { - 0, - }; - gf_boolean_t is_dbfile_exist = _gf_false; - char *temp_str = NULL; - - GF_ASSERT(args); - GF_ASSERT(db_conn); - - if (*db_conn != NULL) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CONNECTION_ERROR, - "DB Connection is not " - "empty!"); - return 0; - } - - if (!sqlite3_threadsafe()) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_NOT_MULTITHREAD_MODE, - "sqlite3 is not in multithreaded mode"); - goto out; - } - - sql_conn = gf_sql_connection_init(); - if (!sql_conn) { - goto out; - } - - /*Extract sql db path from args*/ - temp_str = NULL; - GET_DB_PARAM_FROM_DICT(GFDB_STR_SQLITE3, args, GFDB_SQL_PARAM_DBPATH, - temp_str, out); - strncpy(sql_conn->sqlite3_db_path, temp_str, PATH_MAX - 1); - sql_conn->sqlite3_db_path[PATH_MAX - 1] = 0; - - is_dbfile_exist = (sys_stat(sql_conn->sqlite3_db_path, &stbuf) == 0) - ? 
_gf_true - : _gf_false; - - /*Creates DB if not created*/ - sql_conn->sqlite3_db_conn = gf_open_sqlite3_conn( - sql_conn->sqlite3_db_path, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE); - if (!sql_conn->sqlite3_db_conn) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CONNECTION_ERROR, - "Failed creating db connection"); - goto out; - } - - /* If the file exist we skip the config part - * and creation of the schema */ - if (is_dbfile_exist) - goto db_exists; - - /*Apply sqlite3 params to database*/ - ret = apply_sql_params_db(sql_conn, args); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, - "Failed applying sql params" - " to %s", - sql_conn->sqlite3_db_path); - goto out; - } - - /*Create the schema if NOT present*/ - ret = create_filetable(sql_conn->sqlite3_db_conn); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - "Failed Creating %s Table", GF_FILE_TABLE); - goto out; - } - -db_exists: - ret = 0; -out: - if (ret) { - gf_sqlite3_fini((void **)&sql_conn); - } - - *db_conn = sql_conn; - - return ret; -} - -int -gf_sqlite3_fini(void **db_conn) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = NULL; - - GF_ASSERT(db_conn); - sql_conn = *db_conn; - - if (sql_conn) { - if (sql_conn->sqlite3_db_conn) { - ret = gf_close_sqlite3_conn(sql_conn->sqlite3_db_conn); - if (ret) { - /*Logging of error done in - * gf_close_sqlite3_conn()*/ - goto out; - } - sql_conn->sqlite3_db_conn = NULL; - } - gf_sql_connection_fini(&sql_conn); - } - *db_conn = sql_conn; - ret = 0; -out: - return ret; -} - -/****************************************************************************** - * - * INSERT/UPDATE/DELETE Operations - * - * - * ***************************************************************************/ - -int -gf_sqlite3_insert(void *db_conn, gfdb_db_record_t *gfdb_db_record) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = db_conn; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, 
gfdb_db_record, out); - - switch (gfdb_db_record->gfdb_fop_path) { - case GFDB_FOP_WIND: - ret = gf_sql_insert_wind(sql_conn, gfdb_db_record); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_INSERT_FAILED, "Failed wind insert"); - goto out; - } - break; - case GFDB_FOP_UNWIND: - ret = gf_sql_insert_unwind(sql_conn, gfdb_db_record); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_INSERT_FAILED, "Failed unwind insert"); - goto out; - } - break; - - case GFDB_FOP_WDEL: - ret = gf_sql_update_delete_wind(sql_conn, gfdb_db_record); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_UPDATE_FAILED, - "Failed updating delete " - "during wind"); - goto out; - } - break; - case GFDB_FOP_UNDEL: - case GFDB_FOP_UNDEL_ALL: - ret = gf_sql_delete_unwind(sql_conn, gfdb_db_record); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_DELETE_FAILED, "Failed deleting"); - goto out; - } - break; - case GFDB_FOP_INVALID: - default: - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_INVALID_FOP, - "Cannot record to DB: Invalid FOP"); - goto out; - } - - ret = 0; -out: - return ret; -} - -int -gf_sqlite3_delete(void *db_conn, gfdb_db_record_t *gfdb_db_record) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = db_conn; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out); - - ret = 0; -out: - return ret; -} - -/****************************************************************************** - * - * SELECT QUERY FUNCTIONS - * - * - * ***************************************************************************/ - -static int -gf_get_basic_query_stmt(char **out_stmt) -{ - int ret = -1; - ret = gf_asprintf(out_stmt, - "select GF_FILE_TB.GF_ID," - "GF_FLINK_TB.GF_PID ," - 
"GF_FLINK_TB.FNAME " - "from GF_FLINK_TB, GF_FILE_TB " - "where " - "GF_FILE_TB.GF_ID = GF_FLINK_TB.GF_ID "); - if (ret <= 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed to create base query statement"); - *out_stmt = NULL; - } - return ret; -} - -/* - * Find All files recorded in the DB - * Input: - * query_callback : query callback function to handle - * result records from the query - * */ -int -gf_sqlite3_find_all(void *db_conn, gf_query_callback_t query_callback, - void *query_cbk_args, int query_limit) -{ - int ret = -1; - char *query_str = NULL; - gf_sql_connection_t *sql_conn = db_conn; - sqlite3_stmt *prep_stmt = NULL; - char *limit_query = NULL; - char *query = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out); - - ret = gf_get_basic_query_stmt(&query_str); - if (ret <= 0) { - goto out; - } - - query = query_str; - - if (query_limit > 0) { - ret = gf_asprintf(&limit_query, "%s LIMIT %d", query, query_limit); - if (ret < 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed creating limit query statement"); - limit_query = NULL; - goto out; - } - - query = limit_query; - } - - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query, -1, &prep_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed to prepare statement %s: %s", query, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed Query %s", query); - goto out; - } - - ret = 0; -out: - sqlite3_finalize(prep_stmt); - GF_FREE(query_str); - - if (limit_query) - GF_FREE(limit_query); - - return ret; -} - -/* - * Find recently changed files from the DB - * Input: - * query_callback : query callback function to handle - * result records from the query - * 
from_time : Time to define what is recent - * */ -int -gf_sqlite3_find_recently_changed_files(void *db_conn, - gf_query_callback_t query_callback, - void *query_cbk_args, - gfdb_time_t *from_time) -{ - int ret = -1; - char *query_str = NULL; - gf_sql_connection_t *sql_conn = db_conn; - sqlite3_stmt *prep_stmt = NULL; - uint64_t from_time_usec = 0; - char *base_query_str = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out); - - ret = gf_get_basic_query_stmt(&base_query_str); - if (ret <= 0) { - goto out; - } - - ret = gf_asprintf( - &query_str, - "%s AND" - /*First condition: For writes*/ - "( ((" GF_COL_TB_WSEC " * " TOSTRING( - GFDB_MICROSEC) " + " GF_COL_TB_WMSEC - ") >= ? )" - " OR " - /*Second condition: For reads*/ - "((" GF_COL_TB_RWSEC " * " TOSTRING( - GFDB_MICROSEC) " + " GF_COL_TB_RWMSEC - ") >= ?) )" - /* Order by write wind time in a - * descending order i.e most hot - * files w.r.t to write */ - " ORDER BY GF_FILE_TB.W_SEC DESC", - base_query_str); - - if (ret < 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed creating query statement"); - query_str = NULL; - goto out; - } - - from_time_usec = gfdb_time_2_usec(from_time); - - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query_str, -1, &prep_stmt, - 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed to prepare statement %s :" - " %s", - query_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind write wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 1, from_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind from_time_usec " - "%" PRIu64 " : %s", - from_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind read wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 2, from_time_usec); - if (ret != SQLITE_OK) { - 
gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind from_time_usec " - "%" PRIu64 " : %s ", - from_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the query*/ - ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed Query %s", query_str); - goto out; - } - - ret = 0; -out: - sqlite3_finalize(prep_stmt); - GF_FREE(base_query_str); - GF_FREE(query_str); - return ret; -} - -/* - * Find unchanged files from a specified time from the DB - * Input: - * query_callback : query callback function to handle - * result records from the query - * for_time : Time from where the file/s are not changed - * */ -int -gf_sqlite3_find_unchanged_for_time(void *db_conn, - gf_query_callback_t query_callback, - void *query_cbk_args, gfdb_time_t *for_time) -{ - int ret = -1; - char *query_str = NULL; - gf_sql_connection_t *sql_conn = db_conn; - sqlite3_stmt *prep_stmt = NULL; - uint64_t for_time_usec = 0; - char *base_query_str = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out); - - ret = gf_get_basic_query_stmt(&base_query_str); - if (ret <= 0) { - goto out; - } - - ret = gf_asprintf( - &query_str, - "%s AND " - /*First condition: For writes*/ - "( ((" GF_COL_TB_WSEC " * " TOSTRING( - GFDB_MICROSEC) " + " GF_COL_TB_WMSEC - ") <= ? )" - " AND " - /*Second condition: For reads*/ - "((" GF_COL_TB_RWSEC " * " TOSTRING( - GFDB_MICROSEC) " + " GF_COL_TB_RWMSEC - ") <= ?) 
)" - /* Order by write wind time in a - * ascending order i.e most cold - * files w.r.t to write */ - " ORDER BY GF_FILE_TB.W_SEC ASC", - base_query_str); - - if (ret < 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed to create query statement"); - query_str = NULL; - goto out; - } - - for_time_usec = gfdb_time_2_usec(for_time); - - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query_str, -1, &prep_stmt, - 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed to prepare statement %s :" - " %s", - query_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind write wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 1, for_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind for_time_usec " - "%" PRIu64 " : %s", - for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind read wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 2, for_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind for_time_usec " - "%" PRIu64 " : %s", - for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the query*/ - ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed Query %s", query_str); - goto out; - } - - ret = 0; -out: - sqlite3_finalize(prep_stmt); - GF_FREE(base_query_str); - GF_FREE(query_str); - return ret; -} - -/* - * Find recently changed files with a specific frequency from the DB - * Input: - * db_conn : db connection object - * query_callback : query callback function to handle - * result records from the query - * from_time : Time to define what is recent - * freq_write_cnt : Frequency thresold for write - * freq_read_cnt : Frequency 
thresold for read - * clear_counters : Clear counters (r/w) for all inodes in DB - * */ -int -gf_sqlite3_find_recently_changed_files_freq( - void *db_conn, gf_query_callback_t query_callback, void *query_cbk_args, - gfdb_time_t *from_time, int freq_write_cnt, int freq_read_cnt, - gf_boolean_t clear_counters) -{ - int ret = -1; - char *query_str = NULL; - gf_sql_connection_t *sql_conn = db_conn; - sqlite3_stmt *prep_stmt = NULL; - uint64_t from_time_usec = 0; - char *base_query_str = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out); - - ret = gf_get_basic_query_stmt(&base_query_str); - if (ret <= 0) { - goto out; - } - ret = gf_asprintf( - &query_str, - "%s AND " - /*First condition: For Writes*/ - "( ( ((" GF_COL_TB_WSEC " * " TOSTRING( - GFDB_MICROSEC) " + " GF_COL_TB_WMSEC - ") >= ? )" - " AND " - " (" GF_COL_TB_WFC - " >= ? ) )" - " OR " - /*Second condition: For Reads */ - "( ((" GF_COL_TB_RWSEC " * " TOSTRING( - GFDB_MICROSEC) " + " GF_COL_TB_RWMSEC - ") >= ?)" - " AND " - " (" GF_COL_TB_RFC - " >= ? 
) ) )" - /* Order by write wind time and - * write freq in a descending - * order - * i.e most hot files w.r.t to - * write */ - " ORDER BY GF_FILE_TB.W_SEC " - "DESC, " - "GF_FILE_TB.WRITE_FREQ_CNTR DESC", - base_query_str); - - if (ret < 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed to create query statement"); - query_str = NULL; - goto out; - } - - from_time_usec = gfdb_time_2_usec(from_time); - - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query_str, -1, &prep_stmt, - 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed to prepare statement %s :" - " %s", - query_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind write wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 1, from_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind from_time_usec " - "%" PRIu64 " : %s", - from_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind write frequency thresold*/ - ret = sqlite3_bind_int(prep_stmt, 2, freq_write_cnt); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind freq_write_cnt " - "%d : %s", - freq_write_cnt, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind read wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 3, from_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind from_time_usec " - "%" PRIu64 " : %s", - from_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind read frequency thresold*/ - ret = sqlite3_bind_int(prep_stmt, 4, freq_read_cnt); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind freq_read_cnt " - "%d : %s", - freq_read_cnt, 
sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the query*/ - ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed Query %s", query_str); - goto out; - } - - /*Clear counters*/ - if (clear_counters) { - ret = gf_sql_clear_counters(sql_conn); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_CLEAR_COUNTER_FAILED, - "Failed to clear" - " counters!"); - goto out; - } - } - ret = 0; -out: - sqlite3_finalize(prep_stmt); - GF_FREE(base_query_str); - GF_FREE(query_str); - return ret; -} - -/* - * Find unchanged files from a specified time, w.r.t to frequency, from the DB - * Input: - * query_callback : query callback function to handle - * result records from the query - * for_time : Time from where the file/s are not changed - * freq_write_cnt : Frequency thresold for write - * freq_read_cnt : Frequency thresold for read - * clear_counters : Clear counters (r/w) for all inodes in DB - * */ -int -gf_sqlite3_find_unchanged_for_time_freq(void *db_conn, - gf_query_callback_t query_callback, - void *query_cbk_args, - gfdb_time_t *for_time, - int freq_write_cnt, int freq_read_cnt, - gf_boolean_t clear_counters) -{ - int ret = -1; - char *query_str = NULL; - gf_sql_connection_t *sql_conn = db_conn; - sqlite3_stmt *prep_stmt = NULL; - uint64_t for_time_usec = 0; - char *base_query_str = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out); - - ret = gf_get_basic_query_stmt(&base_query_str); - if (ret <= 0) { - goto out; - } - - ret = gf_asprintf (&query_str, "%s AND " - /*First condition: For Writes - * Files that have write wind time smaller than for_time - * OR - * File that have write wind time greater than for_time, - * but write_frequency less than freq_write_cnt*/ - "( ( ((" GF_COL_TB_WSEC " * " TOSTRING(GFDB_MICROSEC) " + " - GF_COL_TB_WMSEC ") < ? 
)" - " OR " - "( (" GF_COL_TB_WFC " < ? ) AND" - "((" GF_COL_TB_WSEC " * " TOSTRING(GFDB_MICROSEC) " + " - GF_COL_TB_WMSEC ") >= ? ) ) )" - " AND " - /*Second condition: For Reads - * Files that have read wind time smaller than for_time - * OR - * File that have read wind time greater than for_time, - * but read_frequency less than freq_read_cnt*/ - "( ((" GF_COL_TB_RWSEC " * " TOSTRING(GFDB_MICROSEC) " + " - GF_COL_TB_RWMSEC ") < ? )" - " OR " - "( (" GF_COL_TB_RFC " < ? ) AND" - "((" GF_COL_TB_RWSEC " * " TOSTRING(GFDB_MICROSEC) " + " - GF_COL_TB_RWMSEC ") >= ? ) ) ) )" - /* Order by write wind time and write freq in ascending order - * i.e most cold files w.r.t to write */ - " ORDER BY GF_FILE_TB.W_SEC ASC, " - "GF_FILE_TB.WRITE_FREQ_CNTR ASC", - base_query_str); - - if (ret < 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed to create query statement"); - query_str = NULL; - goto out; - } - - for_time_usec = gfdb_time_2_usec(for_time); - - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query_str, -1, &prep_stmt, - 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed to prepare delete " - "statement %s : %s", - query_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind write wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 1, for_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind for_time_usec " - "%" PRIu64 " : %s", - for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind write frequency thresold*/ - ret = sqlite3_bind_int(prep_stmt, 2, freq_write_cnt); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind freq_write_cnt" - " %d : %s", - freq_write_cnt, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind write wind time*/ - ret = 
sqlite3_bind_int64(prep_stmt, 3, for_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind for_time_usec " - "%" PRIu64 " : %s", - for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind read wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 4, for_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind for_time_usec " - "%" PRIu64 " : %s", - for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind read frequency thresold*/ - ret = sqlite3_bind_int(prep_stmt, 5, freq_read_cnt); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind freq_read_cnt " - "%d : %s", - freq_read_cnt, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind read wind time*/ - ret = sqlite3_bind_int64(prep_stmt, 6, for_time_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed to bind for_time_usec " - "%" PRIu64 " : %s", - for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the query*/ - ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed Query %s", query_str); - goto out; - } - - /*Clear counters*/ - if (clear_counters) { - ret = gf_sql_clear_counters(sql_conn); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_CLEAR_COUNTER_FAILED, - "Failed to clear " - "counters!"); - goto out; - } - } - - ret = 0; -out: - sqlite3_finalize(prep_stmt); - GF_FREE(base_query_str); - GF_FREE(query_str); - return ret; -} - -int -gf_sqlite3_clear_files_heat(void *db_conn) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = db_conn; - - CHECK_SQL_CONN(sql_conn, out); - - ret = 
gf_sql_clear_counters(sql_conn); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CLEAR_COUNTER_FAILED, - "Failed to clear " - "files heat"); - goto out; - } - - ret = 0; -out: - return ret; -} - -/* Function to extract version of sqlite db - * Input: - * void *db_conn : Sqlite connection - * char **version : the version is extracted as a string and will be stored in - * this variable. The freeing of the memory should be done by - * the caller. - * Return: - * On success return the length of the version string that is - * extracted. - * On failure return -1 - * */ -int -gf_sqlite3_version(void *db_conn, char **version) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = db_conn; - sqlite3_stmt *pre_stmt = NULL; - - CHECK_SQL_CONN(sql_conn, out); - - ret = sqlite3_prepare_v2(sql_conn->sqlite3_db_conn, - "SELECT SQLITE_VERSION()", -1, &pre_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed init prepare stmt %s", sqlite3_errmsg(db_conn)); - ret = -1; - goto out; - } - - ret = sqlite3_step(pre_stmt); - if (ret != SQLITE_ROW) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_RECORD_FAILED, - "Failed to get records " - "from db : %s", - sqlite3_errmsg(db_conn)); - ret = -1; - goto out; - } - - ret = gf_asprintf(version, "%s", sqlite3_column_text(pre_stmt, 0)); - if (ret <= 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed extracting version"); - } - -out: - sqlite3_finalize(pre_stmt); - - return ret; -} - -/* Function to extract PRAGMA from sqlite db - * Input: - * void *db_conn : Sqlite connection - * char *pragma_key : PRAGMA or setting to be extracted - * char **pragma_value : the value of the PRAGMA or setting that is - * extracted. This function will allocate memory - * to pragma_value. The caller should free the memory - * Return: - * On success return the length of the pragma/setting value that is - * extracted. 
- * On failure return -1 - * */ -int -gf_sqlite3_pragma(void *db_conn, char *pragma_key, char **pragma_value) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = db_conn; - sqlite3_stmt *pre_stmt = NULL; - char *sqlstring = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pragma_key, out); - - ret = gf_asprintf(&sqlstring, "PRAGMA %s;", pragma_key); - if (ret <= 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed allocating memory"); - goto out; - } - - ret = sqlite3_prepare_v2(sql_conn->sqlite3_db_conn, sqlstring, -1, - &pre_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed init prepare stmt %s", sqlite3_errmsg(db_conn)); - ret = -1; - goto out; - } - - ret = sqlite3_step(pre_stmt); - if (ret != SQLITE_ROW) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_RECORD_FAILED, - "Failed to get records " - "from db : %s", - sqlite3_errmsg(db_conn)); - ret = -1; - goto out; - } - - if (pragma_value) { - ret = gf_asprintf(pragma_value, "%s", sqlite3_column_text(pre_stmt, 0)); - if (ret <= 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed to get %s from db", pragma_key); - } - } - - ret = 0; -out: - GF_FREE(sqlstring); - - sqlite3_finalize(pre_stmt); - - return ret; -} - -/* Function to set PRAGMA to sqlite db - * Input: - * void *db_conn : Sqlite connection - * char *pragma_key : PRAGMA to be set - * char *pragma_value : the value of the PRAGMA - * Return: - * On success return 0 - * On failure return -1 - * */ -int -gf_sqlite3_set_pragma(void *db_conn, char *pragma_key, char *pragma_value) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = db_conn; - char sqlstring[GF_NAME_MAX] = ""; - char *db_pragma_value = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pragma_key, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pragma_value, out); - - GF_SQLITE3_SET_PRAGMA(sqlstring, 
pragma_key, "%s", pragma_value, ret, out); - - ret = gf_sqlite3_pragma(db_conn, pragma_key, &db_pragma_value); - if (ret < 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed to get %s pragma", pragma_key); - } else { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, 0, "Value set on DB %s : %s", - pragma_key, db_pragma_value); - } - GF_FREE(db_pragma_value); - - ret = 0; - -out: - - return ret; -} - -/* Function to vacuum of sqlite db - * Input: - * void *db_conn : Sqlite connection - * gf_boolean_t compact_active : Is compaction on? - * gf_boolean_t compact_mode_switched : Did we just flip the compaction switch? - * Return: - * On success return 0 - * On failure return -1 - * */ -int -gf_sqlite3_vacuum(void *db_conn, gf_boolean_t compact_active, - gf_boolean_t compact_mode_switched) -{ - int ret = -1; - gf_sql_connection_t *sql_conn = db_conn; - char *sqlstring = NULL; - char *sql_strerror = NULL; - gf_boolean_t changing_pragma = _gf_true; - - CHECK_SQL_CONN(sql_conn, out); - - if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_NONE) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS, - "VACUUM type is off: no VACUUM to do"); - goto out; - } - - if (compact_mode_switched) { - if (compact_active) { /* Then it was OFF before. 
- So turn everything on */ - ret = 0; - switch (GF_SQL_COMPACT_DEF) { - case GF_SQL_COMPACT_FULL: - ret = gf_sqlite3_set_pragma(db_conn, "auto_vacuum", - GF_SQL_AV_FULL); - break; - case GF_SQL_COMPACT_INCR: - ret = gf_sqlite3_set_pragma(db_conn, "auto_vacuum", - GF_SQL_AV_INCR); - break; - case GF_SQL_COMPACT_MANUAL: - changing_pragma = _gf_false; - break; - default: - ret = -1; - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_COMPACT_FAILED, "VACUUM type undefined"); - goto out; - break; - } - - } else { /* Then it was ON before, so turn it all off */ - if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_FULL || - GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_INCR) { - ret = gf_sqlite3_set_pragma(db_conn, "auto_vacuum", - GF_SQL_AV_NONE); - } else { - changing_pragma = _gf_false; - } - } - - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_PREPARE_FAILED, - "Failed to set the pragma"); - goto out; - } - - gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS, - "Turning compaction %i", GF_SQL_COMPACT_DEF); - - /* If we move from an auto_vacuum scheme to off, */ - /* or vice-versa, we must VACUUM to save the change. */ - /* In the case of a manual VACUUM scheme, we might as well */ - /* run a manual VACUUM now if we */ - if (changing_pragma || compact_active) { - ret = gf_asprintf(&sqlstring, "VACUUM;"); - if (ret <= 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed allocating memory"); - goto out; - } - gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS, - "Sealed with a VACUUM"); - } - } else { /* We are active, so it's time to VACUUM */ - if (!compact_active) { /* Did we somehow enter an inconsistent - state? 
*/ - ret = -1; - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Tried to VACUUM when compaction inactive"); - goto out; - } - - gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_COMPACT_STATUS, - "Doing regular vacuum of type %i", GF_SQL_COMPACT_DEF); - - switch (GF_SQL_COMPACT_DEF) { - case GF_SQL_COMPACT_INCR: /* INCR auto_vacuum */ - ret = gf_asprintf(&sqlstring, "PRAGMA incremental_vacuum;"); - if (ret <= 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_PREPARE_FAILED, "Failed allocating memory"); - goto out; - } - gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS, - "Will commence an incremental VACUUM"); - break; - /* (MANUAL) Invoke the VACUUM command */ - case GF_SQL_COMPACT_MANUAL: - ret = gf_asprintf(&sqlstring, "VACUUM;"); - if (ret <= 0) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_PREPARE_FAILED, "Failed allocating memory"); - goto out; - } - gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS, - "Will commence a VACUUM"); - break; - /* (FULL) The database does the compaction itself. */ - /* We cannot do anything else, so we can leave */ - /* without sending anything to the database */ - case GF_SQL_COMPACT_FULL: - ret = 0; - goto success; - /* Any other state must be an error. 
Note that OFF */ - /* cannot hit this statement since we immediately leave */ - /* in that case */ - default: - ret = -1; - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_COMPACT_FAILED, - "VACUUM type undefined"); - goto out; - break; - } - } - - gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_COMPACT_STATUS, - "SQLString == %s", sqlstring); - - ret = sqlite3_exec(sql_conn->sqlite3_db_conn, sqlstring, NULL, NULL, - &sql_strerror); - - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_RECORD_FAILED, - "Failed to vacuum " - "the db : %s", - sqlite3_errmsg(db_conn)); - ret = -1; - goto out; - } -success: - gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS, - compact_mode_switched ? "Successfully changed VACUUM on/off" - : "DB successfully VACUUM"); -out: - GF_FREE(sqlstring); - - return ret; -} diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.h b/libglusterfs/src/gfdb/gfdb_sqlite3.h deleted file mode 100644 index d8af800db3c..00000000000 --- a/libglusterfs/src/gfdb/gfdb_sqlite3.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ -#ifndef __GFDB_SQLITE3_H -#define __GFDB_SQLITE3_H - -/*Sqlite3 header file*/ -#include <sqlite3.h> - -#include "logging.h" -#include "gfdb_data_store_types.h" -#include "gfdb_mem-types.h" -#include "libglusterfs-messages.h" - -#define GF_STMT_SIZE_MAX 2048 - -#define GF_DB_NAME "gfdb.db" -#define GF_FILE_TABLE "GF_FILE_TB" -#define GF_FILE_LINK_TABLE "GF_FLINK_TB" -#define GF_MASTER_TABLE "sqlite_master" - -/*Since we have multiple tables to be created we put it in a transaction*/ -#define GF_CREATE_STMT(out_str) \ - do { \ - sprintf(out_str, "BEGIN; CREATE TABLE IF NOT EXISTS " GF_FILE_TABLE \ - "(GF_ID TEXT PRIMARY KEY NOT NULL, " \ - "W_SEC INTEGER NOT NULL DEFAULT 0, " \ - "W_MSEC INTEGER NOT NULL DEFAULT 0, " \ - "UW_SEC INTEGER NOT NULL DEFAULT 0, " \ - "UW_MSEC INTEGER NOT NULL DEFAULT 0, " \ - "W_READ_SEC INTEGER NOT NULL DEFAULT 0, " \ - "W_READ_MSEC INTEGER NOT NULL DEFAULT 0, " \ - "UW_READ_SEC INTEGER NOT NULL DEFAULT 0, " \ - "UW_READ_MSEC INTEGER NOT NULL DEFAULT 0, " \ - "WRITE_FREQ_CNTR INTEGER NOT NULL DEFAULT 1, " \ - "READ_FREQ_CNTR INTEGER NOT NULL DEFAULT 1); " \ - "CREATE TABLE IF NOT EXISTS " GF_FILE_LINK_TABLE \ - "(GF_ID TEXT NOT NULL, " \ - "GF_PID TEXT NOT NULL, " \ - "FNAME TEXT NOT NULL, " \ - "W_DEL_FLAG INTEGER NOT NULL DEFAULT 0, " \ - "LINK_UPDATE INTEGER NOT NULL DEFAULT 0, " \ - "PRIMARY KEY ( GF_ID, GF_PID, FNAME) " \ - ");" \ - "COMMIT;"); \ - ; \ - } while (0) - -#define GF_COL_TB_WSEC GF_FILE_TABLE "." GF_COL_WSEC -#define GF_COL_TB_WMSEC GF_FILE_TABLE "." GF_COL_WMSEC -#define GF_COL_TB_UWSEC GF_FILE_TABLE "." GF_COL_UWSEC -#define GF_COL_TB_UWMSEC GF_FILE_TABLE "." GF_COL_UWMSEC -#define GF_COL_TB_RWSEC GF_FILE_TABLE "." GF_COL_WSEC_READ -#define GF_COL_TB_RWMSEC GF_FILE_TABLE "." GF_COL_WMSEC_READ -#define GF_COL_TB_RUWSEC GF_FILE_TABLE "." GF_COL_UWSEC_READ -#define GF_COL_TB_RUWMSEC GF_FILE_TABLE "." GF_COL_UWMSEC_READ -#define GF_COL_TB_WFC GF_FILE_TABLE "." 
GF_COL_WRITE_FREQ_CNTR -#define GF_COL_TB_RFC GF_FILE_TABLE "." GF_COL_READ_FREQ_CNTR - -/******************************************************************************* - * SQLITE3 Connection details and PRAGMA - * ****************************************************************************/ - -#define GF_SQL_AV_NONE "none" -#define GF_SQL_AV_FULL "full" -#define GF_SQL_AV_INCR "incremental" - -#define GF_SQL_SYNC_OFF "off" -#define GF_SQL_SYNC_NORMAL "normal" -#define GF_SQL_SYNC_FULL "full" - -#define GF_SQL_JM_DELETE "delete" -#define GF_SQL_JM_TRUNCATE "truncate" -#define GF_SQL_JM_PERSIST "persist" -#define GF_SQL_JM_MEMORY "memory" -#define GF_SQL_JM_WAL "wal" -#define GF_SQL_JM_OFF "off" - -#define GF_SQL_COMPACT_NONE 0 -#define GF_SQL_COMPACT_FULL 1 -#define GF_SQL_COMPACT_INCR 2 -#define GF_SQL_COMPACT_MANUAL 3 - -#define GF_SQL_COMPACT_DEF GF_SQL_COMPACT_INCR -typedef enum gf_sql_auto_vacuum { - gf_sql_av_none = 0, - gf_sql_av_full, - gf_sql_av_incr, - gf_sql_av_invalid -} gf_sql_auto_vacuum_t; - -typedef enum gf_sql_sync { - gf_sql_sync_off = 0, - gf_sql_sync_normal, - gf_sql_sync_full, - gf_sql_sync_invalid -} gf_sql_sync_t; - -typedef enum gf_sql_journal_mode { - gf_sql_jm_wal = 0, - gf_sql_jm_delete, - gf_sql_jm_truncate, - gf_sql_jm_persist, - gf_sql_jm_memory, - gf_sql_jm_off, - gf_sql_jm_invalid -} gf_sql_journal_mode_t; - -typedef struct gf_sql_connection { - char sqlite3_db_path[PATH_MAX]; - sqlite3 *sqlite3_db_conn; - ssize_t cache_size; - ssize_t page_size; - ssize_t wal_autocheckpoint; - gf_sql_journal_mode_t journal_mode; - gf_sql_sync_t synchronous; - gf_sql_auto_vacuum_t auto_vacuum; -} gf_sql_connection_t; - -#define CHECK_SQL_CONN(sql_conn, out) \ - do { \ - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, sql_conn, out); \ - if (!sql_conn->sqlite3_db_conn) { \ - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, \ - LG_MSG_CONNECTION_INIT_FAILED, \ - "sqlite3 connection not initialized"); \ - goto out; \ - }; \ - } while (0) - -#define 
GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, param_key, format, value, \ - ret, error) \ - do { \ - sprintf(sqlite3_config_str, "PRAGMA %s = " format, param_key, value); \ - ret = sqlite3_exec(sql_conn->sqlite3_db_conn, sqlite3_config_str, \ - NULL, NULL, NULL); \ - if (ret != SQLITE_OK) { \ - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_EXEC_FAILED, \ - "Failed executing: %s : %s", sqlite3_config_str, \ - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); \ - ret = -1; \ - goto error; \ - }; \ - } while (0) - -/************************SQLITE3 PARAMS KEYS***********************************/ -#define GFDB_SQL_PARAM_DBPATH "sql-db-path" -#define GFDB_SQL_PARAM_CACHE_SIZE "sql-db-cachesize" -#define GFDB_SQL_PARAM_PAGE_SIZE "sql-db-pagesize" -#define GFDB_SQL_PARAM_JOURNAL_MODE "sql-db-journalmode" -#define GFDB_SQL_PARAM_WAL_AUTOCHECK "sql-db-wal-autocheckpoint" -#define GFDB_SQL_PARAM_SYNC "sql-db-sync" -#define GFDB_SQL_PARAM_AUTO_VACUUM "sql-db-autovacuum" - -#define GF_SQL_DEFAULT_DBPATH "" -#define GF_SQL_DEFAULT_PAGE_SIZE "4096" -#define GF_SQL_DEFAULT_CACHE_SIZE "12500" -#define GF_SQL_DEFAULT_WAL_AUTOCHECKPOINT "25000" -#define GF_SQL_DEFAULT_JOURNAL_MODE GF_SQL_JM_WAL -#define GF_SQL_DEFAULT_SYNC GF_SQL_SYNC_OFF -#define GF_SQL_DEFAULT_AUTO_VACUUM GF_SQL_AV_NONE - -/* Defines the indexs for sqlite params - * The order should be maintained*/ -typedef enum sqlite_param_index { - sql_dbpath_ix = 0, - sql_pagesize_ix, - sql_cachesize_ix, - sql_journalmode_ix, - sql_walautocheck_ix, - sql_dbsync_ix, - sql_autovacuum_ix, - /*This should be in the end*/ - sql_index_max -} sqlite_param_index_t; - -/* Array to hold the sqlite param keys - * The order should be maintained as sqlite_param_index_t*/ -static char *sqlite_params_keys[] = { - GFDB_SQL_PARAM_DBPATH, GFDB_SQL_PARAM_PAGE_SIZE, - GFDB_SQL_PARAM_CACHE_SIZE, GFDB_SQL_PARAM_JOURNAL_MODE, - GFDB_SQL_PARAM_WAL_AUTOCHECK, GFDB_SQL_PARAM_SYNC, - GFDB_SQL_PARAM_AUTO_VACUUM}; - -/* Array of default values for sqlite params - 
* The order should be maintained as sqlite_param_index_t*/ -static char *sqlite_params_default_value[] = {GF_SQL_DEFAULT_DBPATH, - GF_SQL_DEFAULT_PAGE_SIZE, - GF_SQL_DEFAULT_CACHE_SIZE, - GF_SQL_DEFAULT_JOURNAL_MODE, - GF_SQL_DEFAULT_WAL_AUTOCHECKPOINT, - GF_SQL_DEFAULT_SYNC, - GF_SQL_DEFAULT_AUTO_VACUUM}; - -/*Extract sql params from page_size to auto_vacumm - * The dbpath is extracted in a different way*/ -static inline int -gfdb_set_sql_params(char *comp_name, dict_t *from_dict, dict_t *to_dict) -{ - sqlite_param_index_t sql_index = sql_pagesize_ix; - char *_val_str = NULL; - int ret = -1; - - GF_ASSERT(comp_name); - GF_ASSERT(from_dict); - GF_ASSERT(to_dict); - - /*Extract and Set of the sql params from page_size*/ - for (sql_index = sql_pagesize_ix; sql_index < sql_index_max; sql_index++) { - _val_str = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(comp_name, from_dict, - sqlite_params_keys[sql_index], _val_str, - sqlite_params_default_value[sql_index]); - SET_DB_PARAM_TO_DICT(comp_name, to_dict, sqlite_params_keys[sql_index], - _val_str, ret, out); - } -out: - return ret; -} - -/*************************SQLITE3 GFDB PLUGINS*********************************/ - -/*Db init and fini modules*/ -int -gf_sqlite3_fini(void **db_conn); -int -gf_sqlite3_init(dict_t *args, void **db_conn); - -/*insert/update/delete modules*/ -int -gf_sqlite3_insert(void *db_conn, gfdb_db_record_t *); -int -gf_sqlite3_delete(void *db_conn, gfdb_db_record_t *); - -/*querying modules*/ -int -gf_sqlite3_find_all(void *db_conn, gf_query_callback_t, void *_query_cbk_args, - int query_limit); -int -gf_sqlite3_find_unchanged_for_time(void *db_conn, - gf_query_callback_t query_callback, - void *_query_cbk_args, - gfdb_time_t *for_time); -int -gf_sqlite3_find_recently_changed_files(void *db_conn, - gf_query_callback_t query_callback, - void *_query_cbk_args, - gfdb_time_t *from_time); -int -gf_sqlite3_find_unchanged_for_time_freq(void *db_conn, - gf_query_callback_t query_callback, - void 
*_query_cbk_args, - gfdb_time_t *for_time, - int write_freq_cnt, int read_freq_cnt, - gf_boolean_t clear_counters); -int -gf_sqlite3_find_recently_changed_files_freq( - void *db_conn, gf_query_callback_t query_callback, void *_query_cbk_args, - gfdb_time_t *from_time, int write_freq_cnt, int read_freq_cnt, - gf_boolean_t clear_counters); - -int -gf_sqlite3_clear_files_heat(void *db_conn); - -/* Function to extract version of sqlite db - * Input: - * void *db_conn : Sqlite connection - * char **version : the version is extracted as a string and will be stored in - * this variable. The freeing of the memory should be done by - * the caller. - * Return: - * On success return the length of the version string that is - * extracted. - * On failure return -1 - * */ -int -gf_sqlite3_version(void *db_conn, char **version); - -/* Function to extract PRAGMA or setting from sqlite db - * Input: - * void *db_conn : Sqlite connection - * char *pragma_key : PRAGMA or setting to be extracted - * char **pragma_value : the value of the PRAGMA or setting that is - * extracted. This function will allocate memory - * to pragma_value. The caller should free the memory - * Return: - * On success return the length of the pragma/setting value that is - * extracted. - * On failure return -1 - * */ -int -gf_sqlite3_pragma(void *db_conn, char *pragma_key, char **pragma_value); - -/* Function to set PRAGMA to sqlite db - * Input: - * void *db_conn : Sqlite connection - * char *pragma_key : PRAGMA to be set - * char *pragma_value : the value of the PRAGMA - * Return: - * On success return 0 - * On failure return -1 - * */ -int -gf_sqlite3_set_pragma(void *db_conn, char *pragma_key, char *pragma_value); - -/* Function to vacuum of sqlite db - * Input: - * void *db_conn : Sqlite connection - * gf_boolean_t compact_active : Is compaction on? - * gf_boolean_t compact_mode_switched : Did we just flip the compaction switch? 
- * Return: - * On success return 0 - * On failure return -1 - * */ -int -gf_sqlite3_vacuum(void *db_conn, gf_boolean_t compact_active, - gf_boolean_t compact_mode_switched); - -void -gf_sqlite3_fill_db_operations(gfdb_db_operations_t *gfdb_db_ops); - -#endif diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3_helper.c b/libglusterfs/src/gfdb/gfdb_sqlite3_helper.c deleted file mode 100644 index 60dd5e25e66..00000000000 --- a/libglusterfs/src/gfdb/gfdb_sqlite3_helper.c +++ /dev/null @@ -1,1260 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include "gfdb_sqlite3_helper.h" - -#define GFDB_SQL_STMT_SIZE 256 - -/***************************************************************************** - * - * Helper function to execute actual sql queries - * - * - * ****************************************************************************/ - -static int -gf_sql_delete_all(gf_sql_connection_t *sql_conn, char *gfid, - gf_boolean_t ignore_errors) -{ - int ret = -1; - sqlite3_stmt *delete_file_stmt = NULL; - sqlite3_stmt *delete_link_stmt = NULL; - char *delete_link_str = "DELETE FROM " GF_FILE_LINK_TABLE - " WHERE GF_ID = ? ;"; - char *delete_file_str = "DELETE FROM " GF_FILE_TABLE " WHERE GF_ID = ? 
;"; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out); - - /* - * Delete all links associated with this GFID - * - * */ - /*Prepare statement for delete all links*/ - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, delete_link_str, -1, - &delete_link_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed preparing delete " - "statement %s : %s", - delete_link_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind gfid*/ - ret = sqlite3_bind_text(delete_link_stmt, 1, gfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding gfid %s : %s", gfid, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the prepare statement*/ - if (sqlite3_step(delete_link_stmt) != SQLITE_DONE) { - gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_EXEC_FAILED, - "Failed executing the prepared stmt %s : %s", delete_link_str, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /* - * Delete entry from file table associated with this GFID - * - * */ - /*Prepare statement for delete all links*/ - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, delete_file_str, -1, - &delete_file_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed preparing delete " - "statement %s : %s", - delete_file_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind gfid*/ - ret = sqlite3_bind_text(delete_file_stmt, 1, gfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding gfid %s : %s", gfid, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the prepare statement*/ - if (sqlite3_step(delete_file_stmt) != SQLITE_DONE) { - 
gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_EXEC_FAILED, - "Failed executing the prepared stmt %s : %s", delete_file_str, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - -out: - /*Free prepared statement*/ - sqlite3_finalize(delete_file_stmt); - sqlite3_finalize(delete_link_stmt); - return ret; -} - -static int -gf_sql_delete_link(gf_sql_connection_t *sql_conn, char *gfid, char *pargfid, - char *basename, gf_boolean_t ignore_errors) -{ - int ret = -1; - sqlite3_stmt *delete_stmt = NULL; - char *delete_str = "DELETE FROM " GF_FILE_LINK_TABLE - " WHERE GF_ID = ? AND GF_PID = ?" - " AND FNAME = ?;"; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pargfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, basename, out); - - /*Prepare statement*/ - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, delete_str, -1, - &delete_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed preparing delete " - "statement %s : %s", - delete_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind gfid*/ - ret = sqlite3_bind_text(delete_stmt, 1, gfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding gfid %s : %s", gfid, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind pargfid*/ - ret = sqlite3_bind_text(delete_stmt, 2, pargfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding parent gfid %s " - ": %s", - pargfid, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind basename*/ - ret = sqlite3_bind_text(delete_stmt, 3, basename, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding 
basename %s : " - "%s", - basename, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the prepare statement*/ - if (sqlite3_step(delete_stmt) != SQLITE_DONE) { - gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_EXEC_FAILED, - "Failed executing the prepared stmt %s : %s", delete_str, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - ret = 0; -out: - /*Free prepared statement*/ - sqlite3_finalize(delete_stmt); - return ret; -} - -static int -gf_sql_update_link_flags(gf_sql_connection_t *sql_conn, char *gfid, - char *pargfid, char *basename, int update_flag, - gf_boolean_t is_update_or_delete, - gf_boolean_t ignore_errors) -{ - int ret = -1; - sqlite3_stmt *update_stmt = NULL; - char *update_column = NULL; - char update_str[1024] = ""; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pargfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, basename, out); - - update_column = (is_update_or_delete) ? "LINK_UPDATE" : "W_DEL_FLAG"; - - sprintf(update_str, - "UPDATE " GF_FILE_LINK_TABLE - " SET %s = ?" - " WHERE GF_ID = ? AND GF_PID = ? 
AND FNAME = ?;", - update_column); - - /*Prepare statement*/ - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, update_str, -1, - &update_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed preparing update " - "statement %s : %s", - update_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind link_update*/ - ret = sqlite3_bind_int(update_stmt, 1, update_flag); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding update_flag %d " - ": %s", - update_flag, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind gfid*/ - ret = sqlite3_bind_text(update_stmt, 2, gfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding gfid %s : %s", gfid, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind pargfid*/ - ret = sqlite3_bind_text(update_stmt, 3, pargfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding parent gfid %s " - ": %s", - pargfid, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind basename*/ - ret = sqlite3_bind_text(update_stmt, 4, basename, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding basename %s : " - "%s", - basename, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the prepare statement*/ - if (sqlite3_step(update_stmt) != SQLITE_DONE) { - gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_EXEC_FAILED, - "Failed executing the prepared stmt %s : %s", update_str, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - ret = 0; -out: - /*Free prepared statement*/ - sqlite3_finalize(update_stmt); - return ret; -} - 
-static int -gf_sql_insert_link(gf_sql_connection_t *sql_conn, char *gfid, char *pargfid, - char *basename, gf_boolean_t link_consistency, - gf_boolean_t ignore_errors) -{ - int ret = -1; - sqlite3_stmt *insert_stmt = NULL; - char insert_str[GFDB_SQL_STMT_SIZE] = ""; - - sprintf(insert_str, - "INSERT INTO " GF_FILE_LINK_TABLE - " (GF_ID, GF_PID, FNAME," - " W_DEL_FLAG, LINK_UPDATE) " - " VALUES (?, ?, ?, 0, %d);", - link_consistency); - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pargfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, basename, out); - - /*Prepare statement*/ - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, insert_str, -1, - &insert_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed preparing insert " - "statement %s : %s", - insert_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind gfid*/ - ret = sqlite3_bind_text(insert_stmt, 1, gfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding gfid %s : %s", gfid, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind pargfid*/ - ret = sqlite3_bind_text(insert_stmt, 2, pargfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding parent gfid %s " - ": %s", - pargfid, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind basename*/ - ret = sqlite3_bind_text(insert_stmt, 3, basename, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding basename %s : %s", basename, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the prepare statement*/ - if (sqlite3_step(insert_stmt) != SQLITE_DONE) { - gf_msg(GFDB_STR_SQLITE3, 
_gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_EXEC_FAILED, - "Failed executing the prepared " - "stmt %s %s %s %s : %s", - gfid, pargfid, basename, insert_str, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - ret = 0; -out: - /*Free prepared statement*/ - sqlite3_finalize(insert_stmt); - return ret; -} - -static int -gf_sql_update_link(gf_sql_connection_t *sql_conn, char *gfid, char *pargfid, - char *basename, char *old_pargfid, char *old_basename, - gf_boolean_t link_consistency, gf_boolean_t ignore_errors) -{ - int ret = -1; - sqlite3_stmt *insert_stmt = NULL; - char insert_str[GFDB_SQL_STMT_SIZE] = ""; - - sprintf(insert_str, - "INSERT INTO " GF_FILE_LINK_TABLE - " (GF_ID, GF_PID, FNAME," - " W_DEL_FLAG, LINK_UPDATE) " - " VALUES (? , ?, ?, 0, %d);", - link_consistency); - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pargfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, basename, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, old_pargfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, old_basename, out); - - /* - * - * Delete the old link - * - * */ - ret = gf_sql_delete_link(sql_conn, gfid, old_pargfid, old_basename, - ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_DELETE_FAILED, "Failed deleting old link"); - goto out; - } - - /* - * - * insert new link - * - * */ - /*Prepare statement*/ - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, insert_str, -1, - &insert_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed preparing insert " - "statement %s : %s", - insert_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind gfid*/ - ret = sqlite3_bind_text(insert_stmt, 1, gfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed 
binding gfid %s : %s", gfid, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind new pargfid*/ - ret = sqlite3_bind_text(insert_stmt, 2, pargfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding parent gfid %s " - ": %s", - pargfid, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind new basename*/ - ret = sqlite3_bind_text(insert_stmt, 3, basename, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding basename %s : " - "%s", - basename, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the prepare statement*/ - if (sqlite3_step(insert_stmt) != SQLITE_DONE) { - gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_EXEC_FAILED, - "Failed executing the prepared stmt %s : %s", insert_str, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - ret = 0; -out: - /*Free prepared statement*/ - sqlite3_finalize(insert_stmt); - return ret; -} - -static int -gf_sql_insert_write_wind_time(gf_sql_connection_t *sql_conn, char *gfid, - gfdb_time_t *wind_time, - gf_boolean_t ignore_errors) -{ - int ret = -1; - sqlite3_stmt *insert_stmt = NULL; - char *insert_str = "INSERT INTO " GF_FILE_TABLE - "(GF_ID, W_SEC, W_MSEC, UW_SEC, UW_MSEC)" - " VALUES (?, ?, ?, 0, 0);"; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, wind_time, out); - - /*Prepare statement*/ - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, insert_str, -1, - &insert_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed preparing insert " - "statement %s : %s", - insert_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind gfid*/ - ret = 
sqlite3_bind_text(insert_stmt, 1, gfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding gfid %s : %s", gfid, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind wind secs*/ - ret = sqlite3_bind_int(insert_stmt, 2, wind_time->tv_sec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding parent wind " - "secs %ld : %s", - wind_time->tv_sec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind wind msecs*/ - ret = sqlite3_bind_int(insert_stmt, 3, wind_time->tv_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding parent wind " - "msecs %ld : %s", - wind_time->tv_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the prepare statement*/ - if (sqlite3_step(insert_stmt) != SQLITE_DONE) { - gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_EXEC_FAILED, - "Failed executing the prepared stmt GFID:%s %s : %s", gfid, - insert_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - ret = 0; -out: - /*Free prepared statement*/ - sqlite3_finalize(insert_stmt); - return ret; -} - -/*Update write/read times for both wind and unwind*/ -static int -gf_update_time(gf_sql_connection_t *sql_conn, char *gfid, - gfdb_time_t *update_time, gf_boolean_t record_counter, - gf_boolean_t is_wind, gf_boolean_t is_read, - gf_boolean_t ignore_errors) -{ - int ret = -1; - sqlite3_stmt *update_stmt = NULL; - char update_str[1024] = ""; - char *freq_cntr_str = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, update_time, out); - - /* - * Constructing the prepare statement string. 
- * - * */ - /*For write time*/ - if (!is_read) { - if (is_wind) { - /*if record counter is on*/ - freq_cntr_str = (record_counter) - ? ", WRITE_FREQ_CNTR = WRITE_FREQ_CNTR + 1" - : ""; - - /*Perfectly safe as we will not go array of bound*/ - sprintf(update_str, - "UPDATE " GF_FILE_TABLE - " SET W_SEC = ?, W_MSEC = ? " - " %s" /*place for read freq counters*/ - " WHERE GF_ID = ? ;", - freq_cntr_str); - } else { - /*Perfectly safe as we will not go array of bound*/ - sprintf(update_str, - "UPDATE " GF_FILE_TABLE " SET UW_SEC = ?, UW_MSEC = ? ;"); - } - } - /*For Read Time update*/ - else { - if (is_wind) { - /*if record counter is on*/ - freq_cntr_str = (record_counter) - ? ", READ_FREQ_CNTR = READ_FREQ_CNTR + 1" - : ""; - - /*Perfectly safe as we will not go array of bound*/ - sprintf(update_str, - "UPDATE " GF_FILE_TABLE - " SET W_READ_SEC = ?, W_READ_MSEC = ? " - " %s" /*place for read freq counters*/ - " WHERE GF_ID = ? ;", - freq_cntr_str); - } else { - /*Perfectly safe as we will not go array of bound*/ - sprintf(update_str, "UPDATE " GF_FILE_TABLE - " SET UW_READ_SEC = ?, UW_READ_MSEC = ? 
;"); - } - } - - /*Prepare statement*/ - ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, update_str, -1, - &update_stmt, 0); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED, - "Failed preparing insert " - "statement %s : %s", - update_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind time secs*/ - ret = sqlite3_bind_int(update_stmt, 1, update_time->tv_sec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding parent wind " - "secs %ld : %s", - update_time->tv_sec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind time msecs*/ - ret = sqlite3_bind_int(update_stmt, 2, update_time->tv_usec); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding parent wind " - "msecs %ld : %s", - update_time->tv_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Bind gfid*/ - ret = sqlite3_bind_text(update_stmt, 3, gfid, -1, NULL); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED, - "Failed binding gfid %s : %s", gfid, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - /*Execute the prepare statement*/ - if (sqlite3_step(update_stmt) != SQLITE_DONE) { - gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors), - 0, LG_MSG_EXEC_FAILED, - "Failed executing the prepared stmt %s : %s", update_str, - sqlite3_errmsg(sql_conn->sqlite3_db_conn)); - ret = -1; - goto out; - } - - ret = 0; -out: - /*Free prepared statement*/ - sqlite3_finalize(update_stmt); - return ret; -} - -/****************************************************************************** - * - * Helper functions for gf_sqlite3_insert() - * - * - * ****************************************************************************/ - -int -gf_sql_insert_wind(gf_sql_connection_t *sql_conn, - 
gfdb_db_record_t *gfdb_db_record) -{ - int ret = -1; - gfdb_time_t *modtime = NULL; - char *pargfid_str = NULL; - char *gfid_str = NULL; - char *old_pargfid_str = NULL; - gf_boolean_t its_wind = _gf_true; /*remains true for this function*/ - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out); - - gfid_str = gf_strdup(uuid_utoa(gfdb_db_record->gfid)); - if (!gfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - "Creating gfid string failed."); - goto out; - } - - modtime = &gfdb_db_record->gfdb_wind_change_time; - - /* handle all dentry based operations */ - if (isdentryfop(gfdb_db_record->gfdb_fop_type)) { - /*Parent GFID is always set*/ - pargfid_str = gf_strdup(uuid_utoa(gfdb_db_record->pargfid)); - if (!pargfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - "Creating gfid string " - "failed."); - goto out; - } - - /* handle create, mknod */ - if (isdentrycreatefop(gfdb_db_record->gfdb_fop_type)) { - /*insert link*/ - ret = gf_sql_insert_link( - sql_conn, gfid_str, pargfid_str, gfdb_db_record->file_name, - gfdb_db_record->link_consistency, _gf_true); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_WARNING, - gfdb_db_record->ignore_errors), - 0, LG_MSG_INSERT_FAILED, - "Failed " - "inserting link in DB"); - /* Even if link creation is failed we - * continue with the creation of file record. - * This covers to cases - * 1) Lookup heal: If the file record from - * gf_file_tb is deleted but the link record - * still exist. Lookup heal will attempt a heal - * with create_wind set. The link heal will fail - * as there is already a record and if we don't - * ignore the error we will not heal the - * gf_file_tb. - * 2) Rename file in cold tier: During a rename - * of a file that is there in cold tier. We get - * an link record created in hot tier for the - * linkto file. 
When the file gets heated and - * moves to hot tier there will be attempt from - * ctr lookup heal to create link and file - * record and If we don't ignore the error we - * will not heal the gf_file_tb. - * */ - } - gfdb_db_record->islinkupdate = gfdb_db_record->link_consistency; - - /* - * Only for create/mknod insert wind time - * for the first time - * */ - ret = gf_sql_insert_write_wind_time(sql_conn, gfid_str, modtime, - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_INSERT_FAILED, - "Failed inserting wind time in DB"); - goto out; - } - goto out; - } - /*handle rename, link */ - else { - /*rename*/ - if (strlen(gfdb_db_record->old_file_name) != 0) { - old_pargfid_str = gf_strdup( - uuid_utoa(gfdb_db_record->old_pargfid)); - if (!old_pargfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_CREATE_FAILED, - "Creating gfid string failed."); - goto out; - } - ret = gf_sql_update_link( - sql_conn, gfid_str, pargfid_str, gfdb_db_record->file_name, - old_pargfid_str, gfdb_db_record->old_file_name, - gfdb_db_record->link_consistency, - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_UPDATE_FAILED, "Failed updating link"); - goto out; - } - gfdb_db_record->islinkupdate = gfdb_db_record->link_consistency; - } - /*link*/ - else { - ret = gf_sql_insert_link(sql_conn, gfid_str, pargfid_str, - gfdb_db_record->file_name, - gfdb_db_record->link_consistency, - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_INSERT_FAILED, - "Failed inserting link in DB"); - goto out; - } - gfdb_db_record->islinkupdate = gfdb_db_record->link_consistency; - } - } - } - - /* update times only when said!*/ - if (gfdb_db_record->do_record_times) { - /*All fops update times read or 
write*/ - ret = gf_update_time(sql_conn, gfid_str, modtime, - gfdb_db_record->do_record_counters, its_wind, - isreadfop(gfdb_db_record->gfdb_fop_type), - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors), - 0, LG_MSG_UPDATE_FAILED, - "Failed update wind time" - " in DB"); - goto out; - } - } - - ret = 0; -out: - GF_FREE(gfid_str); - GF_FREE(pargfid_str); - GF_FREE(old_pargfid_str); - return ret; -} - -int -gf_sql_insert_unwind(gf_sql_connection_t *sql_conn, - gfdb_db_record_t *gfdb_db_record) -{ - int ret = -1; - gfdb_time_t *modtime = NULL; - gf_boolean_t its_wind = _gf_true; /*remains true for this function*/ - char *gfid_str = NULL; - char *pargfid_str = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out); - - gfid_str = gf_strdup(uuid_utoa(gfdb_db_record->gfid)); - if (!gfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - "Creating gfid string failed."); - goto out; - } - - /*Only update if recording unwind is set*/ - if (gfdb_db_record->do_record_times && - gfdb_db_record->do_record_uwind_time) { - modtime = &gfdb_db_record->gfdb_unwind_change_time; - ret = gf_update_time(sql_conn, gfid_str, modtime, - gfdb_db_record->do_record_counters, (!its_wind), - isreadfop(gfdb_db_record->gfdb_fop_type), - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors), - 0, LG_MSG_UPDATE_FAILED, - "Failed update unwind " - "time in DB"); - goto out; - } - } - - /*For link creation and changes we use link updated*/ - if (gfdb_db_record->islinkupdate && - isdentryfop(gfdb_db_record->gfdb_fop_type)) { - pargfid_str = gf_strdup(uuid_utoa(gfdb_db_record->pargfid)); - if (!pargfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - "Creating pargfid_str string failed."); - goto out; - } - - ret = 
gf_sql_update_link_flags(sql_conn, gfid_str, pargfid_str, - gfdb_db_record->file_name, 0, _gf_true, - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors), - 0, LG_MSG_UPDATE_FAILED, - "Failed updating link flags in unwind"); - goto out; - } - } - - ret = 0; -out: - GF_FREE(gfid_str); - GF_FREE(pargfid_str); - return ret; -} - -int -gf_sql_update_delete_wind(gf_sql_connection_t *sql_conn, - gfdb_db_record_t *gfdb_db_record) -{ - int ret = -1; - char *gfid_str = NULL; - char *pargfid_str = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out); - - gfid_str = gf_strdup(uuid_utoa(gfdb_db_record->gfid)); - if (!gfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - "Creating gfid string failed."); - goto out; - } - - pargfid_str = gf_strdup(uuid_utoa(gfdb_db_record->pargfid)); - if (!pargfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - "Creating pargfid_str " - "string failed."); - goto out; - } - - if (gfdb_db_record->link_consistency) { - ret = gf_sql_update_link_flags(sql_conn, gfid_str, pargfid_str, - gfdb_db_record->file_name, 1, _gf_false, - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors), - 0, LG_MSG_UPDATE_FAILED, - "Failed updating link flags in wind"); - goto out; - } - } - - ret = 0; -out: - GF_FREE(gfid_str); - GF_FREE(pargfid_str); - return ret; -} - -int -gf_sql_delete_unwind(gf_sql_connection_t *sql_conn, - gfdb_db_record_t *gfdb_db_record) -{ - int ret = -1; - char *gfid_str = NULL; - char *pargfid_str = NULL; - gfdb_time_t *modtime = NULL; - - CHECK_SQL_CONN(sql_conn, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out); - - gfid_str = gf_strdup(uuid_utoa(gfdb_db_record->gfid)); - if (!gfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - 
"Creating gfid string failed."); - goto out; - } - - /*Nuke all the entries for this GFID from DB*/ - if (gfdb_db_record->gfdb_fop_path == GFDB_FOP_UNDEL_ALL) { - gf_sql_delete_all(sql_conn, gfid_str, gfdb_db_record->ignore_errors); - } - /*Remove link entries only*/ - else if (gfdb_db_record->gfdb_fop_path == GFDB_FOP_UNDEL) { - pargfid_str = gf_strdup(uuid_utoa(gfdb_db_record->pargfid)); - if (!pargfid_str) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED, - "Creating pargfid_str " - "string failed."); - goto out; - } - - /* Special performance case: - * Updating wind time in unwind for delete. This is done here - * as in the wind path we will not know whether its the last - * link or not. For a last link there is not use to update any - * wind or unwind time!*/ - if (gfdb_db_record->do_record_times) { - /*Update the wind write times*/ - modtime = &gfdb_db_record->gfdb_wind_change_time; - ret = gf_update_time(sql_conn, gfid_str, modtime, - gfdb_db_record->do_record_counters, _gf_true, - isreadfop(gfdb_db_record->gfdb_fop_type), - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_UPDATE_FAILED, - "Failed update wind time in DB"); - goto out; - } - } - - modtime = &gfdb_db_record->gfdb_unwind_change_time; - - ret = gf_sql_delete_link(sql_conn, gfid_str, pargfid_str, - gfdb_db_record->file_name, - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_DELETE_FAILED, - "Failed deleting link"); - goto out; - } - - if (gfdb_db_record->do_record_times && - gfdb_db_record->do_record_uwind_time) { - ret = gf_update_time(sql_conn, gfid_str, modtime, - gfdb_db_record->do_record_counters, _gf_false, - isreadfop(gfdb_db_record->gfdb_fop_type), - gfdb_db_record->ignore_errors); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, - _gfdb_log_level(GF_LOG_ERROR, - gfdb_db_record->ignore_errors), - 0, LG_MSG_UPDATE_FAILED, - "Failed 
update unwind time in DB"); - goto out; - } - } - } else { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_INVALID_UPLINK, - "Invalid unlink option"); - goto out; - } - ret = 0; -out: - GF_FREE(gfid_str); - GF_FREE(pargfid_str); - return ret; -} - -/****************************************************************************** - * - * Find/Query helper functions - * - * ****************************************************************************/ -int -gf_sql_query_function(sqlite3_stmt *prep_stmt, - gf_query_callback_t query_callback, void *_query_cbk_args) -{ - int ret = -1; - gfdb_query_record_t *query_record = NULL; - char *text_column = NULL; - sqlite3 *db_conn = NULL; - uuid_t prev_gfid = {0}; - uuid_t curr_gfid = {0}; - uuid_t pgfid = {0}; - char *base_name = NULL; - gf_boolean_t is_first_record = _gf_true; - gf_boolean_t is_query_empty = _gf_true; - - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, prep_stmt, out); - GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out); - - db_conn = sqlite3_db_handle(prep_stmt); - - /* - * Loop to access queried rows - * Each db record will have 3 columns - * GFID, PGFID, FILE_NAME - * - * For file with multiple hard links we will get multiple query rows - * with the same GFID, but different PGID and FILE_NAME Combination - * For Example if a file with - * GFID = 00000000-0000-0000-0000-000000000006 - * has 3 hardlinks file1, file2 and file3 in 3 different folder - * with GFID's - * 00000000-0000-0000-0000-0000EFC00001, - * 00000000-0000-0000-0000-00000ABC0001 and - * 00000000-0000-0000-0000-00000ABC00CD - * Then there will be 3 records - * GFID : 00000000-0000-0000-0000-000000000006 - * PGFID : 00000000-0000-0000-0000-0000EFC00001 - * FILE_NAME : file1 - * - * GFID : 00000000-0000-0000-0000-000000000006 - * PGFID : 00000000-0000-0000-0000-00000ABC0001 - * FILE_NAME : file2 - * - * GFID : 00000000-0000-0000-0000-000000000006 - * PGFID : 00000000-0000-0000-0000-00000ABC00CD - * FILE_NAME : file3 - * - * This is retrieved and 
added to a single query_record - * - * query_record->gfid = 00000000-0000-0000-0000-000000000006 - * ->link_info = {00000000-0000-0000-0000-0000EFC00001, - * "file1"} - * | - * V - * link_info = {00000000-0000-0000-0000-00000ABC0001, - * "file2"} - * | - * V - * link_info = {00000000-0000-0000-0000-00000ABC0001, - * "file3", - * list} - * - * This query record is sent to the registered query_callback() - * - * */ - while ((ret = sqlite3_step(prep_stmt)) == SQLITE_ROW) { - if (sqlite3_column_count(prep_stmt) > 0) { - is_query_empty = _gf_false; - - /*Retrieving GFID - column index is 0*/ - text_column = (char *)sqlite3_column_text(prep_stmt, 0); - if (!text_column) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_ID_FAILED, - "Failed to" - "retrieve GFID"); - goto out; - } - ret = gf_uuid_parse(text_column, curr_gfid); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PARSE_FAILED, - "Failed to parse " - "GFID"); - goto out; - } - - /* - * if the previous record was not of the current gfid - * call the call_back function and send the - * query record, which will have all the link_info - * objects associated with this gfid - * - * */ - if (gf_uuid_compare(curr_gfid, prev_gfid) != 0) { - /* If this is not the first record */ - if (!is_first_record) { - /*Call the call_back function provided*/ - ret = query_callback(query_record, _query_cbk_args); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_QUERY_CALL_BACK_FAILED, - "Query call back " - "failed"); - goto out; - } - } - - /*Clear the query record*/ - gfdb_query_record_free(query_record); - query_record = NULL; - query_record = gfdb_query_record_new(); - if (!query_record) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_CREATE_FAILED, - "Failed to create " - "query_record"); - goto out; - } - - gf_uuid_copy(query_record->gfid, curr_gfid); - gf_uuid_copy(prev_gfid, curr_gfid); - } - - /* Get PGFID */ - text_column = (char *)sqlite3_column_text(prep_stmt, 1); - if 
(!text_column) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_ID_FAILED, - "Failed to" - " retrieve GF_ID"); - goto out; - } - ret = gf_uuid_parse(text_column, pgfid); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PARSE_FAILED, - "Failed to parse " - "GF_ID"); - goto out; - } - - /* Get Base name */ - text_column = (char *)sqlite3_column_text(prep_stmt, 2); - if (!text_column) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_ID_FAILED, - "Failed to" - " retrieve GF_ID"); - goto out; - } - base_name = text_column; - - /* Add link info to the list */ - ret = gfdb_add_link_to_query_record(query_record, pgfid, base_name); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_ID_FAILED, - "Failed to" - " add link info to query record"); - goto out; - } - - is_first_record = _gf_false; - } - } - - if (ret != SQLITE_DONE) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_RECORD_FAILED, - "Failed to retrieve records " - "from db : %s", - sqlite3_errmsg(db_conn)); - ret = -1; - goto out; - } - - if (!is_query_empty) { - /* - * Call the call_back function for the last record from the - * Database - * */ - ret = query_callback(query_record, _query_cbk_args); - if (ret) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_QUERY_CALL_BACK_FAILED, "Query call back failed"); - goto out; - } - } - - ret = 0; -out: - gfdb_query_record_free(query_record); - query_record = NULL; - return ret; -} - -int -gf_sql_clear_counters(gf_sql_connection_t *sql_conn) -{ - int ret = -1; - char *sql_strerror = NULL; - char *query_str = NULL; - - CHECK_SQL_CONN(sql_conn, out); - - query_str = "UPDATE " GF_FILE_TABLE " SET " GF_COL_READ_FREQ_CNTR - " = 0 , " GF_COL_WRITE_FREQ_CNTR " = 0 ;"; - - ret = sqlite3_exec(sql_conn->sqlite3_db_conn, query_str, NULL, NULL, - &sql_strerror); - if (ret != SQLITE_OK) { - gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_EXEC_FAILED, - "Failed to execute: %s : %s", query_str, sql_strerror); - 
sqlite3_free(sql_strerror); - ret = -1; - goto out; - } - - ret = 0; -out: - return ret; -} diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3_helper.h b/libglusterfs/src/gfdb/gfdb_sqlite3_helper.h deleted file mode 100644 index f19344a353c..00000000000 --- a/libglusterfs/src/gfdb/gfdb_sqlite3_helper.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ -#ifndef __GFDB_SQLITE3_HELPER_H -#define __GFDB_SQLITE3_HELPER_H - -#include "gfdb_sqlite3.h" - -/****************************************************************************** - * - * Helper functions for gf_sqlite3_insert() - * - * ****************************************************************************/ - -int -gf_sql_insert_wind(gf_sql_connection_t *sql_conn, - gfdb_db_record_t *gfdb_db_record); - -int -gf_sql_insert_unwind(gf_sql_connection_t *sql_conn, - gfdb_db_record_t *gfdb_db_record); - -int -gf_sql_update_delete_wind(gf_sql_connection_t *sql_conn, - gfdb_db_record_t *gfdb_db_record); - -int -gf_sql_delete_unwind(gf_sql_connection_t *sql_conn, - gfdb_db_record_t *gfdb_db_record); - -/****************************************************************************** - * - * Find/Query helper functions - * - * ****************************************************************************/ - -int -gf_sql_query_function(sqlite3_stmt *prep_stmt, - gf_query_callback_t query_callback, - void *_query_cbk_args); - -int -gf_sql_clear_counters(gf_sql_connection_t *sql_conn); - -#endif diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c index b9fdb9a39be..64a93802f76 100644 --- a/libglusterfs/src/gidcache.c +++ b/libglusterfs/src/gidcache.c @@ -8,8 +8,9 @@ cases as 
published by the Free Software Foundation. */ -#include "gidcache.h" -#include "mem-pool.h" +#include "glusterfs/gidcache.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/common-utils.h" /* * We treat this as a very simple set-associative LRU cache, with entries aged @@ -64,8 +65,8 @@ gid_cache_lookup(gid_cache_t *cache, uint64_t id, uint64_t uid, uint64_t gid) time_t now; const gid_list_t *agl; + now = gf_time(); LOCK(&cache->gc_lock); - now = time(NULL); bucket = id % cache->gc_nbuckets; agl = BUCKET_START(cache->gc_cache, bucket); for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) { @@ -132,8 +133,8 @@ gid_cache_add(gid_cache_t *cache, gid_list_t *gl) if (!cache->gc_max_age) return 0; + now = gf_time(); LOCK(&cache->gc_lock); - now = time(NULL); /* * Scan for the first free entry or one that matches this id. The id diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 98990460c86..ae06f8be386 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -10,13 +10,8 @@ #include <pthread.h> -#include "glusterfs.h" -#include "globals.h" -#include "xlator.h" -#include "mem-pool.h" -#include "syncop.h" -#include "libglusterfs-messages.h" -#include "upcall-utils.h" +#include "glusterfs/syncop.h" +#include "glusterfs/libglusterfs-messages.h" const char *gf_fop_list[GF_FOP_MAXVALUE] = { [GF_FOP_NULL] = "NULL", @@ -77,6 +72,7 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = { [GF_FOP_PUT] = "PUT", [GF_FOP_ICREATE] = "ICREATE", [GF_FOP_NAMELINK] = "NAMELINK", + [GF_FOP_COPY_FILE_RANGE] = "COPY_FILE_RANGE", }; const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { @@ -98,16 +94,19 @@ const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { glusterfs_ctx_t *global_ctx = NULL; pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; xlator_t global_xlator; -static pthread_key_t this_xlator_key; -static pthread_key_t synctask_key; -static pthread_key_t uuid_buf_key; -static char global_uuid_buf[GF_UUID_BUF_SIZE]; -static 
pthread_key_t lkowner_buf_key; -static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE]; -static pthread_key_t leaseid_buf_key; static int gf_global_mem_acct_enable = 1; static pthread_once_t globals_inited = PTHREAD_ONCE_INIT; +static pthread_key_t free_key; + +static __thread xlator_t *thread_xlator = NULL; +static __thread void *thread_synctask = NULL; +static __thread void *thread_leaseid = NULL; +static __thread struct syncopctx thread_syncopctx = {}; +static __thread char thread_uuid_buf[GF_UUID_BUF_SIZE] = {}; +static __thread char thread_lkowner_buf[GF_LKOWNER_BUF_SIZE] = {}; +static __thread char thread_leaseid_buf[GF_LEASE_ID_BUF_SIZE] = {}; + int gf_global_mem_acct_enable_get(void) { @@ -121,12 +120,6 @@ gf_global_mem_acct_enable_set(int val) return 0; } -void -glusterfs_this_destroy(void *ptr) -{ - FREE(ptr); -} - static struct xlator_cbks global_cbks = { .forget = NULL, .release = NULL, @@ -211,18 +204,9 @@ struct volume_options global_xl_options[] = { static volume_opt_list_t global_xl_opt_list; -int +void glusterfs_this_init() { - int ret = 0; - ret = pthread_key_create(&this_xlator_key, glusterfs_this_destroy); - if (ret != 0) { - gf_msg("", GF_LOG_WARNING, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED, - "failed to create " - "the pthread key"); - return ret; - } - global_xlator.name = "glusterfs"; global_xlator.type = GF_GLOBAL_XLATOR_NAME; global_xlator.cbks = &global_cbks; @@ -236,301 +220,131 @@ glusterfs_this_init() global_xl_opt_list.given_opt = global_xl_options; list_add_tail(&global_xl_opt_list.list, &global_xlator.volume_options); - - return ret; } xlator_t ** __glusterfs_this_location() { - xlator_t **this_location = NULL; - int ret = 0; - - this_location = pthread_getspecific(this_xlator_key); - - if (!this_location) { - this_location = CALLOC(1, sizeof(*this_location)); - if (!this_location) - goto out; + xlator_t **this_location; - ret = pthread_setspecific(this_xlator_key, this_location); - if (ret != 0) { - FREE(this_location); - this_location 
= NULL; - goto out; - } - } -out: - if (this_location) { - if (!*this_location) - *this_location = &global_xlator; + this_location = &thread_xlator; + if (*this_location == NULL) { + thread_xlator = &global_xlator; } + return this_location; } xlator_t * glusterfs_this_get() { - xlator_t **this_location = NULL; - - this_location = __glusterfs_this_location(); - if (!this_location) - return &global_xlator; - - return *this_location; + return *__glusterfs_this_location(); } -int +void glusterfs_this_set(xlator_t *this) { - xlator_t **this_location = NULL; - - this_location = __glusterfs_this_location(); - if (!this_location) - return -ENOMEM; - - *this_location = this; - - return 0; + thread_xlator = this; } /* SYNCOPCTX */ -static pthread_key_t syncopctx_key; - -static void -syncopctx_key_destroy(void *ptr) -{ - struct syncopctx *opctx = ptr; - - if (opctx) { - if (opctx->groups) - GF_FREE(opctx->groups); - - GF_FREE(opctx); - } - - return; -} void * syncopctx_getctx() { - void *opctx = NULL; - - opctx = pthread_getspecific(syncopctx_key); - - return opctx; -} - -int -syncopctx_setctx(void *ctx) -{ - int ret = 0; - - ret = pthread_setspecific(syncopctx_key, ctx); - - return ret; -} - -static int -syncopctx_init(void) -{ - int ret; - - ret = pthread_key_create(&syncopctx_key, syncopctx_key_destroy); - - return ret; + return &thread_syncopctx; } /* SYNCTASK */ -int -synctask_init() -{ - int ret = 0; - - ret = pthread_key_create(&synctask_key, NULL); - - return ret; -} - void * synctask_get() { - void *synctask = NULL; - - synctask = pthread_getspecific(synctask_key); - - return synctask; + return thread_synctask; } -int +void synctask_set(void *synctask) { - int ret = 0; - - pthread_setspecific(synctask_key, synctask); - - return ret; + thread_synctask = synctask; } // UUID_BUFFER -void -glusterfs_uuid_buf_destroy(void *ptr) -{ - FREE(ptr); -} - -int -glusterfs_uuid_buf_init() -{ - int ret = 0; - - ret = pthread_key_create(&uuid_buf_key, glusterfs_uuid_buf_destroy); - 
return ret; -} - char * glusterfs_uuid_buf_get() { - char *buf; - int ret = 0; - - buf = pthread_getspecific(uuid_buf_key); - if (!buf) { - buf = MALLOC(GF_UUID_BUF_SIZE); - ret = pthread_setspecific(uuid_buf_key, (void *)buf); - if (ret) - buf = global_uuid_buf; - } - return buf; + return thread_uuid_buf; } /* LKOWNER_BUFFER */ -void -glusterfs_lkowner_buf_destroy(void *ptr) -{ - FREE(ptr); -} - -int -glusterfs_lkowner_buf_init() -{ - int ret = 0; - - ret = pthread_key_create(&lkowner_buf_key, glusterfs_lkowner_buf_destroy); - return ret; -} - char * glusterfs_lkowner_buf_get() { - char *buf; - int ret = 0; - - buf = pthread_getspecific(lkowner_buf_key); - if (!buf) { - buf = MALLOC(GF_LKOWNER_BUF_SIZE); - ret = pthread_setspecific(lkowner_buf_key, (void *)buf); - if (ret) - buf = global_lkowner_buf; - } - return buf; + return thread_lkowner_buf; } /* Leaseid buffer */ -void -glusterfs_leaseid_buf_destroy(void *ptr) -{ - FREE(ptr); -} - -int -glusterfs_leaseid_buf_init() -{ - int ret = 0; - - ret = pthread_key_create(&leaseid_buf_key, glusterfs_leaseid_buf_destroy); - return ret; -} char * glusterfs_leaseid_buf_get() { char *buf = NULL; - int ret = 0; - buf = pthread_getspecific(leaseid_buf_key); - if (!buf) { - buf = CALLOC(1, GF_LEASE_ID_BUF_SIZE); - ret = pthread_setspecific(leaseid_buf_key, (void *)buf); - if (ret) { - FREE(buf); - buf = NULL; - } + buf = thread_leaseid; + if (buf == NULL) { + buf = thread_leaseid_buf; + thread_leaseid = buf; } + return buf; } char * glusterfs_leaseid_exist() { - return pthread_getspecific(leaseid_buf_key); + return thread_leaseid; } static void -gf_globals_init_once() +glusterfs_cleanup(void *ptr) { - int ret = 0; - - ret = glusterfs_this_init(); - if (ret) { - gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_TRANSLATOR_INIT_FAILED, - "ERROR: glusterfs-translator init failed"); - goto out; + if (thread_syncopctx.groups != NULL) { + GF_FREE(thread_syncopctx.groups); } - ret = glusterfs_uuid_buf_init(); - if (ret) { - gf_msg("", 
GF_LOG_CRITICAL, 0, LG_MSG_UUID_BUF_INIT_FAILED, - "ERROR: glusterfs uuid buffer init failed"); - goto out; - } + mem_pool_thread_destructor(NULL); +} - ret = glusterfs_lkowner_buf_init(); - if (ret) { - gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_LKOWNER_BUF_INIT_FAILED, - "ERROR: glusterfs lkowner buffer init failed"); - goto out; - } +void +gf_thread_needs_cleanup(void) +{ + /* The value stored in free_key TLS is not really used for anything, but + * pthread implementation doesn't call the TLS destruction function unless + * it's != NULL. This function must be called whenever something is + * allocated for this thread so that glusterfs_cleanup() will be called + * and resources can be released. */ + (void)pthread_setspecific(free_key, (void *)1); +} - ret = glusterfs_leaseid_buf_init(); - if (ret) { - gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_LEASEID_BUF_INIT_FAILED, - "ERROR: glusterfs leaseid buffer init failed"); - goto out; - } +static void +gf_globals_init_once() +{ + int ret = 0; - ret = synctask_init(); - if (ret) { - gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_SYNCTASK_INIT_FAILED, - "ERROR: glusterfs synctask init failed"); - goto out; - } + glusterfs_this_init(); - ret = syncopctx_init(); - if (ret) { - gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_SYNCOPCTX_INIT_FAILED, - "ERROR: glusterfs syncopctx init failed"); - goto out; - } -out: + /* This is needed only to cleanup the potential allocation of + * thread_syncopctx.groups. 
*/ + ret = pthread_key_create(&free_key, glusterfs_cleanup); + if (ret != 0) { + gf_msg("", GF_LOG_ERROR, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED, + "failed to create the pthread key"); - if (ret) { gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_GLOBAL_INIT_FAILED, "Exiting as global initialization failed"); + exit(ret); } } diff --git a/libglusterfs/src/glusterfs/async.h b/libglusterfs/src/glusterfs/async.h new file mode 100644 index 00000000000..d1d70ae0bc7 --- /dev/null +++ b/libglusterfs/src/glusterfs/async.h @@ -0,0 +1,209 @@ +/* + Copyright (c) 2019 Red Hat, Inc <https://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __GLUSTERFS_ASYNC_H__ +#define __GLUSTERFS_ASYNC_H__ + +#define _LGPL_SOURCE + +#include <sys/types.h> +#include <signal.h> +#include <errno.h> + +#ifdef URCU_OLD + +/* TODO: Fix the include paths. Since this is a .h included from many places + * it makes no sense to append a '-I$(CONTRIBDIR)/userspace-rcu/' to each + * Makefile.am. I've also seen some problems with CI builders (they + * failed to find the include files, but the same source on another setup + * is working fine). */ +#include "wfcqueue.h" +#include "wfstack.h" + +#else /* !URCU_OLD */ + +#include <urcu/wfcqueue.h> +#include <urcu/wfstack.h> + +#endif /* URCU_OLD */ + +#include "glusterfs/xlator.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/list.h" +#include "glusterfs/libglusterfs-messages.h" + +/* This is the name prefix that all worker threads will have. A number will + * be added to differentiate them. */ +#define GF_ASYNC_THREAD_NAME "tpw" + +/* This value determines the maximum number of threads that are allowed. 
*/ +#define GF_ASYNC_MAX_THREADS 128 + +/* This value determines how many additional threads will be started but will + * remain inactive until they are explicitly activated by the leader. This is + * useful to react faster to bursts of load, but at the same time we minimize + * contention if they are not really needed to handle current load. + * + * TODO: Instead of a fixed number, it would probably be better to use a + * percentage of the available cores. */ +#define GF_ASYNC_SPARE_THREADS 2 + +/* This value determines the signal used to wake the leader when new work has + * been added to the queue. To do so we reuse SIGALRM, since the most logical + * candidates (SIGUSR1/SIGUSR2) are already used. This signal must not be used + * by anything else in the process. */ +#define GF_ASYNC_SIGQUEUE SIGALRM + +/* This value determines the signal that will be used to transfer leader role + * to other workers. */ +#define GF_ASYNC_SIGCTRL SIGVTALRM + +#define gf_async_warning(_err, _msg, _args...) \ + gf_msg("async", GF_LOG_WARNING, -(_err), LG_MSG_ASYNC_WARNING, _msg, \ + ##_args) + +#define gf_async_error(_err, _msg, _args...) \ + gf_msg("async", GF_LOG_ERROR, -(_err), LG_MSG_ASYNC_FAILURE, _msg, ##_args) + +#define gf_async_fatal(_err, _msg, _args...) \ + do { \ + GF_ABORT("Critical error in async module. Unable to continue. (" _msg \ + "). Error %d.", \ + ##_args, -(_err)); \ + } while (0) + +struct _gf_async; +typedef struct _gf_async gf_async_t; + +struct _gf_async_worker; +typedef struct _gf_async_worker gf_async_worker_t; + +struct _gf_async_queue; +typedef struct _gf_async_queue gf_async_queue_t; + +struct _gf_async_control; +typedef struct _gf_async_control gf_async_control_t; + +typedef void (*gf_async_callback_f)(xlator_t *xl, gf_async_t *async); + +struct _gf_async { + /* TODO: remove dependency on xl/THIS. 
*/ + xlator_t *xl; + gf_async_callback_f cbk; + struct cds_wfcq_node queue; +}; + +struct _gf_async_worker { + /* Used to send asynchronous jobs related to the worker. */ + gf_async_t async; + + /* Member of the available workers stack. */ + struct cds_wfs_node stack; + + /* Thread object of the current worker. */ + pthread_t thread; + + /* Unique identifier of this worker. */ + int32_t id; + + /* Indicates if this worker is enabled. */ + bool running; +}; + +struct _gf_async_queue { + /* Structures needed to manage a wait-free queue. For better performance + * they are placed in two different cache lines, as recommended by URCU + * documentation, even though in our case some threads will be producers + * and consumers at the same time. */ + struct cds_wfcq_head head __attribute__((aligned(64))); + struct cds_wfcq_tail tail __attribute__((aligned(64))); +}; + +#define GF_ASYNC_COUNTS(_run, _stop) (((uint32_t)(_run) << 16) + (_stop)) +#define GF_ASYNC_COUNT_RUNNING(_count) ((_count) >> 16) +#define GF_ASYNC_COUNT_STOPPING(_count) ((_count)&65535) + +struct _gf_async_control { + gf_async_queue_t queue; + + /* Stack of unused workers. */ + struct __cds_wfs_stack available; + + /* Array of preallocated worker structures. */ + gf_async_worker_t *table; + + /* Used to synchronize main thread with workers on termination. */ + pthread_barrier_t sync; + + /* The id of the last thread that will be used for synchronization. */ + pthread_t sync_thread; + + /* Signal mask to wait for control signals from leader. */ + sigset_t sigmask_ctrl; + + /* Signal mask to wait for queued items. */ + sigset_t sigmask_queue; + + /* Saved signal handlers. */ + struct sigaction handler_ctrl; + struct sigaction handler_queue; + + /* PID of the current process. */ + pid_t pid; + + /* Maximum number of allowed threads. */ + uint32_t max_threads; + + /* Current number of running and stopping workers. 
This value is split + * into 2 16-bits fields to track both counters atomically at the same + * time. */ + uint32_t counts; + + /* It's used to control whether the asynchronous infrastructure is used + * or not. */ + bool enabled; +}; + +extern gf_async_control_t gf_async_ctrl; + +int32_t +gf_async_init(glusterfs_ctx_t *ctx); + +void +gf_async_fini(void); + +void +gf_async_adjust_threads(int32_t threads); + +static inline void +gf_async(gf_async_t *async, xlator_t *xl, gf_async_callback_f cbk) +{ + if (!gf_async_ctrl.enabled) { + cbk(xl, async); + return; + } + + async->xl = xl; + async->cbk = cbk; + cds_wfcq_node_init(&async->queue); + if (caa_unlikely(!cds_wfcq_enqueue(&gf_async_ctrl.queue.head, + &gf_async_ctrl.queue.tail, + &async->queue))) { + /* The queue was empty, so the leader could be sleeping. We need to + * wake it so that the new item can be processed. If the queue was not + * empty, we don't need to do anything special since the leader will + * take care of it. */ + if (caa_unlikely(kill(gf_async_ctrl.pid, GF_ASYNC_SIGQUEUE) < 0)) { + gf_async_fatal(errno, "Unable to wake leader worker."); + }; + } +} + +#endif /* !__GLUSTERFS_ASYNC_H__ */ diff --git a/libglusterfs/src/atomic.h b/libglusterfs/src/glusterfs/atomic.h index dbbdc309628..ced81748218 100644 --- a/libglusterfs/src/atomic.h +++ b/libglusterfs/src/glusterfs/atomic.h @@ -14,7 +14,7 @@ #include <inttypes.h> #include <stdbool.h> -#include "locking.h" +#include "glusterfs/locking.h" /* Macros used to join two arguments and generate a new macro name. 
*/ #define GF_ATOMIC_MACRO_1(_macro) _macro diff --git a/libglusterfs/src/byte-order.h b/libglusterfs/src/glusterfs/byte-order.h index fd8cef9e58d..fd8cef9e58d 100644 --- a/libglusterfs/src/byte-order.h +++ b/libglusterfs/src/glusterfs/byte-order.h diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/glusterfs/call-stub.h index 815ea312c93..8237ea459bf 100644 --- a/libglusterfs/src/call-stub.h +++ b/libglusterfs/src/glusterfs/call-stub.h @@ -11,23 +11,19 @@ #ifndef _CALL_STUB_H_ #define _CALL_STUB_H_ -#include "xlator.h" -#include "defaults.h" -#include "default-args.h" -#include "stack.h" -#include "list.h" +#include "glusterfs/xlator.h" +#include "glusterfs/defaults.h" +#include "glusterfs/default-args.h" +#include "glusterfs/stack.h" +#include "glusterfs/list.h" typedef struct _call_stub { struct list_head list; - char wind; call_frame_t *frame; - glusterfs_fop_t fop; - gf_boolean_t poison; struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */ uint32_t jnl_meta_len; uint32_t jnl_data_len; void (*serialize)(struct _call_stub *, char *, char *); - union { fop_lookup_t lookup; fop_stat_t stat; @@ -81,6 +77,7 @@ typedef struct _call_stub { fop_put_t put; fop_icreate_t icreate; fop_namelink_t namelink; + fop_copy_file_range_t copy_file_range; } fn; union { @@ -136,8 +133,11 @@ typedef struct _call_stub { fop_put_cbk_t put; fop_icreate_cbk_t icreate; fop_namelink_cbk_t namelink; + fop_copy_file_range_cbk_t copy_file_range; } fn_cbk; - + glusterfs_fop_t fop; + gf_boolean_t poison; + char wind; default_args_t args; default_args_cbk_t args_cbk; } call_stub_t; @@ -589,6 +589,18 @@ fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); +call_stub_t * +fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t len, uint32_t flags, + dict_t *xdata); + +call_stub_t * 
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + void call_resume(call_stub_t *stub); void diff --git a/libglusterfs/src/checksum.h b/libglusterfs/src/glusterfs/checksum.h index 019bb14df71..019bb14df71 100644 --- a/libglusterfs/src/checksum.h +++ b/libglusterfs/src/glusterfs/checksum.h diff --git a/libglusterfs/src/circ-buff.h b/libglusterfs/src/glusterfs/circ-buff.h index a839cd03c67..822345b641b 100644 --- a/libglusterfs/src/circ-buff.h +++ b/libglusterfs/src/glusterfs/circ-buff.h @@ -11,9 +11,7 @@ #ifndef _CB_H #define _CB_H -#include "common-utils.h" -#include "logging.h" -#include "mem-types.h" +#include "glusterfs/common-utils.h" #define BUFFER_SIZE 10 #define TOTAL_SIZE BUFFER_SIZE + 1 diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/glusterfs/client_t.h index 57241a00d5f..a2c508e1d5c 100644 --- a/libglusterfs/src/client_t.h +++ b/libglusterfs/src/glusterfs/client_t.h @@ -11,9 +11,20 @@ #ifndef _CLIENT_T_H #define _CLIENT_T_H -#include "glusterfs.h" -#include "locking.h" /* for gf_lock_t, not included by glusterfs.h */ -#include "atomic.h" /* for gf_atomic_t */ +#include "glusterfs/glusterfs.h" +#include "glusterfs/locking.h" /* for gf_lock_t, not included by glusterfs.h */ +#include "glusterfs/atomic.h" /* for gf_atomic_t */ + +/* auth_data structure is required by RPC layer. But as it is also used in + * client_t structure validation, comparison, it is critical that it is defined + * in the larger scope of libglusterfs, instead of libgfrpc. 
With this change, + * even RPC will use this structure */ +#define GF_CLIENTT_AUTH_BYTES 400 +typedef struct client_auth_data { + int flavour; + int datalen; + char authdata[GF_CLIENTT_AUTH_BYTES]; +} client_auth_data_t; struct client_ctx { void *ctx_key; @@ -47,6 +58,8 @@ typedef struct _client { inode_t *subdir_inode; uuid_t subdir_gfid; int32_t opversion; + /* Variable to save fd_count for detach brick */ + gf_atomic_t fd_cnt; } client_t; #define GF_CLIENTCTX_INITIAL_SIZE 8 @@ -76,21 +89,12 @@ typedef struct clienttable clienttable_t; */ #define GF_CLIENTENTRY_ALLOCATED -2 -struct rpcsvc_auth_data; - -client_t * -gf_client_get(xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid, - char *subdir_mount); - void gf_client_put(client_t *client, gf_boolean_t *detached); clienttable_t * gf_clienttable_alloc(void); -void -gf_client_clienttable_destroy(clienttable_t *clienttable); - client_t * gf_client_ref(client_t *client); @@ -136,4 +140,8 @@ gf_client_dump_inodes(xlator_t *this); int gf_client_disconnect(client_t *client); +client_t * +gf_client_get(xlator_t *this, client_auth_data_t *cred, char *client_uid, + char *subdir_mount); + #endif /* _CLIENT_T_H */ diff --git a/libglusterfs/src/cluster-syncop.h b/libglusterfs/src/glusterfs/cluster-syncop.h index 10388db74cc..d0ad5ed548c 100644 --- a/libglusterfs/src/cluster-syncop.h +++ b/libglusterfs/src/glusterfs/cluster-syncop.h @@ -11,13 +11,13 @@ #ifndef _CLUSTER_SYNCOP_H #define _CLUSTER_SYNCOP_H -#include "xlator.h" #include <sys/time.h> #include <pthread.h> #include <ucontext.h> -#include "defaults.h" -#include "default-args.h" -#include "syncop.h" + +#include "glusterfs/defaults.h" +#include "glusterfs/default-args.h" +#include "glusterfs/syncop.h" /********************************************************************* * diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h index 6a6fd8c9893..f297fdab5c9 100644 --- a/libglusterfs/src/common-utils.h +++ 
b/libglusterfs/src/glusterfs/common-utils.h @@ -18,6 +18,7 @@ #include <string.h> #include <assert.h> #include <pthread.h> +#include <unistd.h> #include <openssl/md5.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -26,6 +27,11 @@ #include <fnmatch.h> #include <uuid/uuid.h> +/* FreeBSD, etc. */ +#ifndef __BITS_PER_LONG +#define __BITS_PER_LONG (CHAR_BIT * (sizeof(long))) +#endif + #ifndef ffsll #define ffsll(x) __builtin_ffsll(x) #endif @@ -38,15 +44,10 @@ trap(void); /* To solve type punned error */ #define VOID(ptr) ((void **)((void *)ptr)) -#include "logging.h" -#include "glusterfs.h" -#include "locking.h" -#include "mem-pool.h" -#include "compat-uuid.h" -#include "iatt.h" -#include "libglusterfs-messages.h" -#include "protocol-common.h" -#include "iobuf.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/compat-uuid.h" +#include "glusterfs/iatt.h" +#include "glusterfs/libglusterfs-messages.h" #define STRINGIFY(val) #val #define TOSTRING(val) STRINGIFY(val) @@ -61,8 +62,8 @@ trap(void); #define min(a, b) ((a) < (b) ? (a) : (b)) #define max(a, b) ((a) > (b) ? (a) : (b)) -#define roof(a, b) ((((a) + (b)-1) / ((b != 0) ? (b) : 1)) * (b)) -#define floor(a, b) (((a) / ((b != 0) ? (b) : 1)) * (b)) +#define gf_roof(a, b) ((((a) + (b)-1) / ((b != 0) ? (b) : 1)) * (b)) +#define gf_floor(a, b) (((a) / ((b != 0) ? 
(b) : 1)) * (b)) #define IPv4_ADDR_SIZE 32 @@ -82,7 +83,6 @@ trap(void); #define GF_UNIT_PERCENT_STRING "%" #define GEOREP "geo-replication" -#define GHADOOP "glusterfs-hadoop" #define GLUSTERD_NAME "glusterd" #define GF_SELINUX_XATTR_KEY "security.selinux" @@ -120,12 +120,16 @@ trap(void); #define GF_HOUR_IN_SECONDS (60 * 60) #define GF_DAY_IN_SECONDS (24 * 60 * 60) #define GF_WEEK_IN_SECONDS (7 * 24 * 60 * 60) +#define GF_SEC_IN_NS 1000000000 +#define GF_MS_IN_NS 1000000 +#define GF_US_IN_NS 1000 /* Default timeout for both barrier and changelog translator */ #define BARRIER_TIMEOUT "120" /* Default value of signing waiting time to sign a file for bitrot */ #define SIGNING_TIMEOUT "120" +#define BR_WORKERS "4" /* xxhash */ #define GF_XXH64_DIGEST_LENGTH 8 @@ -147,14 +151,11 @@ trap(void); /* pthread related */ /* as per the man page, thread-name should be at max 16 bytes */ /* with prefix of 'glfs_' (5), we are left with 11 more bytes */ -#define GF_THREAD_NAMEMAX 11 +#define GF_THREAD_NAME_LIMIT 16 #define GF_THREAD_NAME_PREFIX "glfs_" -#define GF_THREAD_NAME_PREFIX_LEN 5 -#include <stdbool.h> -#define gf_boolean_t bool -#define _gf_false false -#define _gf_true true +/* Advisory buffer size for formatted timestamps (see gf_time_fmt) */ +#define GF_TIMESTR_SIZE 256 /* * we could have initialized these as +ve values and treated @@ -174,7 +175,9 @@ enum _gf_special_pid { GF_CLIENT_PID_BITD = -8, GF_CLIENT_PID_SCRUB = -9, GF_CLIENT_PID_TIER_DEFRAG = -10, - GF_SERVER_PID_TRASH = -11 + GF_SERVER_PID_TRASH = -11, + GF_CLIENT_PID_ADD_REPLICA_MOUNT = -12, + GF_CLIENT_PID_SET_UTIME = -13, }; enum _gf_xlator_ipc_targets { @@ -186,6 +189,12 @@ enum _gf_xlator_ipc_targets { typedef enum _gf_special_pid gf_special_pid_t; typedef enum _gf_xlator_ipc_targets _gf_xlator_ipc_targets_t; +/* Array to hold custom xattr keys */ +extern char *xattrs_to_heal[]; + +char ** +get_xattrs_to_heal(); + /* The DHT file rename operation is not a straightforward rename. 
* It involves creating linkto and linkfiles, and can unlink or rename the * source file depending on the hashed and cached subvols for the source @@ -246,6 +255,8 @@ list_node_del(struct list_node *node); struct dnscache * gf_dnscache_init(time_t ttl); +void +gf_dnscache_deinit(struct dnscache *cache); struct dnscache_entry * gf_dnscache_entry_init(void); void @@ -432,9 +443,6 @@ BIT_VALUE(unsigned char *array, unsigned int index) } \ } while (0) -#define GF_FILE_CONTENT_REQUESTED(_xattr_req, _content_limit) \ - (dict_get_uint64(_xattr_req, "glusterfs.content", _content_limit) == 0) - #ifdef DEBUG #define GF_ASSERT(x) assert(x); #else @@ -447,7 +455,16 @@ BIT_VALUE(unsigned char *array, unsigned int index) } while (0) #endif -#define GF_ABORT(msg) \ +/* Compile-time assert, borrowed from Linux kernel. */ +#ifdef HAVE_STATIC_ASSERT +#define GF_STATIC_ASSERT(expr, ...) \ + __gf_static_assert(expr, ##__VA_ARGS__, #expr) +#define __gf_static_assert(expr, msg, ...) _Static_assert(expr, msg) +#else +#define GF_STATIC_ASSERT(expr, ...) +#endif + +#define GF_ABORT(msg...) 
\ do { \ gf_msg_callingfn("", GF_LOG_CRITICAL, 0, LG_MSG_ASSERTION_FAILED, \ "Assertion failed: " msg); \ @@ -477,18 +494,15 @@ union gf_sock_union { #define IOV_MIN(n) min(IOV_MAX, n) -#define GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scr) \ - do { \ - entry = NULL; \ - if (dir) { \ - entry = sys_readdir(dir, scr); \ - while (entry && (!strcmp(entry->d_name, ".") || \ - !fnmatch("*.tmp", entry->d_name, 0) || \ - !strcmp(entry->d_name, ".."))) { \ - entry = sys_readdir(dir, scr); \ - } \ - } \ - } while (0) +static inline gf_boolean_t +gf_irrelevant_entry(struct dirent *entry) +{ + GF_ASSERT(entry); + + return (!strcmp(entry->d_name, ".") || + !fnmatch("*.tmp", entry->d_name, 0) || + !strcmp(entry->d_name, "..")); +} static inline void iov_free(struct iovec *vector, int count) @@ -517,65 +531,193 @@ static inline struct iovec * iov_dup(const struct iovec *vector, int count) { int bytecount = 0; - int i; struct iovec *newvec = NULL; bytecount = (count * sizeof(struct iovec)); newvec = GF_MALLOC(bytecount, gf_common_mt_iovec); - if (!newvec) - return NULL; - - for (i = 0; i < count; i++) { - newvec[i].iov_len = vector[i].iov_len; - newvec[i].iov_base = vector[i].iov_base; + if (newvec != NULL) { + memcpy(newvec, vector, bytecount); } return newvec; } -static inline int -iov_subset(struct iovec *orig, int orig_count, off_t src_offset, - off_t dst_offset, struct iovec *new) +typedef struct _iov_iter { + const struct iovec *iovec; + void *ptr; + uint32_t len; + uint32_t count; +} iov_iter_t; + +static inline bool +iov_iter_init(iov_iter_t *iter, const struct iovec *iovec, uint32_t count, + uint32_t offset) { - int new_count = 0; - int i; - off_t offset = 0; - size_t start_offset = 0; - size_t end_offset = 0, origin_iov_len = 0; + uint32_t len; + + while (count > 0) { + count--; + len = iovec->iov_len; + if (offset < len) { + iter->ptr = iovec->iov_base + offset; + iter->len = len - offset; + iter->iovec = iovec + 1; + iter->count = count; + + return true; + } + offset -= 
len; + } - for (i = 0; i < orig_count; i++) { - origin_iov_len = orig[i].iov_len; + memset(iter, 0, sizeof(*iter)); - if ((offset + orig[i].iov_len < src_offset) || (offset > dst_offset)) { - goto not_subset; - } + return false; +} - if (!new) { - goto count_only; - } +static inline bool +iov_iter_end(iov_iter_t *iter) +{ + return iter->count == 0; +} - start_offset = 0; - end_offset = orig[i].iov_len; +static inline bool +iov_iter_next(iov_iter_t *iter, uint32_t size) +{ + GF_ASSERT(size <= iter->len); + + if (iter->len > size) { + iter->len -= size; + iter->ptr += size; + + return true; + } + if (iter->count > 0) { + iter->count--; + iter->ptr = iter->iovec->iov_base; + iter->len = iter->iovec->iov_len; + iter->iovec++; - if (src_offset >= offset) { - start_offset = (src_offset - offset); + return true; + } + + memset(iter, 0, sizeof(*iter)); + + return false; +} + +static inline uint32_t +iov_iter_copy(iov_iter_t *dst, iov_iter_t *src, uint32_t size) +{ + uint32_t len; + + len = src->len; + if (len > dst->len) { + len = dst->len; + } + if (len > size) { + len = size; + } + memcpy(dst->ptr, src->ptr, len); + + return len; +} + +static inline uint32_t +iov_iter_to_iovec(iov_iter_t *iter, struct iovec *iovec, int32_t idx, + uint32_t size) +{ + uint32_t len; + + len = iter->len; + if (len > size) { + len = size; + } + iovec[idx].iov_base = iter->ptr; + iovec[idx].iov_len = len; + + return len; +} + +static inline int +iov_subset(struct iovec *src, int src_count, uint32_t start, uint32_t size, + struct iovec **dst, int32_t dst_count) +{ + struct iovec iovec[src_count]; + iov_iter_t iter; + uint32_t len; + int32_t idx; + + if ((size == 0) || !iov_iter_init(&iter, src, src_count, start)) { + return 0; + } + + idx = 0; + do { + len = iov_iter_to_iovec(&iter, iovec, idx, size); + idx++; + size -= len; + } while ((size > 0) && iov_iter_next(&iter, len)); + + if (*dst == NULL) { + *dst = iov_dup(iovec, idx); + if (*dst == NULL) { + return -1; } + } else if (idx > 
dst_count) { + return -1; + } else { + memcpy(*dst, iovec, idx * sizeof(struct iovec)); + } + + return idx; +} - if (dst_offset <= (offset + orig[i].iov_len)) { - end_offset = (dst_offset - offset); +static inline int +iov_skip(struct iovec *iovec, uint32_t count, uint32_t size) +{ + uint32_t len, idx; + + idx = 0; + while ((size > 0) && (idx < count)) { + len = iovec[idx].iov_len; + if (len > size) { + iovec[idx].iov_len -= size; + iovec[idx].iov_base += size; + break; } + idx++; + size -= len; + } + + if (idx > 0) { + memmove(iovec, iovec + idx, (count - idx) * sizeof(struct iovec)); + } - new[new_count].iov_base = orig[i].iov_base + start_offset; - new[new_count].iov_len = end_offset - start_offset; + return count - idx; +} - count_only: - new_count++; +static inline size_t +iov_range_copy(const struct iovec *dst, uint32_t dst_count, uint32_t dst_offset, + const struct iovec *src, uint32_t src_count, uint32_t src_offset, + uint32_t size) +{ + iov_iter_t src_iter, dst_iter; + uint32_t len, total; - not_subset: - offset += origin_iov_len; + if ((size == 0) || !iov_iter_init(&src_iter, src, src_count, src_offset) || + !iov_iter_init(&dst_iter, dst, dst_count, dst_offset)) { + return 0; } - return new_count; + total = 0; + do { + len = iov_iter_copy(&dst_iter, &src_iter, size); + total += len; + size -= len; + } while ((size > 0) && iov_iter_next(&src_iter, len) && + iov_iter_next(&dst_iter, len)); + + return total; } static inline void @@ -614,54 +756,38 @@ iov_load(const struct iovec *vector, int count, char *buf, int size) static inline size_t iov_copy(const struct iovec *dst, int dcnt, const struct iovec *src, int scnt) { - size_t ret = 0; - size_t left = 0; - size_t min_i = 0; - int s_i = 0, s_ii = 0; - int d_i = 0, d_ii = 0; - - ret = min(iov_length(dst, dcnt), iov_length(src, scnt)); - left = ret; - - while (left) { - min_i = min(dst[d_i].iov_len - d_ii, src[s_i].iov_len - s_ii); - memcpy(dst[d_i].iov_base + d_ii, src[s_i].iov_base + s_ii, min_i); - - d_ii += 
min_i; - if (d_ii == dst[d_i].iov_len) { - d_ii = 0; - d_i++; - } - - s_ii += min_i; - if (s_ii == src[s_i].iov_len) { - s_ii = 0; - s_i++; - } + return iov_range_copy(dst, dcnt, 0, src, scnt, 0, UINT32_MAX); +} - left -= min_i; +/* based on the amusing discussion @ https://rusty.ozlabs.org/?p=560 */ +static bool +memeqzero(const void *data, size_t length) +{ + const unsigned char *p = data; + size_t len; + + /* Check first 16 bytes manually */ + for (len = 0; len < 16; len++) { + if (!length) + return true; + if (*p) + return false; + p++; + length--; } - return ret; + /* Now we know that's zero, memcmp with self. */ + return memcmp(data, p, length) == 0; } static inline int mem_0filled(const char *buf, size_t size) { - int i = 0; - int ret = 0; - - for (i = 0; i < size; i++) { - ret = buf[i]; - if (ret) - break; - } - - return ret; + return !memeqzero(buf, size); } static inline int -iov_0filled(struct iovec *vector, int count) +iov_0filled(const struct iovec *vector, int count) { int i = 0; int ret = 0; @@ -675,19 +801,6 @@ iov_0filled(struct iovec *vector, int count) return ret; } -static inline void * -memdup(const void *ptr, size_t size) -{ - void *newptr = NULL; - - newptr = GF_MALLOC(size, gf_common_mt_memdup); - if (!newptr) - return NULL; - - memcpy(newptr, ptr, size); - return newptr; -} - typedef enum { gf_timefmt_default = 0, gf_timefmt_FT = 0, /* YYYY-MM-DD hh:mm:ss */ @@ -700,7 +813,7 @@ typedef enum { } gf_timefmts; static inline char * -gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt) +gf_time_fmt_tv(char *dst, size_t sz_dst, struct timeval *tv, unsigned int fmt) { extern void _gf_timestuff(const char ***, const char ***); static gf_timefmts timefmt_last = (gf_timefmts)-1; @@ -708,6 +821,8 @@ gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt) static const char **zeros; struct tm tm, *res; int localtime = 0; + int len = 0; + int pos = 0; if (timefmt_last == ((gf_timefmts)-1)) { _gf_timestuff(&fmts, &zeros); @@ 
-717,15 +832,35 @@ gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt) fmt = gf_timefmt_default; } localtime = gf_log_get_localtime(); - res = localtime ? localtime_r(&utime, &tm) : gmtime_r(&utime, &tm); - if (utime && (res != NULL)) { - strftime(dst, sz_dst, fmts[fmt], &tm); + res = localtime ? localtime_r(&tv->tv_sec, &tm) + : gmtime_r(&tv->tv_sec, &tm); + if (tv->tv_sec && (res != NULL)) { + len = strftime(dst, sz_dst, fmts[fmt], &tm); + if (len == 0) + return dst; + pos += len; + if (tv->tv_usec >= 0) { + len = snprintf(dst + pos, sz_dst - pos, ".%" GF_PRI_SUSECONDS, + tv->tv_usec); + if (len >= sz_dst - pos) + return dst; + pos += len; + } + strftime(dst + pos, sz_dst - pos, " %z", &tm); } else { strncpy(dst, "N/A", sz_dst); } return dst; } +static inline char * +gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt) +{ + struct timeval tv = {utime, -1}; + + return gf_time_fmt_tv(dst, sz_dst, &tv, fmt); +} + /* This function helps us use gfid (unique identity) to generate inode's unique * number in glusterfs. 
*/ @@ -806,10 +941,6 @@ gf_string2uint32_base10(const char *str, uint32_t *n); int gf_string2uint64_base10(const char *str, uint64_t *n); int -gf_string2bytesize(const char *str, uint64_t *n); -int -gf_string2bytesize_size(const char *str, size_t *n); -int gf_string2bytesize_uint64(const char *str, uint64_t *n); int gf_string2bytesize_int64(const char *str, int64_t *n); @@ -820,6 +951,8 @@ gf_string2percent_or_bytesize(const char *str, double *n, int gf_string2boolean(const char *str, gf_boolean_t *b); int +gf_strn2boolean(const char *str, const int len, gf_boolean_t *b); +int gf_string2percent(const char *str, double *n); int gf_string2time(const char *str, uint32_t *n); @@ -830,12 +963,12 @@ int gf_unlockfd(int fd); int -get_checksum_for_file(int fd, uint32_t *checksum); +get_checksum_for_file(int fd, uint32_t *checksum, int op_version); int log_base2(unsigned long x); int -get_checksum_for_path(char *path, uint32_t *checksum); +get_checksum_for_path(char *path, uint32_t *checksum, int op_version); int get_file_mtime(const char *path, time_t *stamp); char * @@ -871,7 +1004,8 @@ valid_ipv4_address(char *address, int length, gf_boolean_t wildcard_acc); char valid_ipv6_address(char *address, int length, gf_boolean_t wildcard_acc); char -valid_internet_address(char *address, gf_boolean_t wildcard_acc); +valid_internet_address(char *address, gf_boolean_t wildcard_acc, + gf_boolean_t cidr); gf_boolean_t valid_mount_auth_address(char *address); gf_boolean_t @@ -905,8 +1039,6 @@ gf_is_str_int(const char *value); char *gf_uint64_2human_readable(uint64_t); int -get_vol_type(int type, int dist_count, int brick_count); -int validate_brick_name(char *brick); char * get_host_name(char *word, char **host); @@ -945,10 +1077,24 @@ gf_set_timestamp(const char *src, const char *dest); int gf_thread_create(pthread_t *thread, const pthread_attr_t *attr, - void *(*start_routine)(void *), void *arg, const char *name); + void *(*start_routine)(void *), void *arg, const char *name, + 
...) __attribute__((__format__(__printf__, 5, 6))); + +int +gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg, const char *name, + va_list args); int gf_thread_create_detached(pthread_t *thread, void *(*start_routine)(void *), - void *arg, const char *name); + void *arg, const char *name, ...) + __attribute__((__format__(__printf__, 4, 5))); + +void +gf_thread_set_name(pthread_t thread, const char *name, ...) + __attribute__((__format__(__printf__, 2, 3))); + +void +gf_thread_set_vname(pthread_t thread, const char *name, va_list args); gf_boolean_t gf_is_pid_running(int pid); gf_boolean_t @@ -1044,8 +1190,6 @@ gf_getgrouplist(const char *user, gid_t group, gid_t **groups); int glusterfs_compute_sha256(const unsigned char *content, size_t size, char *sha256_hash); -char * -get_struct_variable(int mem_num, gf_gsync_status_t *sts_val); char * gf_strncpy(char *dest, const char *src, const size_t dest_size); @@ -1059,4 +1203,54 @@ gf_replace_old_iatt_in_dict(struct _dict *); int gf_replace_new_iatt_in_dict(struct _dict *); +xlator_cmdline_option_t * +find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args); + +int +gf_d_type_from_ia_type(ia_type_t type); + +int +gf_syncfs(int fd); + +int +gf_nanosleep(uint64_t nsec); + +static inline time_t +gf_time(void) +{ + return time(NULL); +} + +/* Return delta value in microseconds. */ + +static inline double +gf_tvdiff(struct timeval *start, struct timeval *end) +{ + struct timeval t; + + if (start->tv_usec > end->tv_usec) + t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000; + else + t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec; + + return (double)(t.tv_sec - start->tv_sec) * 1e6 + + (double)(t.tv_usec - start->tv_usec); +} + +/* Return delta value in nanoseconds. 
*/ + +static inline double +gf_tsdiff(struct timespec *start, struct timespec *end) +{ + struct timespec t; + + if (start->tv_nsec > end->tv_nsec) + t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000; + else + t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec; + + return (double)(t.tv_sec - start->tv_sec) * 1e9 + + (double)(t.tv_nsec - start->tv_nsec); +} + #endif /* _COMMON_UTILS_H */ diff --git a/libglusterfs/src/compat-errno.h b/libglusterfs/src/glusterfs/compat-errno.h index c4ab09ab0d5..c4ab09ab0d5 100644 --- a/libglusterfs/src/compat-errno.h +++ b/libglusterfs/src/glusterfs/compat-errno.h diff --git a/libglusterfs/src/compat-uuid.h b/libglusterfs/src/glusterfs/compat-uuid.h index 6e7fdefbfab..6e7fdefbfab 100644 --- a/libglusterfs/src/compat-uuid.h +++ b/libglusterfs/src/glusterfs/compat-uuid.h diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/glusterfs/compat.h index 38c07b5ae7c..bf00d903152 100644 --- a/libglusterfs/src/compat.h +++ b/libglusterfs/src/glusterfs/compat.h @@ -40,10 +40,6 @@ #define GF_XATTR_NAME_MAX XATTR_NAME_MAX #endif /* GF_LINUX_HOST_OS */ -#ifdef HAVE_XATTR_H -#include <sys/xattr.h> -#endif - /* * Define the fallocate flags in case we do not have the header. This also * accounts for older systems that do not define FALLOC_FL_PUNCH_HOLE. @@ -116,6 +112,25 @@ #include <limits.h> #include <libgen.h> +/* + * This is where things like off64_t are defined. + * So include it before declaring _OFF64_T_DECLARED. + * If the freebsd version has support for off64_t + * including stdio.h should be sufficient. + */ +#include <stdio.h> + +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. 
+ */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ #ifndef XATTR_CREATE enum { @@ -513,6 +528,9 @@ dirname_r(char *path); /* Use run API, see run.h */ #include <stdlib.h> /* system(), mkostemp() */ #include <stdio.h> /* popen() */ +#ifdef GF_LINUX_HOST_OS +#include <sys/sysmacros.h> +#endif #pragma GCC poison system mkostemp popen #endif diff --git a/libglusterfs/src/daemon.h b/libglusterfs/src/glusterfs/daemon.h index 48850800b5e..48850800b5e 100644 --- a/libglusterfs/src/daemon.h +++ b/libglusterfs/src/glusterfs/daemon.h diff --git a/libglusterfs/src/default-args.h b/libglusterfs/src/glusterfs/default-args.h index e5915c4c05e..ca7526fcab6 100644 --- a/libglusterfs/src/default-args.h +++ b/libglusterfs/src/glusterfs/default-args.h @@ -15,7 +15,7 @@ #ifndef _DEFAULT_ARGS_H #define _DEFAULT_ARGS_H -#include "xlator.h" +#include "glusterfs/xlator.h" int args_lookup_cbk_store(default_args_cbk_t *args, int32_t op_ret, @@ -234,6 +234,12 @@ void args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno, struct gf_lease *lease, dict_t *xdata); +int +args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + void args_cbk_wipe(default_args_cbk_t *args_cbk); @@ -439,6 +445,11 @@ args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode, int args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata); +int +args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off_t off64_out, size_t len, + uint32_t flags, dict_t *xdata); + void args_cbk_init(default_args_cbk_t *args_cbk); #endif /* _DEFAULT_ARGS_H */ diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/glusterfs/defaults.h index 8583935e069..5a818eeb91a 100644 --- a/libglusterfs/src/defaults.h +++ b/libglusterfs/src/glusterfs/defaults.h @@ -15,7 +15,7 @@ #ifndef _DEFAULTS_H 
#define _DEFAULTS_H -#include "xlator.h" +#include "glusterfs/xlator.h" typedef struct { int op_ret; @@ -48,10 +48,20 @@ typedef struct { } default_args_cbk_t; typedef struct { - loc_t loc; /* @old in rename(), link() */ - loc_t loc2; /* @new in rename(), link() */ - fd_t *fd; + loc_t loc; /* @old in rename(), link() */ + loc_t loc2; /* @new in rename(), link() */ + fd_t *fd; /* for all the fd based ops */ + fd_t *fd_dst; /* Only for copy_file_range destination */ off_t offset; + /* + * According to the man page of copy_file_range, + * the offsets for source and destination file + * are of type loff_t. But the type loff_t is + * linux specific and is actually a typedef of + * off64_t. + */ + off64_t off_in; /* For copy_file_range source fd */ + off64_t off_out; /* For copy_file_range destination fd only */ int mask; size_t size; mode_t mode; @@ -323,6 +333,11 @@ int32_t default_namelink(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +int32_t +default_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, dict_t *xdata); + /* Resume */ int32_t default_getspec_resume(call_frame_t *frame, xlator_t *this, const char *key, @@ -542,6 +557,11 @@ default_put_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, int32_t count, off_t off, struct iobref *iobref, dict_t *xattr, dict_t *xdata); +int32_t +default_copy_file_range_resume(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, dict_t *xdata); + /* _cbk_resume */ int32_t @@ -813,6 +833,13 @@ int32_t default_namelink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +int32_t +default_copy_file_range_cbk_resume(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + /* _CBK */ 
int32_t default_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, @@ -1072,6 +1099,12 @@ default_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postbuf, dict_t *xdata); int32_t +default_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + +int32_t default_lookup_failure_cbk(call_frame_t *frame, int32_t op_errno); int32_t @@ -1231,6 +1264,9 @@ int32_t default_namelink_failure_cbk(call_frame_t *frame, int32_t op_errno); int32_t +default_copy_file_range_failure_cbk(call_frame_t *frame, int32_t op_errno); + +int32_t default_mem_acct_init(xlator_t *this); void diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/glusterfs/dict.h index 45cdb33ec9e..d0467c6dfb6 100644 --- a/libglusterfs/src/dict.h +++ b/libglusterfs/src/glusterfs/dict.h @@ -15,13 +15,38 @@ #include <sys/uio.h> #include <pthread.h> -#include "common-utils.h" -#include "libglusterfs-messages.h" +#include "glusterfs/common-utils.h" typedef struct _data data_t; typedef struct _dict dict_t; typedef struct _data_pair data_pair_t; +#define dict_set_sizen(this, key, value) dict_setn(this, key, SLEN(key), value) + +#define dict_add_sizen(this, key, value) dict_addn(this, key, SLEN(key), value) + +#define dict_get_sizen(this, key) dict_getn(this, key, SLEN(key)) + +#define dict_del_sizen(this, key) dict_deln(this, key, SLEN(key)) + +#define dict_set_str_sizen(this, key, str) \ + dict_set_strn(this, key, SLEN(key), str) + +#define dict_set_sizen_str_sizen(this, key, str) \ + dict_set_nstrn(this, key, SLEN(key), str, SLEN(str)) + +#define dict_set_dynstr_sizen(this, key, str) \ + dict_set_dynstrn(this, key, SLEN(key), str) + +#define dict_get_str_sizen(this, key, str) \ + dict_get_strn(this, key, SLEN(key), str) + +#define dict_get_int32_sizen(this, key, val) \ + dict_get_int32n(this, key, SLEN(key), val) + +#define 
dict_set_int32_sizen(this, key, val) \ + dict_set_int32n(this, key, SLEN(key), val) + #define GF_PROTOCOL_DICT_SERIALIZE(this, from_dict, to, len, ope, labl) \ do { \ int _ret = 0; \ @@ -62,13 +87,15 @@ typedef struct _data_pair data_pair_t; #define DICT_MAX_FLAGS 256 #define DICT_FLAG_SET 1 #define DICT_FLAG_CLEAR 0 +#define DICT_HDR_LEN 4 +#define DICT_DATA_HDR_KEY_LEN 4 +#define DICT_DATA_HDR_VAL_LEN 4 struct _data { char *data; gf_atomic_t refcount; - gf_lock_t lock; gf_dict_data_type_t data_type; - int32_t len; + uint32_t len; gf_boolean_t is_static; }; @@ -88,12 +115,12 @@ struct _dict { gf_atomic_t refcount; data_pair_t **members; data_pair_t *members_list; - char *extra_free; char *extra_stdfree; gf_lock_t lock; data_pair_t *members_internal; data_pair_t free_pair; - gf_boolean_t free_pair_in_use; + /* Variable to store total keylen + value->len */ + uint32_t totkvlen; }; typedef gf_boolean_t (*dict_match_t)(dict_t *d, char *k, data_t *v, void *data); @@ -108,6 +135,7 @@ int32_t dict_set(dict_t *this, char *key, data_t *value); int32_t dict_setn(dict_t *this, char *key, const int keylen, data_t *value); + /* function to set a new key/value pair (without checking for duplicate) */ int32_t dict_add(dict_t *this, char *key, data_t *value); @@ -115,8 +143,6 @@ int32_t dict_addn(dict_t *this, char *key, const int keylen, data_t *value); int dict_get_with_ref(dict_t *this, char *key, data_t **data); -int -dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data); data_t * dict_get(dict_t *this, char *key); data_t * @@ -241,8 +267,6 @@ int dict_remove_foreach_fn(dict_t *d, char *k, data_t *v, void *tmp); dict_t * dict_copy(dict_t *this, dict_t *new); -dict_t * -get_new_dict(void); int dict_keys_join(void *value, int size, dict_t *dict, int (*filter_fn)(char *key)); @@ -362,6 +386,11 @@ GF_MUST_CHECK int dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static); GF_MUST_CHECK int dict_get_iatt(dict_t *this, char *key, struct iatt 
*iatt); +GF_MUST_CHECK int +dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata, + bool is_static); +GF_MUST_CHECK int +dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata); void dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain); @@ -385,4 +414,7 @@ are_dicts_equal(dict_t *one, dict_t *two, gf_boolean_t (*value_ignore)(char *k)); int dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result); + +int +dict_serialized_length_lk(dict_t *this); #endif diff --git a/libglusterfs/src/event-history.h b/libglusterfs/src/glusterfs/event-history.h index 5f0776ba9b0..f0e0422418e 100644 --- a/libglusterfs/src/event-history.h +++ b/libglusterfs/src/glusterfs/event-history.h @@ -11,8 +11,10 @@ #ifndef _EH_H #define _EH_H -#include "mem-types.h" -#include "circ-buff.h" +#include <pthread.h> // for pthread_mutex_t +#include <stddef.h> // for size_t +#include "glusterfs/circ-buff.h" // for buffer_t, circular_buffer_t +#include "glusterfs/glusterfs.h" // for gf_boolean_t struct event_hist { buffer_t *buffer; diff --git a/libglusterfs/src/events.h b/libglusterfs/src/glusterfs/events.h index 74c5326427b..74c5326427b 100644 --- a/libglusterfs/src/events.h +++ b/libglusterfs/src/glusterfs/events.h diff --git a/libglusterfs/src/fd-lk.h b/libglusterfs/src/glusterfs/fd-lk.h index 735f05ec46c..76cc680306a 100644 --- a/libglusterfs/src/fd-lk.h +++ b/libglusterfs/src/glusterfs/fd-lk.h @@ -11,14 +11,10 @@ #ifndef _FD_LK_H #define _FD_LK_H -#include "fd.h" -#include "locking.h" -#include "list.h" -#include "logging.h" -#include "mem-pool.h" -#include "mem-types.h" -#include "glusterfs.h" -#include "common-utils.h" +#include "glusterfs/fd.h" +#include "glusterfs/locking.h" +#include "glusterfs/list.h" +#include "glusterfs/glusterfs.h" #define get_lk_type(type) \ type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? 
"F_RDLCK" : "F_WRLCK") diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/glusterfs/fd.h index e885ecb7786..3ffaaa60504 100644 --- a/libglusterfs/src/fd.h +++ b/libglusterfs/src/glusterfs/fd.h @@ -11,13 +11,12 @@ #ifndef _FD_H #define _FD_H -#include "list.h" +#include "glusterfs/list.h" #include <sys/types.h> #include <unistd.h> -#include "glusterfs.h" -#include "locking.h" -#include "fd-lk.h" -#include "common-utils.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/locking.h" +#include "glusterfs/fd-lk.h" #define GF_ANON_FD_NO -2 #define GF_ANON_FD_FLAGS (O_RDWR | O_LARGEFILE) @@ -77,8 +76,8 @@ typedef struct _fdtable fdtable_t; */ #define GF_FDENTRY_ALLOCATED -2 -#include "logging.h" -#include "xlator.h" +#include "glusterfs/logging.h" +#include "glusterfs/xlator.h" void gf_fd_put(fdtable_t *fdtable, int32_t fd); @@ -107,6 +106,9 @@ fd_ref(fd_t *fd); void fd_unref(fd_t *fd); +void +fd_close(fd_t *fd); + fd_t * fd_create(struct _inode *inode, pid_t pid); diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/glusterfs/gf-dirent.h index 95403e9eabd..e358da30f58 100644 --- a/libglusterfs/src/gf-dirent.h +++ b/libglusterfs/src/glusterfs/gf-dirent.h @@ -11,8 +11,8 @@ #ifndef _GF_DIRENT_H #define _GF_DIRENT_H -#include "iatt.h" -#include "inode.h" +#include "glusterfs/iatt.h" +#include "glusterfs/inode.h" #define gf_dirent_size(name) (sizeof(gf_dirent_t) + strlen(name) + 1) diff --git a/libglusterfs/src/gf-event.h b/libglusterfs/src/glusterfs/gf-event.h index 5c3724cc953..40f8fbdf10a 100644 --- a/libglusterfs/src/gf-event.h +++ b/libglusterfs/src/glusterfs/gf-event.h @@ -12,6 +12,8 @@ #define _GF_EVENT_H_ #include <pthread.h> +#include "common-utils.h" +#include "list.h" struct event_pool; struct event_ops; @@ -22,13 +24,17 @@ struct event_data { int gen; } __attribute__((__packed__, __may_alias__)); -typedef int (*event_handler_t)(int fd, int idx, int gen, void *data, - int poll_in, int poll_out, int poll_err); +typedef void (*event_handler_t)(int 
fd, int idx, int gen, void *data, + int poll_in, int poll_out, int poll_err, + char event_thread_exit); #define EVENT_EPOLL_TABLES 1024 #define EVENT_EPOLL_SLOTS 1024 #define EVENT_MAX_THREADS 1024 +/* See rpcsvc.h to check why. */ +GF_STATIC_ASSERT(EVENT_MAX_THREADS % __BITS_PER_LONG == 0); + struct event_pool { struct event_ops *ops; @@ -40,6 +46,13 @@ struct event_pool { struct event_slot_epoll *ereg[EVENT_EPOLL_TABLES]; int slots_used[EVENT_EPOLL_TABLES]; + struct list_head poller_death; + int poller_death_sliced; /* track whether the list of fds interested + * poller_death is sliced. If yes, new thread death + * notification has to wait till the list is added + * back + */ + int poller_gen; int used; int changed; @@ -52,8 +65,8 @@ struct event_pool { /* NOTE: Currently used only when event processing is done using * epoll. */ int eventthreadcount; /* number of event threads to execute. */ - pthread_t pollers[EVENT_MAX_THREADS]; /* poller thread_id store, - * and live status */ + pthread_t pollers[EVENT_MAX_THREADS]; /* poller thread_id store, and live + status */ int destroy; int activethreadcount; @@ -81,7 +94,7 @@ struct event_ops { int (*event_register)(struct event_pool *event_pool, int fd, event_handler_t handler, void *data, int poll_in, - int poll_out); + int poll_out, char notify_poller_death); int (*event_select_on)(struct event_pool *event_pool, int fd, int idx, int poll_in, int poll_out); @@ -101,26 +114,27 @@ struct event_ops { }; struct event_pool * -event_pool_new(int count, int eventthreadcount); +gf_event_pool_new(int count, int eventthreadcount); int -event_select_on(struct event_pool *event_pool, int fd, int idx, int poll_in, - int poll_out); +gf_event_select_on(struct event_pool *event_pool, int fd, int idx, int poll_in, + int poll_out); int -event_register(struct event_pool *event_pool, int fd, event_handler_t handler, - void *data, int poll_in, int poll_out); +gf_event_register(struct event_pool *event_pool, int fd, + event_handler_t 
handler, void *data, int poll_in, + int poll_out, char notify_poller_death); int -event_unregister(struct event_pool *event_pool, int fd, int idx); +gf_event_unregister(struct event_pool *event_pool, int fd, int idx); int -event_unregister_close(struct event_pool *event_pool, int fd, int idx); +gf_event_unregister_close(struct event_pool *event_pool, int fd, int idx); int -event_dispatch(struct event_pool *event_pool); +gf_event_dispatch(struct event_pool *event_pool); int -event_reconfigure_threads(struct event_pool *event_pool, int value); +gf_event_reconfigure_threads(struct event_pool *event_pool, int value); int -event_pool_destroy(struct event_pool *event_pool); +gf_event_pool_destroy(struct event_pool *event_pool); int -event_dispatch_destroy(struct event_pool *event_pool); +gf_event_dispatch_destroy(struct event_pool *event_pool); int -event_handled(struct event_pool *event_pool, int fd, int idx, int gen); +gf_event_handled(struct event_pool *event_pool, int fd, int idx, int gen); #endif /* _GF_EVENT_H_ */ diff --git a/libglusterfs/src/gidcache.h b/libglusterfs/src/glusterfs/gidcache.h index 3a7de47dec7..ddaabd765b5 100644 --- a/libglusterfs/src/gidcache.h +++ b/libglusterfs/src/glusterfs/gidcache.h @@ -11,8 +11,8 @@ #ifndef __GIDCACHE_H__ #define __GIDCACHE_H__ -#include "glusterfs.h" -#include "locking.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/locking.h" /* * TBD: make the cache size tunable diff --git a/libglusterfs/src/glfs-message-id.h b/libglusterfs/src/glusterfs/glfs-message-id.h index 7b25918b084..a1a16ca1efb 100644 --- a/libglusterfs/src/glfs-message-id.h +++ b/libglusterfs/src/glusterfs/glfs-message-id.h @@ -89,9 +89,11 @@ enum _msgid_comp { GLFS_MSGID_COMP(SDFS, 1), GLFS_MSGID_COMP(QUIESCE, 1), GLFS_MSGID_COMP(TA, 1), + GLFS_MSGID_COMP(SNAPVIEW_CLIENT, 1), GLFS_MSGID_COMP(TEMPLATE, 1), GLFS_MSGID_COMP(UTIME, 1), - + GLFS_MSGID_COMP(SNAPVIEW_SERVER, 1), + GLFS_MSGID_COMP(CVLT, 1), /* --- new segments for messages goes above this 
line --- */ GLFS_MSGID_END diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/glusterfs/globals.h index c5595ca7d7d..b22eaae6c2f 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/glusterfs/globals.h @@ -45,7 +45,7 @@ 1 /* MIN is the fresh start op-version, mostly \ should not change */ #define GD_OP_VERSION_MAX \ - GD_OP_VERSION_5_0 /* MAX VERSION is the maximum \ + GD_OP_VERSION_9_0 /* MAX VERSION is the maximum \ count in VME table, should \ keep changing with \ introduction of newer \ @@ -111,10 +111,23 @@ #define GD_OP_VERSION_5_0 50000 /* Op-version for GlusterFS 5.0 */ +#define GD_OP_VERSION_5_4 50400 /* Op-version for GlusterFS 5.4 */ + +#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */ + +#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ +#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */ +#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */ +#define GD_OP_VERSION_7_3 70300 /* Op-version for GlusterFS 7.3 */ + +#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */ + +#define GD_OP_VERSION_9_0 90000 /* Op-version for GlusterFS 9.0 */ + #define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0 -#include "xlator.h" -#include "options.h" +#include "glusterfs/xlator.h" +#include "glusterfs/options.h" /* THIS */ #define THIS (*__glusterfs_this_location()) @@ -124,7 +137,7 @@ xlator_t ** __glusterfs_this_location(void); xlator_t * glusterfs_this_get(void); -int +void glusterfs_this_set(xlator_t *); extern xlator_t global_xlator; @@ -133,13 +146,11 @@ extern struct volume_options global_xl_options[]; /* syncopctx */ void * syncopctx_getctx(void); -int -syncopctx_setctx(void *ctx); /* task */ void * synctask_get(void); -int +void synctask_set(void *); /* uuid_buf */ @@ -158,6 +169,9 @@ glusterfs_leaseid_exist(void); int glusterfs_globals_init(glusterfs_ctx_t *ctx); +void +gf_thread_needs_cleanup(void); + struct tvec_base * glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx); 
void diff --git a/libglusterfs/src/glusterfs-acl.h b/libglusterfs/src/glusterfs/glusterfs-acl.h index 8c731fc771b..987bf5fab0b 100644 --- a/libglusterfs/src/glusterfs-acl.h +++ b/libglusterfs/src/glusterfs/glusterfs-acl.h @@ -24,7 +24,7 @@ #include <stdint.h> #include <sys/types.h> /* For uid_t */ -#include "locking.h" /* For gf_lock_t in struct posix_acl_conf */ +#include "glusterfs/locking.h" /* For gf_lock_t in struct posix_acl_conf */ #define ACL_PROGRAM 100227 #define ACLV3_VERSION 3 @@ -143,7 +143,7 @@ gf_posix_acl_get_key(const acl_type_t type) return acl_key; } -static inline const acl_type_t +static inline acl_type_t gf_posix_acl_get_type(const char *key) { acl_type_t type = 0; diff --git a/libglusterfs/src/glusterfs/glusterfs-fops.h b/libglusterfs/src/glusterfs/glusterfs-fops.h new file mode 100644 index 00000000000..030b2701608 --- /dev/null +++ b/libglusterfs/src/glusterfs/glusterfs-fops.h @@ -0,0 +1,241 @@ +/* + Copyright (c) 2008-2019 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GLUSTERFS_FOPS_H_ +#define _GLUSTERFS_FOPS_H_ + +#include <glusterfs/compat.h> + +enum glusterfs_fop_t { + GF_FOP_NULL = 0, + GF_FOP_STAT = 0 + 1, + GF_FOP_READLINK = 0 + 2, + GF_FOP_MKNOD = 0 + 3, + GF_FOP_MKDIR = 0 + 4, + GF_FOP_UNLINK = 0 + 5, + GF_FOP_RMDIR = 0 + 6, + GF_FOP_SYMLINK = 0 + 7, + GF_FOP_RENAME = 0 + 8, + GF_FOP_LINK = 0 + 9, + GF_FOP_TRUNCATE = 0 + 10, + GF_FOP_OPEN = 0 + 11, + GF_FOP_READ = 0 + 12, + GF_FOP_WRITE = 0 + 13, + GF_FOP_STATFS = 0 + 14, + GF_FOP_FLUSH = 0 + 15, + GF_FOP_FSYNC = 0 + 16, + GF_FOP_SETXATTR = 0 + 17, + GF_FOP_GETXATTR = 0 + 18, + GF_FOP_REMOVEXATTR = 0 + 19, + GF_FOP_OPENDIR = 0 + 20, + GF_FOP_FSYNCDIR = 0 + 21, + GF_FOP_ACCESS = 0 + 22, + GF_FOP_CREATE = 0 + 23, + GF_FOP_FTRUNCATE = 0 + 24, + GF_FOP_FSTAT = 0 + 25, + GF_FOP_LK = 0 + 26, + GF_FOP_LOOKUP = 0 + 27, + GF_FOP_READDIR = 0 + 28, + GF_FOP_INODELK = 0 + 29, + GF_FOP_FINODELK = 0 + 30, + GF_FOP_ENTRYLK = 0 + 31, + GF_FOP_FENTRYLK = 0 + 32, + GF_FOP_XATTROP = 0 + 33, + GF_FOP_FXATTROP = 0 + 34, + GF_FOP_FGETXATTR = 0 + 35, + GF_FOP_FSETXATTR = 0 + 36, + GF_FOP_RCHECKSUM = 0 + 37, + GF_FOP_SETATTR = 0 + 38, + GF_FOP_FSETATTR = 0 + 39, + GF_FOP_READDIRP = 0 + 40, + GF_FOP_FORGET = 0 + 41, + GF_FOP_RELEASE = 0 + 42, + GF_FOP_RELEASEDIR = 0 + 43, + GF_FOP_GETSPEC = 0 + 44, + GF_FOP_FREMOVEXATTR = 0 + 45, + GF_FOP_FALLOCATE = 0 + 46, + GF_FOP_DISCARD = 0 + 47, + GF_FOP_ZEROFILL = 0 + 48, + GF_FOP_IPC = 0 + 49, + GF_FOP_SEEK = 0 + 50, + GF_FOP_LEASE = 0 + 51, + GF_FOP_COMPOUND = 0 + 52, + GF_FOP_GETACTIVELK = 0 + 53, + GF_FOP_SETACTIVELK = 0 + 54, + GF_FOP_PUT = 0 + 55, + GF_FOP_ICREATE = 0 + 56, + GF_FOP_NAMELINK = 0 + 57, + GF_FOP_COPY_FILE_RANGE = 0 + 58, + GF_FOP_MAXVALUE = 0 + 59, +}; +typedef enum glusterfs_fop_t glusterfs_fop_t; + +enum glusterfs_event_t { + GF_EVENT_PARENT_UP = 1, + GF_EVENT_POLLIN = 1 + 1, + GF_EVENT_POLLOUT = 1 + 2, + GF_EVENT_POLLERR = 1 + 3, + GF_EVENT_CHILD_UP = 1 + 4, + GF_EVENT_CHILD_DOWN = 1 + 5, + 
GF_EVENT_CHILD_CONNECTING = 1 + 6, + GF_EVENT_CLEANUP = 9, + GF_EVENT_TRANSPORT_CONNECTED = 9 + 1, + GF_EVENT_VOLFILE_MODIFIED = 9 + 2, + GF_EVENT_GRAPH_NEW = 9 + 3, + GF_EVENT_TRANSLATOR_INFO = 9 + 4, + GF_EVENT_TRANSLATOR_OP = 9 + 5, + GF_EVENT_AUTH_FAILED = 9 + 6, + GF_EVENT_VOLUME_DEFRAG = 9 + 7, + GF_EVENT_PARENT_DOWN = 9 + 8, + GF_EVENT_VOLUME_BARRIER_OP = 9 + 9, + GF_EVENT_UPCALL = 9 + 10, + GF_EVENT_SCRUB_STATUS = 9 + 11, + GF_EVENT_SOME_DESCENDENT_DOWN = 9 + 12, + GF_EVENT_SCRUB_ONDEMAND = 9 + 13, + GF_EVENT_SOME_DESCENDENT_UP = 9 + 14, + GF_EVENT_CHILD_PING = 9 + 15, + GF_EVENT_MAXVAL = 9 + 16, +}; +typedef enum glusterfs_event_t glusterfs_event_t; + +enum gf_op_type_t { + GF_OP_TYPE_NULL = 0, + GF_OP_TYPE_FOP = 0 + 1, + GF_OP_TYPE_MGMT = 0 + 2, + GF_OP_TYPE_MAX = 0 + 3, +}; +typedef enum gf_op_type_t gf_op_type_t; + +enum glusterfs_lk_cmds_t { + GF_LK_GETLK = 0, + GF_LK_SETLK = 0 + 1, + GF_LK_SETLKW = 0 + 2, + GF_LK_RESLK_LCK = 0 + 3, + GF_LK_RESLK_LCKW = 0 + 4, + GF_LK_RESLK_UNLCK = 0 + 5, + GF_LK_GETLK_FD = 0 + 6, +}; +typedef enum glusterfs_lk_cmds_t glusterfs_lk_cmds_t; + +enum glusterfs_lk_types_t { + GF_LK_F_RDLCK = 0, + GF_LK_F_WRLCK = 0 + 1, + GF_LK_F_UNLCK = 0 + 2, + GF_LK_EOL = 0 + 3, +}; +typedef enum glusterfs_lk_types_t glusterfs_lk_types_t; + +enum gf_lease_types_t { + NONE = 0, + GF_RD_LEASE = 1, + GF_RW_LEASE = 2, + GF_LEASE_MAX_TYPE = 2 + 1, +}; +typedef enum gf_lease_types_t gf_lease_types_t; + +enum gf_lease_cmds_t { + GF_GET_LEASE = 1, + GF_SET_LEASE = 2, + GF_UNLK_LEASE = 3, +}; +typedef enum gf_lease_cmds_t gf_lease_cmds_t; + +#define LEASE_ID_SIZE 16 /* 128bits */ + +struct gf_lease { + gf_lease_cmds_t cmd; + gf_lease_types_t lease_type; + char lease_id[LEASE_ID_SIZE]; + u_int lease_flags; +}; +typedef struct gf_lease gf_lease; + +enum glusterfs_lk_recovery_cmds_t { + F_RESLK_LCK = 200, + F_RESLK_LCKW = 200 + 1, + F_RESLK_UNLCK = 200 + 2, + F_GETLK_FD = 200 + 3, +}; +typedef enum glusterfs_lk_recovery_cmds_t 
glusterfs_lk_recovery_cmds_t; + +enum gf_lk_domain_t { + GF_LOCK_POSIX = 0, + GF_LOCK_INTERNAL = 1, +}; +typedef enum gf_lk_domain_t gf_lk_domain_t; + +enum entrylk_cmd { + ENTRYLK_LOCK = 0, + ENTRYLK_UNLOCK = 1, + ENTRYLK_LOCK_NB = 2, +}; +typedef enum entrylk_cmd entrylk_cmd; + +enum entrylk_type { + ENTRYLK_RDLCK = 0, + ENTRYLK_WRLCK = 1, +}; +typedef enum entrylk_type entrylk_type; +#define GF_MAX_LOCK_OWNER_LEN 1024 /* 1kB as per NLM */ +#define GF_LKOWNER_BUF_SIZE \ + ((GF_MAX_LOCK_OWNER_LEN * 2) + (GF_MAX_LOCK_OWNER_LEN / 8)) + +struct gf_lkowner_t { + int len; + char data[GF_MAX_LOCK_OWNER_LEN]; +}; +typedef struct gf_lkowner_t gf_lkowner_t; + +enum gf_xattrop_flags_t { + GF_XATTROP_ADD_ARRAY = 0, + GF_XATTROP_ADD_ARRAY64 = 1, + GF_XATTROP_OR_ARRAY = 2, + GF_XATTROP_AND_ARRAY = 3, + GF_XATTROP_GET_AND_SET = 4, + GF_XATTROP_ADD_ARRAY_WITH_DEFAULT = 5, + GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT = 6, +}; +typedef enum gf_xattrop_flags_t gf_xattrop_flags_t; + +enum gf_seek_what_t { + GF_SEEK_DATA = 0, + GF_SEEK_HOLE = 1, +}; +typedef enum gf_seek_what_t gf_seek_what_t; + +enum gf_upcall_flags_t { + GF_UPCALL_NULL = 0, + GF_UPCALL = 1, + GF_UPCALL_CI_STAT = 2, + GF_UPCALL_CI_XATTR = 3, + GF_UPCALL_CI_RENAME = 4, + GF_UPCALL_CI_NLINK = 5, + GF_UPCALL_CI_FORGET = 6, + GF_UPCALL_LEASE_RECALL = 7, + GF_UPCALL_FLAGS_MAXVALUE = 8, +}; +typedef enum gf_upcall_flags_t gf_upcall_flags_t; + +enum gf_dict_data_type_t { + GF_DATA_TYPE_UNKNOWN = 0, + GF_DATA_TYPE_STR_OLD = 1, + GF_DATA_TYPE_INT = 2, + GF_DATA_TYPE_UINT = 3, + GF_DATA_TYPE_DOUBLE = 4, + GF_DATA_TYPE_STR = 5, + GF_DATA_TYPE_PTR = 6, + GF_DATA_TYPE_GFUUID = 7, + GF_DATA_TYPE_IATT = 8, + GF_DATA_TYPE_MDATA = 9, + GF_DATA_TYPE_MAX = 10, +}; +typedef enum gf_dict_data_type_t gf_dict_data_type_t; + +#endif /* !_GLUSTERFS_FOPS_H_ */ diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index 376b7a7c673..e6425618b7f 100644 --- a/libglusterfs/src/glusterfs.h +++ 
b/libglusterfs/src/glusterfs/glusterfs.h @@ -14,6 +14,7 @@ #include <stdio.h> #include <string.h> #include <stdlib.h> +#include <stdbool.h> #include <netinet/in.h> #include <sys/socket.h> #include <sys/types.h> @@ -30,32 +31,21 @@ #include <limits.h> /* For PATH_MAX */ #include <openssl/sha.h> -#include "glusterfs-fops.h" /* generated XDR values for FOPs */ - -#ifndef IXDR_GET_LONG -#define IXDR_GET_LONG(buf) ((long)IXDR_GET_U_INT32(buf)) -#endif -#ifndef IXDR_PUT_LONG -#define IXDR_PUT_LONG(buf, v) ((long)IXDR_PUT_INT32(buf, (long)(v))) -#endif -#ifndef IXDR_GET_U_LONG -#define IXDR_GET_U_LONG(buf) ((u_long)IXDR_GET_LONG(buf)) -#endif -#ifndef IXDR_PUT_U_LONG -#define IXDR_PUT_U_LONG(buf, v) IXDR_PUT_LONG(buf, (long)(v)) -#endif - -#include "list.h" -#include "locking.h" -#include "logging.h" -#include "lkowner.h" -#include "compat-uuid.h" -#include "refcount.h" -#include "atomic.h" +#include "glusterfs/glusterfs-fops.h" +#include "glusterfs/list.h" +#include "glusterfs/locking.h" +#include "glusterfs/logging.h" +#include "glusterfs/lkowner.h" +#include "glusterfs/compat-uuid.h" +#include "glusterfs/refcount.h" +#include "glusterfs/atomic.h" #define GF_YES 1 #define GF_NO 0 +#define IS_ERROR(ret) ((ret) < 0) +#define IS_SUCCESS(ret) ((ret) >= 0) + #ifndef O_LARGEFILE /* savannah bug #20053, patch for compiling on darwin */ #define O_LARGEFILE 0100000 /* from bits/fcntl.h */ @@ -89,8 +79,8 @@ #define GF_OFF_MAX ((1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL) #define GLUSTERD_MAX_SNAP_NAME 255 -#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 10 - +#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 1024 +#define GLUSTERD_BRICK_SERVERS "cluster.brick-vol-servers" #define SLEN(str) (sizeof(str) - 1) #define ZR_MOUNTPOINT_OPT "mountpoint" @@ -178,6 +168,8 @@ #define GLUSTERFS_INTERNAL_FOP_KEY "glusterfs-internal-fop" +#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock" + /* GlusterFS Internal FOP Indicator flags * (To pass information on the context in which a 
paritcular * fop is performed between translators) @@ -214,6 +206,8 @@ enum gf_internal_fop_indicator { #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count" #define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk" #define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count" +#define GLUSTERFS_INODELK_DOM_PREFIX "glusterfs.inodelk-dom-prefix" +#define GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS "glusterfs.multi-dom-lk-cnt-req" #define GFID_TO_PATH_KEY "glusterfs.gfid2path" #define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime" #define GF_XATTR_XTIME_PATTERN "trusted.glusterfs.*.xtime" @@ -226,6 +220,9 @@ enum gf_internal_fop_indicator { #define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup" #define QUOTA_READ_ONLY_KEY "trusted.glusterfs.quota.read-only" +/* ctime related */ +#define CTIME_MDATA_XDATA_KEY "set-ctime-mdata" + /* afr related */ #define AFR_XATTR_PREFIX "trusted.afr" @@ -270,7 +267,7 @@ enum gf_internal_fop_indicator { #define GLUSTERFS_RPC_REPLY_SIZE 24 -#define STARTING_EVENT_THREADS 1 +#define STARTING_EVENT_THREADS 2 #define DEFAULT_VAR_RUN_DIRECTORY DATADIR "/run/gluster" #define DEFAULT_GLUSTERFSD_MISC_DIRETORY DATADIR "/lib/misc/glusterfsd" @@ -284,7 +281,7 @@ enum gf_internal_fop_indicator { /* GlusterFS's maximum supported Auxiliary GIDs */ #define GF_MAX_AUX_GROUPS 65535 -#define GF_UUID_BUF_SIZE 50 +#define GF_UUID_BUF_SIZE 37 /* UUID_CANONICAL_FORM_LEN + NULL */ #define GF_UUID_BNAME_BUF_SIZE (320) /* (64 + 256) */ #define GF_REBALANCE_TID_KEY "rebalance-id" @@ -312,7 +309,6 @@ enum gf_internal_fop_indicator { #define DHT_SKIP_NON_LINKTO_UNLINK "unlink-only-if-dht-linkto-file" #define TIER_SKIP_NON_LINKTO_UNLINK "unlink-only-if-tier-linkto-file" -#define TIER_LINKFILE_GFID "tier-linkfile-gfid" #define DHT_SKIP_OPEN_FD_UNLINK "dont-unlink-for-open-fd" #define DHT_IATT_IN_XDATA_KEY "dht-get-iatt-in-xattr" #define DHT_MODE_IN_XDATA_KEY "dht-get-mode-in-xattr" @@ -325,6 +321,8 @@ enum gf_internal_fop_indicator { 
#define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count" #define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count" +#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count" + #define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup" #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect" @@ -343,6 +341,8 @@ enum gf_internal_fop_indicator { #define GF_LOG_FLUSH_TIMEOUT_MAX_STR "300" #define GF_LOG_LOCALTIME_DEFAULT 0 +#define GF_NETWORK_TIMEOUT 42 + #define GF_BACKTRACE_LEN 4096 #define GF_BACKTRACE_FRAME_COUNT 7 @@ -359,6 +359,10 @@ enum gf_internal_fop_indicator { } while (0) #define GF_CS_OBJECT_SIZE "trusted.glusterfs.cs.object_size" +#define GF_CS_BLOCK_SIZE "trusted.glusterfs.cs.block_size" +#define GF_CS_NUM_BLOCKS "trusted.glusterfs.cs.num_blocks" + +#define GF_CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid" #define GF_CS_OBJECT_UPLOAD_COMPLETE "trusted.glusterfs.csou.complete" #define GF_CS_OBJECT_REMOTE "trusted.glusterfs.cs.remote" @@ -367,6 +371,10 @@ enum gf_internal_fop_indicator { #define GF_CS_OBJECT_STATUS "trusted.glusterfs.cs.status" #define GF_CS_OBJECT_REPAIR "trusted.glusterfs.cs.repair" +#define gf_boolean_t bool +#define _gf_false false +#define _gf_true true + typedef enum { GF_CS_LOCAL = 1, GF_CS_REMOTE = 2, @@ -417,7 +425,7 @@ static const char *const FOP_PRI_STRINGS[] = {"HIGH", "NORMAL", "LOW", "LEAST"}; static inline const char * fop_pri_to_string(gf_fop_pri_t pri) { - if (pri < 0) + if (IS_ERROR(pri)) return "UNSPEC"; if (pri >= GF_FOP_PRI_MAX) @@ -458,6 +466,8 @@ typedef struct _server_cmdline server_cmdline_t; #define GF_OPTION_DISABLE _gf_false #define GF_OPTION_DEFERRED 2 +typedef enum { _gf_none, _gf_memcheck, _gf_drd } gf_valgrind_tool; + struct _cmd_args { /* basic options */ char *volfile_server; @@ -522,6 +532,8 @@ struct _cmd_args { pid_t client_pid; int client_pid_set; unsigned uid_map_root; + int32_t lru_limit; + int32_t invalidate_limit; int background_qlen; int congestion_threshold; char *fuse_mountopts; @@ 
-548,7 +560,8 @@ struct _cmd_args { /* Run this process with valgrind? Might want to prevent calling * functions that prevent valgrind from working correctly, like * dlclose(). */ - int valgrind; + gf_valgrind_tool vgtool; + int localtime_logging; /* For the subdir mount */ @@ -562,21 +575,35 @@ struct _cmd_args { /* FUSE writeback cache support */ int kernel_writeback_cache; uint32_t attr_times_granularity; + + int fuse_flush_handle_interrupt; + int fuse_auto_inval; + + bool global_threading; + bool brick_mux; + + uint32_t fuse_dev_eperm_ratelimit_ns; }; typedef struct _cmd_args cmd_args_t; struct _glusterfs_graph { struct list_head list; - char graph_uuid[128]; struct timeval dob; void *first; void *top; /* selected by -n */ - uint32_t leaf_count; int xl_count; int id; /* Used in logging */ int used; /* Should be set when fuse gets first CHILD_UP */ uint32_t volfile_checksum; + uint32_t leaf_count; + void *last_xl; /* Stores the last xl of the graph, as of now only populated + in client multiplexed code path */ + pthread_mutex_t mutex; + pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */ + int parent_down; + char graph_uuid[128]; + char volume_id[GF_UUID_BUF_SIZE]; }; typedef struct _glusterfs_graph glusterfs_graph_t; @@ -685,6 +712,7 @@ struct _glusterfs_ctx { char btbuf[GF_BACKTRACE_LEN]; pthread_mutex_t notify_lock; + pthread_mutex_t cleanup_lock; pthread_cond_t notify_cond; int notifying; @@ -705,6 +733,15 @@ struct _glusterfs_ctx { } stats; struct list_head volfile_list; + /* Add members to manage janitor threads for cleanup fd */ + struct list_head janitor_fds; + pthread_cond_t fd_cond; + pthread_mutex_t fd_lock; + pthread_t janitor; + /* The variable is used to save total posix xlator count */ + uint32_t pxl_count; + + char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ }; typedef struct _glusterfs_ctx glusterfs_ctx_t; @@ -712,7 +749,8 @@ typedef struct { char volfile_checksum[SHA256_DIGEST_LENGTH]; char vol_id[NAME_MAX + 1]; 
struct list_head volfile_list; - + glusterfs_graph_t *graph; + FILE *pidfp; } gf_volfile_t; glusterfs_ctx_t * @@ -795,4 +833,6 @@ gf_free_mig_locks(lock_migration_info_t *locks); int glusterfs_read_secure_access_file(void); +int +glusterfs_graph_fini(glusterfs_graph_t *graph); #endif /* _GLUSTERFS_H */ diff --git a/libglusterfs/src/graph-utils.h b/libglusterfs/src/glusterfs/graph-utils.h index c0e87268c5c..247f1a55d5a 100644 --- a/libglusterfs/src/graph-utils.h +++ b/libglusterfs/src/glusterfs/graph-utils.h @@ -13,10 +13,6 @@ int glusterfs_graph_print_file(FILE *file, glusterfs_graph_t *graph); - -char * -glusterfs_graph_print_buf(glusterfs_graph_t *graph); - int glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl); void diff --git a/libglusterfs/src/hashfn.h b/libglusterfs/src/glusterfs/hashfn.h index a4cb33f072a..6e92e706d8c 100644 --- a/libglusterfs/src/hashfn.h +++ b/libglusterfs/src/glusterfs/hashfn.h @@ -20,6 +20,4 @@ SuperFastHash(const char *data, int32_t len); uint32_t gf_dm_hashfn(const char *msg, int len); -uint32_t -ReallySimpleHash(char *path, int len); #endif /* __HASHFN_H__ */ diff --git a/libglusterfs/src/iatt.h b/libglusterfs/src/glusterfs/iatt.h index db366472970..f03d68b02f0 100644 --- a/libglusterfs/src/iatt.h +++ b/libglusterfs/src/glusterfs/iatt.h @@ -18,8 +18,8 @@ #include <sys/stat.h> /* for iatt <--> stat conversions */ #include <unistd.h> -#include "compat.h" -#include "compat-uuid.h" +#include "glusterfs/compat.h" +#include "glusterfs/compat-uuid.h" typedef enum { IA_INVAL = 0, @@ -92,6 +92,15 @@ struct old_iatt { uint32_t ia_ctime_nsec; }; +struct mdata_iatt { + int64_t ia_atime; /* last access time */ + int64_t ia_mtime; /* last modification time */ + int64_t ia_ctime; /* last status change time */ + uint32_t ia_atime_nsec; + uint32_t ia_mtime_nsec; + uint32_t ia_ctime_nsec; +}; + /* 64-bit mask for valid members in struct iatt. 
*/ #define IATT_TYPE 0x0000000000000001U #define IATT_MODE 0x0000000000000002U @@ -264,12 +273,10 @@ st_mode_prot_from_ia(ia_prot_t prot) return prot_bit; } -static inline mode_t -st_mode_from_ia(ia_prot_t prot, ia_type_t type) +static inline uint32_t +st_mode_type_from_ia(ia_type_t type) { - mode_t st_mode = 0; uint32_t type_bit = 0; - uint32_t prot_bit = 0; switch (type) { case IA_IFREG: @@ -297,6 +304,17 @@ st_mode_from_ia(ia_prot_t prot, ia_type_t type) break; } + return type_bit; +} + +static inline mode_t +st_mode_from_ia(ia_prot_t prot, ia_type_t type) +{ + mode_t st_mode = 0; + uint32_t type_bit = 0; + uint32_t prot_bit = 0; + + type_bit = st_mode_type_from_ia(type); prot_bit = st_mode_prot_from_ia(prot); st_mode = (type_bit | prot_bit); @@ -304,6 +322,17 @@ st_mode_from_ia(ia_prot_t prot, ia_type_t type) return st_mode; } +static inline void +iatt_to_mdata(struct mdata_iatt *mdata, struct iatt *iatt) +{ + mdata->ia_atime = iatt->ia_atime; + mdata->ia_atime_nsec = iatt->ia_atime_nsec; + mdata->ia_mtime = iatt->ia_mtime; + mdata->ia_mtime_nsec = iatt->ia_mtime_nsec; + mdata->ia_ctime = iatt->ia_ctime; + mdata->ia_ctime_nsec = iatt->ia_ctime_nsec; +} + static inline int iatt_from_stat(struct iatt *iatt, struct stat *stat) { diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/glusterfs/inode.h index 276a1f1577c..4b28da510c7 100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/glusterfs/inode.h @@ -28,10 +28,10 @@ typedef struct _inode inode_t; struct _dentry; typedef struct _dentry dentry_t; -#include "list.h" -#include "iatt.h" -#include "compat-uuid.h" -#include "fd.h" +#include "glusterfs/list.h" +#include "glusterfs/iatt.h" +#include "glusterfs/compat-uuid.h" +#include "glusterfs/fd.h" struct _inode_table { pthread_mutex_t lock; @@ -54,6 +54,17 @@ struct _inode_table { struct mem_pool *dentry_pool; /* memory pool for dentrys */ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */ int ctxcount; /* number of slots in inode->ctx */ + + /* 
This is required for 'invalidation' when 'nlookup' would be used, + specially in case of fuse-bridge */ + int32_t (*invalidator_fn)(xlator_t *, inode_t *); + xlator_t *invalidator_xl; + struct list_head invalidate; /* inodes which are in invalidation queue */ + uint32_t invalidate_size; /* count of inodes in invalidation list */ + + /* flag to indicate whether the cleanup of the inode + table started or not */ + gf_boolean_t cleanup_started; }; struct _dentry { @@ -89,7 +100,7 @@ struct _inode { inode_table_t *table; /* the table this inode belongs to */ uuid_t gfid; gf_lock_t lock; - uint64_t nlookup; + gf_atomic_t nlookup; uint32_t fd_count; /* Open fd count */ uint32_t active_fd_count; /* Active open fd count */ uint32_t ref; /* reference count on this inode */ @@ -100,6 +111,8 @@ struct _inode { struct list_head list; /* active/lru/purge */ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ + bool in_invalidate_list; /* Set if inode is in table invalidate list */ + bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ }; #define UUID0_STR "00000000-0000-0000-0000-000000000000" @@ -107,7 +120,12 @@ struct _inode { #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1) inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl); +inode_table_new(uint32_t lru_limit, xlator_t *xl); + +inode_table_t * +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), + xlator_t *invalidator_xl); void inode_table_destroy_all(glusterfs_ctx_t *ctx); @@ -139,6 +157,8 @@ inode_lookup(inode_t *inode); int inode_forget(inode_t *inode, uint64_t nlookup); +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup); int inode_ref_reduce_by_n(inode_t *inode, uint64_t nref); @@ -151,9 +171,6 @@ inode_rename(inode_table_t *table, inode_t *olddir, const char *oldname, inode_t *newdir, const char *newname, inode_t *inode, struct iatt *stbuf); -dentry_t * -__dentry_grep(inode_table_t *table, 
inode_t *parent, const char *name); - inode_t * inode_grep(inode_table_t *table, inode_t *parent, const char *name); diff --git a/libglusterfs/src/iobuf.h b/libglusterfs/src/glusterfs/iobuf.h index 41ac5dd191c..4bd443efd5e 100644 --- a/libglusterfs/src/iobuf.h +++ b/libglusterfs/src/glusterfs/iobuf.h @@ -11,11 +11,12 @@ #ifndef _IOBUF_H_ #define _IOBUF_H_ -#include "list.h" -#include "common-utils.h" -#include <pthread.h> +#include <stddef.h> // for size_t #include <sys/mman.h> -#include <sys/uio.h> +#include "glusterfs/atomic.h" // for gf_atomic_t +#include <sys/uio.h> // for struct iovec +#include "glusterfs/locking.h" // for gf_lock_t +#include "glusterfs/list.h" #define GF_VARIABLE_IOBUF_COUNT 32 @@ -94,14 +95,14 @@ struct iobuf_arena { void *mem_base; struct iobuf *iobufs; /* allocated iobufs list */ - int active_cnt; - struct iobuf active; /* head node iobuf - (unused by itself) */ - int passive_cnt; + struct iobuf active; /* head node iobuf + (unused by itself) */ struct iobuf passive; /* head node iobuf (unused by itself) */ uint64_t alloc_cnt; /* total allocs in this pool */ - int max_active; /* max active buffers at a given time */ + int active_cnt; + int passive_cnt; + int max_active; /* max active buffers at a given time */ }; struct iobuf_pool { @@ -110,7 +111,6 @@ struct iobuf_pool { arena */ size_t default_page_size; /* default size of iobuf */ - int arena_cnt; struct list_head all_arenas; struct list_head arenas[GF_VARIABLE_IOBUF_COUNT]; /* array of arenas. 
Each element of the array is a list of arenas @@ -124,6 +124,7 @@ struct iobuf_pool { uint64_t request_misses; /* mostly the requests for higher value of iobufs */ + int arena_cnt; int rdma_device_count; struct list_head *mr_list[GF_RDMA_DEVICE_COUNT]; void *device[GF_RDMA_DEVICE_COUNT]; diff --git a/libglusterfs/src/latency.h b/libglusterfs/src/glusterfs/latency.h index 063ea291ee2..4d601bbcbd6 100644 --- a/libglusterfs/src/latency.h +++ b/libglusterfs/src/glusterfs/latency.h @@ -11,13 +11,23 @@ #ifndef __LATENCY_H__ #define __LATENCY_H__ -#include "glusterfs.h" +#include <inttypes.h> +#include <time.h> -typedef struct fop_latency { - double min; /* min time for the call (microseconds) */ - double max; /* max time for the call (microseconds) */ - double total; /* total time (microseconds) */ +typedef struct _gf_latency { + uint64_t min; /* min time for the call (nanoseconds) */ + uint64_t max; /* max time for the call (nanoseconds) */ + uint64_t total; /* total time (nanoseconds) */ uint64_t count; -} fop_latency_t; +} gf_latency_t; +gf_latency_t * +gf_latency_new(size_t n); + +void +gf_latency_reset(gf_latency_t *lat); + +void +gf_latency_update(gf_latency_t *lat, struct timespec *begin, + struct timespec *end); #endif /* __LATENCY_H__ */ diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h new file mode 100644 index 00000000000..cb31dd7614b --- /dev/null +++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h @@ -0,0 +1,245 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+ */ + +#ifndef _LG_MESSAGES_H_ +#define _LG_MESSAGES_H_ + +#include "glusterfs/glfs-message-id.h" + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID( + LIBGLUSTERFS, LG_MSG_ASPRINTF_FAILED, LG_MSG_INVALID_ENTRY, + LG_MSG_COUNT_LESS_THAN_ZERO, LG_MSG_COUNT_LESS_THAN_DATA_PAIRS, + LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, LG_MSG_PAIRS_LESS_THAN_COUNT, + LG_MSG_KEY_OR_VALUE_NULL, LG_MSG_FAILED_TO_LOG_DICT, + LG_MSG_NULL_VALUE_IN_DICT, LG_MSG_DIR_OP_FAILED, + LG_MSG_STORE_HANDLE_CREATE_FAILED, LG_MSG_FILE_OP_FAILED, + LG_MSG_FILE_STAT_FAILED, LG_MSG_LOCK_FAILED, LG_MSG_UNLOCK_FAILED, + LG_MSG_DICT_SERIAL_FAILED, LG_MSG_DICT_UNSERIAL_FAILED, LG_MSG_NO_MEMORY, + LG_MSG_VOLUME_ERROR, LG_MSG_SUB_VOLUME_ERROR, LG_MSG_SYNTAX_ERROR, + LG_MSG_BACKTICK_PARSE_FAILED, LG_MSG_BUFFER_ERROR, LG_MSG_STRDUP_ERROR, + LG_MSG_HASH_FUNC_ERROR, LG_MSG_GET_BUCKET_FAILED, LG_MSG_INSERT_FAILED, + LG_MSG_OUT_OF_RANGE, LG_MSG_VALIDATE_RETURNS, LG_MSG_VALIDATE_REC_FAILED, + LG_MSG_RB_TABLE_CREATE_FAILED, LG_MSG_PATH_NOT_FOUND, + LG_MSG_EXPAND_FD_TABLE_FAILED, LG_MSG_MAPPING_FAILED, + LG_MSG_INIT_IOBUF_FAILED, LG_MSG_PAGE_SIZE_EXCEEDED, LG_MSG_ARENA_NOT_FOUND, + LG_MSG_IOBUF_NOT_FOUND, LG_MSG_POOL_NOT_FOUND, LG_MSG_SET_ATTRIBUTE_FAILED, + LG_MSG_READ_ATTRIBUTE_FAILED, LG_MSG_UNMOUNT_FAILED, + LG_MSG_LATENCY_MEASUREMENT_STATE, LG_MSG_NO_PERM, LG_MSG_NO_KEY, + LG_MSG_DICT_NULL, LG_MSG_INIT_TIMER_FAILED, LG_MSG_FD_ANONYMOUS_FAILED, + LG_MSG_FD_CREATE_FAILED, LG_MSG_BUFFER_FULL, LG_MSG_FWRITE_FAILED, + LG_MSG_PRINT_FAILED, LG_MSG_MEM_POOL_DESTROY, + LG_MSG_EXPAND_CLIENT_TABLE_FAILED, LG_MSG_DISCONNECT_CLIENT, + LG_MSG_PIPE_CREATE_FAILED, LG_MSG_SET_PIPE_FAILED, + 
LG_MSG_REGISTER_PIPE_FAILED, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS, + LG_MSG_INDEX_NOT_FOUND, LG_MSG_EPOLL_FD_CREATE_FAILED, + LG_MSG_SLOT_NOT_FOUND, LG_MSG_STALE_FD_FOUND, LG_MSG_GENERATION_MISMATCH, + LG_MSG_PTHREAD_KEY_CREATE_FAILED, LG_MSG_TRANSLATOR_INIT_FAILED, + LG_MSG_UUID_BUF_INIT_FAILED, LG_MSG_LKOWNER_BUF_INIT_FAILED, + LG_MSG_SYNCTASK_INIT_FAILED, LG_MSG_SYNCOPCTX_INIT_FAILED, + LG_MSG_GLOBAL_INIT_FAILED, LG_MSG_PTHREAD_FAILED, LG_MSG_DIR_IS_SYMLINK, + LG_MSG_RESOLVE_HOSTNAME_FAILED, LG_MSG_GETADDRINFO_FAILED, + LG_MSG_GETNAMEINFO_FAILED, LG_MSG_PATH_ERROR, LG_MSG_INET_PTON_FAILED, + LG_MSG_NEGATIVE_NUM_PASSED, LG_MSG_GETHOSTNAME_FAILED, + LG_MSG_RESERVED_PORTS_ERROR, LG_MSG_INVALID_PORT, LG_MSG_INVALID_FAMILY, + LG_MSG_CONVERSION_FAILED, LG_MSG_SKIP_HEADER_FAILED, LG_MSG_INVALID_LOG, + LG_MSG_UTIMES_FAILED, LG_MSG_BACKTRACE_SAVE_FAILED, LG_MSG_INIT_FAILED, + LG_MSG_VALIDATION_FAILED, LG_MSG_GRAPH_ERROR, LG_MSG_UNKNOWN_OPTIONS_FAILED, + LG_MSG_CTX_NULL, LG_MSG_TMPFILE_CREATE_FAILED, LG_MSG_DLOPEN_FAILED, + LG_MSG_LOAD_FAILED, LG_MSG_DLSYM_ERROR, LG_MSG_TREE_NOT_FOUND, + LG_MSG_PER_DENTRY, LG_MSG_DENTRY, LG_MSG_GETIFADDRS_FAILED, + LG_MSG_REGEX_OP_FAILED, LG_MSG_FRAME_ERROR, LG_MSG_SET_PARAM_FAILED, + LG_MSG_GET_PARAM_FAILED, LG_MSG_PREPARE_FAILED, LG_MSG_EXEC_FAILED, + LG_MSG_BINDING_FAILED, LG_MSG_DELETE_FAILED, LG_MSG_GET_ID_FAILED, + LG_MSG_CREATE_FAILED, LG_MSG_PARSE_FAILED, LG_MSG_GETCONTEXT_FAILED, + LG_MSG_UPDATE_FAILED, LG_MSG_QUERY_CALL_BACK_FAILED, + LG_MSG_GET_RECORD_FAILED, LG_MSG_DB_ERROR, LG_MSG_CONNECTION_ERROR, + LG_MSG_NOT_MULTITHREAD_MODE, LG_MSG_SKIP_PATH, LG_MSG_INVALID_FOP, + LG_MSG_QUERY_FAILED, LG_MSG_CLEAR_COUNTER_FAILED, LG_MSG_LOCK_LIST_FAILED, + LG_MSG_UNLOCK_LIST_FAILED, LG_MSG_ADD_TO_LIST_FAILED, LG_MSG_INIT_DB_FAILED, + LG_MSG_DELETE_FROM_LIST_FAILED, LG_MSG_CLOSE_CONNECTION_FAILED, + LG_MSG_INSERT_OR_UPDATE_FAILED, LG_MSG_FIND_OP_FAILED, + LG_MSG_CONNECTION_INIT_FAILED, LG_MSG_COMPLETED_TASK, LG_MSG_WAKE_UP_ZOMBIE, + 
LG_MSG_REWAITING_TASK, LG_MSG_SLEEP_ZOMBIE, LG_MSG_SWAPCONTEXT_FAILED, + LG_MSG_UNSUPPORTED_PLUGIN, LG_MSG_INVALID_DB_TYPE, LG_MSG_UNDERSIZED_BUF, + LG_MSG_DATA_CONVERSION_ERROR, LG_MSG_DICT_ERROR, LG_MSG_IOBUFS_NOT_FOUND, + LG_MSG_ENTRIES_NULL, LG_MSG_FD_NOT_FOUND_IN_FDTABLE, + LG_MSG_REALLOC_FOR_FD_PTR_FAILED, LG_MSG_DICT_SET_FAILED, LG_MSG_NULL_PTR, + LG_MSG_RBTHASH_INIT_BUCKET_FAILED, LG_MSG_ASSERTION_FAILED, + LG_MSG_HOSTNAME_NULL, LG_MSG_INVALID_IPV4_FORMAT, + LG_MSG_CTX_CLEANUP_STARTED, LG_MSG_TIMER_REGISTER_ERROR, + LG_MSG_PTR_HEADER_CORRUPTED, LG_MSG_INVALID_UPLINK, LG_MSG_CLIENT_NULL, + LG_MSG_XLATOR_DOES_NOT_IMPLEMENT, LG_MSG_DENTRY_NOT_FOUND, + LG_MSG_INODE_NOT_FOUND, LG_MSG_INODE_TABLE_NOT_FOUND, + LG_MSG_DENTRY_CREATE_FAILED, LG_MSG_INODE_CONTEXT_FREED, + LG_MSG_UNKNOWN_LOCK_TYPE, LG_MSG_UNLOCK_BEFORE_LOCK, + LG_MSG_LOCK_OWNER_ERROR, LG_MSG_MEMPOOL_PTR_NULL, + LG_MSG_QUOTA_XATTRS_MISSING, LG_MSG_INVALID_STRING, LG_MSG_BIND_REF, + LG_MSG_REF_COUNT, LG_MSG_INVALID_ARG, LG_MSG_VOL_OPTION_ADD, + LG_MSG_XLATOR_OPTION_INVALID, LG_MSG_GETTIMEOFDAY_FAILED, + LG_MSG_GRAPH_INIT_FAILED, LG_MSG_EVENT_NOTIFY_FAILED, + LG_MSG_ACTIVE_GRAPH_NULL, LG_MSG_VOLFILE_PARSE_ERROR, LG_MSG_FD_INODE_NULL, + LG_MSG_INVALID_VOLFILE_ENTRY, LG_MSG_PER_DENTRY_FAILED, + LG_MSG_PARENT_DENTRY_NOT_FOUND, LG_MSG_DENTRY_CYCLIC_LOOP, + LG_MSG_INVALID_POLL_IN, LG_MSG_INVALID_POLL_OUT, LG_MSG_EPOLL_FD_ADD_FAILED, + LG_MSG_EPOLL_FD_DEL_FAILED, LG_MSG_EPOLL_FD_MODIFY_FAILED, + LG_MSG_STARTED_EPOLL_THREAD, LG_MSG_EXITED_EPOLL_THREAD, + LG_MSG_START_EPOLL_THREAD_FAILED, LG_MSG_FALLBACK_TO_POLL, + LG_MSG_QUOTA_CONF_ERROR, LG_MSG_RBTHASH_GET_ENTRY_FAILED, + LG_MSG_RBTHASH_GET_BUCKET_FAILED, LG_MSG_RBTHASH_INSERT_FAILED, + LG_MSG_RBTHASH_INIT_ENTRY_FAILED, LG_MSG_TMPFILE_DELETE_FAILED, + LG_MSG_MEMPOOL_INVALID_FREE, LG_MSG_LOCK_FAILURE, LG_MSG_SET_LOG_LEVEL, + LG_MSG_VERIFY_PLATFORM, LG_MSG_RUNNER_LOG, LG_MSG_LEASEID_BUF_INIT_FAILED, + LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST, + 
LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED, + LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG, + LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE, + LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED, + LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED, + LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, LG_MSG_DUPLICATE_ENTRY, + LG_MSG_THREAD_NAME_TOO_LONG, LG_MSG_SET_THREAD_FAILED, + LG_MSG_THREAD_CREATE_FAILED, LG_MSG_FILE_DELETE_FAILED, LG_MSG_WRONG_VALUE, + LG_MSG_PATH_OPEN_FAILED, LG_MSG_DISPATCH_HANDLER_FAILED, + LG_MSG_READ_FILE_FAILED, LG_MSG_ENTRIES_NOT_PROVIDED, + LG_MSG_ENTRIES_PROVIDED, LG_MSG_UNKNOWN_OPTION_TYPE, + LG_MSG_OPTION_DEPRECATED, LG_MSG_INVALID_INIT, LG_MSG_OBJECT_NULL, + LG_MSG_GRAPH_NOT_SET, LG_MSG_FILENAME_NOT_SPECIFIED, LG_MSG_STRUCT_MISS, + LG_MSG_METHOD_MISS, LG_MSG_INPUT_DATA_NULL, LG_MSG_OPEN_LOGFILE_FAILED); + +#define LG_MSG_EPOLL_FD_CREATE_FAILED_STR "epoll fd creation failed" +#define LG_MSG_INVALID_POLL_IN_STR "invalid poll_in value" +#define LG_MSG_INVALID_POLL_OUT_STR "invalid poll_out value" +#define LG_MSG_SLOT_NOT_FOUND_STR "could not find slot" +#define LG_MSG_EPOLL_FD_ADD_FAILED_STR "failed to add fd to epoll" +#define LG_MSG_EPOLL_FD_DEL_FAILED_STR "fail to delete fd to epoll" +#define LG_MSG_EPOLL_FD_MODIFY_FAILED_STR "failed to modify fd events" +#define LG_MSG_STALE_FD_FOUND_STR "stale fd found" +#define LG_MSG_GENERATION_MISMATCH_STR "generation mismatch" +#define LG_MSG_STARTED_EPOLL_THREAD_STR "Started thread with index" +#define LG_MSG_EXITED_EPOLL_THREAD_STR "Exited thread" +#define LG_MSG_DISPATCH_HANDLER_FAILED_STR "Failed to dispatch handler" +#define LG_MSG_START_EPOLL_THREAD_FAILED_STR "Failed to start thread" +#define LG_MSG_PIPE_CREATE_FAILED_STR "pipe creation failed" +#define LG_MSG_SET_PIPE_FAILED_STR "could not set pipe to non blocking mode" +#define LG_MSG_REGISTER_PIPE_FAILED_STR \ + "could not register pipe fd with poll event loop" +#define 
LG_MSG_POLL_IGNORE_MULTIPLE_THREADS_STR \ + "Currently poll does not use multiple event processing threads, count " \ + "ignored" +#define LG_MSG_INDEX_NOT_FOUND_STR "index not found" +#define LG_MSG_READ_FILE_FAILED_STR "read on file returned error" +#define LG_MSG_RB_TABLE_CREATE_FAILED_STR "Failed to create rb table bucket" +#define LG_MSG_HASH_FUNC_ERROR_STR "Hash function not given" +#define LG_MSG_ENTRIES_NOT_PROVIDED_STR \ + "Both mem-pool and expected entries not provided" +#define LG_MSG_ENTRIES_PROVIDED_STR \ + "Both mem-pool and expected entries are provided" +#define LG_MSG_RBTHASH_INIT_BUCKET_FAILED_STR "failed to init buckets" +#define LG_MSG_RBTHASH_GET_ENTRY_FAILED_STR "Failed to get entry from mem-pool" +#define LG_MSG_RBTHASH_GET_BUCKET_FAILED_STR "Failed to get bucket" +#define LG_MSG_RBTHASH_INSERT_FAILED_STR "Failed to insert entry" +#define LG_MSG_RBTHASH_INIT_ENTRY_FAILED_STR "Failed to init entry" +#define LG_MSG_FILE_STAT_FAILED_STR "failed to stat" +#define LG_MSG_INET_PTON_FAILED_STR "inet_pton() failed" +#define LG_MSG_INVALID_ENTRY_STR "Invalid arguments" +#define LG_MSG_NEGATIVE_NUM_PASSED_STR "negative number passed" +#define LG_MSG_PATH_ERROR_STR "Path manipulation failed" +#define LG_MSG_FILE_OP_FAILED_STR "could not open/read file, getting ports info" +#define LG_MSG_RESERVED_PORTS_ERROR_STR \ + "Not able to get reserved ports, hence there is a possibility that " \ + "glusterfs may consume reserved port" +#define LG_MSG_INVALID_PORT_STR "invalid port" +#define LG_MSG_GETNAMEINFO_FAILED_STR "Could not lookup hostname" +#define LG_MSG_GETIFADDRS_FAILED_STR "getifaddrs() failed" +#define LG_MSG_INVALID_FAMILY_STR "Invalid family" +#define LG_MSG_CONVERSION_FAILED_STR "String conversion failed" +#define LG_MSG_GETADDRINFO_FAILED_STR "error in getaddrinfo" +#define LG_MSG_DUPLICATE_ENTRY_STR "duplicate entry for volfile-server" +#define LG_MSG_PTHREAD_NAMING_FAILED_STR "Failed to compose thread name" +#define 
LG_MSG_THREAD_NAME_TOO_LONG_STR \ + "Thread name is too long. It has been truncated" +#define LG_MSG_SET_THREAD_FAILED_STR "Could not set thread name" +#define LG_MSG_THREAD_CREATE_FAILED_STR "Thread creation failed" +#define LG_MSG_PTHREAD_ATTR_INIT_FAILED_STR \ + "Thread attribute initialization failed" +#define LG_MSG_SKIP_HEADER_FAILED_STR "Failed to skip header section" +#define LG_MSG_INVALID_LOG_STR "Invalid log-format" +#define LG_MSG_UTIMENSAT_FAILED_STR "utimenstat failed" +#define LG_MSG_UTIMES_FAILED_STR "utimes failed" +#define LG_MSG_FILE_DELETE_FAILED_STR "Unable to delete file" +#define LG_MSG_BACKTRACE_SAVE_FAILED_STR "Failed to save the backtrace" +#define LG_MSG_WRONG_VALUE_STR "wrong value" +#define LG_MSG_DIR_OP_FAILED_STR "Failed to create directory" +#define LG_MSG_DIR_IS_SYMLINK_STR "dir is symlink" +#define LG_MSG_RESOLVE_HOSTNAME_FAILED_STR "couldnot resolve hostname" +#define LG_MSG_PATH_OPEN_FAILED_STR "Unable to open path" +#define LG_MSG_NO_MEMORY_STR "Error allocating memory" +#define LG_MSG_EVENT_NOTIFY_FAILED_STR "notification failed" +#define LG_MSG_PER_DENTRY_FAILED_STR "per dentry fn returned" +#define LG_MSG_PARENT_DENTRY_NOT_FOUND_STR "parent not found" +#define LG_MSG_DENTRY_CYCLIC_LOOP_STR \ + "detected cyclic loop formation during inode linkage" +#define LG_MSG_CTX_NULL_STR "_ctx not found" +#define LG_MSG_DENTRY_NOT_FOUND_STR "dentry not found" +#define LG_MSG_OUT_OF_RANGE_STR "out of range" +#define LG_MSG_UNKNOWN_OPTION_TYPE_STR "unknown option type" +#define LG_MSG_VALIDATE_RETURNS_STR "validate of returned" +#define LG_MSG_OPTION_DEPRECATED_STR \ + "option is deprecated, continuing with correction" +#define LG_MSG_VALIDATE_REC_FAILED_STR "validate_rec failed" +#define LG_MSG_MAPPING_FAILED_STR "mapping failed" +#define LG_MSG_INIT_IOBUF_FAILED_STR "init failed" +#define LG_MSG_ARENA_NOT_FOUND_STR "arena not found" +#define LG_MSG_PAGE_SIZE_EXCEEDED_STR \ + "page_size of iobufs in arena being added is greater than max 
available" +#define LG_MSG_POOL_NOT_FOUND_STR "pool not found" +#define LG_MSG_IOBUF_NOT_FOUND_STR "iobuf not found" +#define LG_MSG_DLOPEN_FAILED_STR "DL open failed" +#define LG_MSG_DLSYM_ERROR_STR "dlsym missing" +#define LG_MSG_LOAD_FAILED_STR "Failed to load xlator options table" +#define LG_MSG_INPUT_DATA_NULL_STR \ + "input data is null. cannot update the lru limit of the inode table. " \ + "continuing with older value." +#define LG_MSG_INIT_FAILED_STR "No init() found" +#define LG_MSG_VOLUME_ERROR_STR \ + "Initialization of volume failed. review your volfile again." +#define LG_MSG_TREE_NOT_FOUND_STR "Translator tree not found" +#define LG_MSG_SET_LOG_LEVEL_STR "setting log level" +#define LG_MSG_INVALID_INIT_STR \ + "Invalid log-level. possible values are DEBUG|WARNING|ERROR|NONE|TRACE" +#define LG_MSG_OBJECT_NULL_STR "object is null, returning false." +#define LG_MSG_GRAPH_NOT_SET_STR "Graph is not set for xlator" +#define LG_MSG_OPEN_LOGFILE_FAILED_STR "failed to open logfile" +#define LG_MSG_STRDUP_ERROR_STR "failed to create metrics dir" +#define LG_MSG_FILENAME_NOT_SPECIFIED_STR "no filename specified" +#define LG_MSG_UNDERSIZED_BUF_STR "data value is smaller than expected" +#define LG_MSG_DICT_SET_FAILED_STR "unable to set dict" +#define LG_MSG_COUNT_LESS_THAN_ZERO_STR "count < 0!" +#define LG_MSG_PAIRS_LESS_THAN_COUNT_STR "less than count data pairs found" +#define LG_MSG_NULL_PTR_STR "pair->key is null!" 
+#define LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO_STR "value->len < 0" +#define LG_MSG_INVALID_ARG_STR "buf is null" +#define LG_MSG_KEY_OR_VALUE_NULL_STR "key or value is null" +#define LG_MSG_NULL_VALUE_IN_DICT_STR "null value found in dict" +#define LG_MSG_FAILED_TO_LOG_DICT_STR "Failed to log dictionary" +#define LG_MSG_DICT_ERROR_STR "dict error" +#define LG_MSG_STRUCT_MISS_STR "struct missing" +#define LG_MSG_METHOD_MISS_STR "method missing(init)" + +#endif /* !_LG_MESSAGES_H_ */ diff --git a/libglusterfs/src/list.h b/libglusterfs/src/glusterfs/list.h index 221a710ca30..221a710ca30 100644 --- a/libglusterfs/src/list.h +++ b/libglusterfs/src/glusterfs/list.h diff --git a/libglusterfs/src/lkowner.h b/libglusterfs/src/glusterfs/lkowner.h index b49e9af6bcb..692de34bc7a 100644 --- a/libglusterfs/src/lkowner.h +++ b/libglusterfs/src/glusterfs/lkowner.h @@ -11,7 +11,7 @@ #ifndef _LK_OWNER_H #define _LK_OWNER_H -#include "glusterfs-fops.h" +#include "glusterfs/glusterfs-fops.h" /* LKOWNER to string functions */ static inline void diff --git a/libglusterfs/src/locking.h b/libglusterfs/src/glusterfs/locking.h index 43cc87735d1..43cc87735d1 100644 --- a/libglusterfs/src/locking.h +++ b/libglusterfs/src/glusterfs/locking.h diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/glusterfs/logging.h index 859050d568b..b3a6ac191f0 100644 --- a/libglusterfs/src/logging.h +++ b/libglusterfs/src/glusterfs/logging.h @@ -16,7 +16,7 @@ #include <stdio.h> #include <stdarg.h> #include <pthread.h> -#include "list.h" +#include "glusterfs/list.h" #ifdef GF_DARWIN_HOST_OS #define GF_PRI_FSBLK "u" @@ -98,11 +98,9 @@ typedef enum { typedef struct gf_log_handle_ { pthread_mutex_t logfile_mutex; - uint8_t logrotate; - uint8_t cmd_history_logrotate; gf_loglevel_t loglevel; - int gf_log_syslog; gf_loglevel_t sys_log_level; + int gf_log_syslog; char *filename; FILE *logfile; FILE *gf_log_logfile; @@ -113,12 +111,14 @@ typedef struct gf_log_handle_ { char *ident; int log_control_file_found; 
struct list_head lru_queue; - uint32_t lru_size; - uint32_t lru_cur_size; - uint32_t timeout; pthread_mutex_t log_buf_lock; struct _gf_timer *log_flush_timer; int localtime; + uint32_t lru_size; + uint32_t lru_cur_size; + uint32_t timeout; + uint8_t logrotate; + uint8_t cmd_history_logrotate; } gf_log_handle_t; typedef struct log_buf_ { @@ -300,7 +300,7 @@ _gf_log_eh(const char *function, const char *fmt, ...) /* Log once in GF_UNIVERSAL_ANSWER times */ #define GF_LOG_OCCASIONALLY(var, args...) \ - if (!(var++ % GF_UNIVERSAL_ANSWER)) { \ + if (var++ == 0 || !((var - 1) % GF_UNIVERSAL_ANSWER)) { \ gf_log(args); \ } @@ -369,10 +369,6 @@ gf_log_disable_suppression_before_exit(struct _glusterfs_ctx *ctx); gf_log((xl)->name, GF_LOG_ERROR, format, ##args) int -_gf_slog(const char *domain, const char *file, const char *function, int line, - gf_loglevel_t level, const char *event, ...); - -int _gf_smsg(const char *domain, const char *file, const char *function, int32_t line, gf_loglevel_t level, int errnum, int trace, uint64_t msgid, const char *event, ...); @@ -381,12 +377,7 @@ _gf_smsg(const char *domain, const char *file, const char *function, #define gf_smsg(dom, level, errnum, msgid, event...) \ do { \ _gf_smsg(dom, __FILE__, __FUNCTION__, __LINE__, level, errnum, 0, \ - msgid, ##event); \ - } while (0) - -#define gf_slog(dom, level, event...) 
\ - do { \ - _gf_slog(dom, __FILE__, __FUNCTION__, __LINE__, level, ##event); \ + msgid, msgid##_STR, ##event); \ } while (0) #endif /* __LOGGING_H__ */ diff --git a/libglusterfs/src/lvm-defaults.h b/libglusterfs/src/glusterfs/lvm-defaults.h index 32feebf3f6e..32feebf3f6e 100644 --- a/libglusterfs/src/lvm-defaults.h +++ b/libglusterfs/src/glusterfs/lvm-defaults.h diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h index 188b142b099..e5b3276d047 100644 --- a/libglusterfs/src/mem-pool.h +++ b/libglusterfs/src/glusterfs/mem-pool.h @@ -11,12 +11,11 @@ #ifndef _MEM_POOL_H_ #define _MEM_POOL_H_ -#include "list.h" -#include "locking.h" -#include "atomic.h" -#include "logging.h" -#include "mem-types.h" -#include "glusterfs.h" /* for glusterfs_ctx_t */ +#include "glusterfs/list.h" +#include "glusterfs/atomic.h" +#include "glusterfs/logging.h" +#include "glusterfs/mem-types.h" +#include "glusterfs/glusterfs.h" /* for glusterfs_ctx_t */ #include <stdlib.h> #include <inttypes.h> #include <string.h> @@ -38,12 +37,16 @@ #define GF_MEM_TRAILER_MAGIC 0xBAADF00D #define GF_MEM_INVALID_MAGIC 0xDEADC0DE +#define POOL_SMALLEST 7 /* i.e. 128 */ +#define POOL_LARGEST 20 /* i.e. 1048576 */ +#define NPOOLS (POOL_LARGEST - POOL_SMALLEST + 1) + struct mem_acct_rec { const char *typestr; - size_t size; - size_t max_size; + uint64_t size; + uint64_t max_size; + uint64_t total_allocs; uint32_t num_allocs; - uint32_t total_allocs; uint32_t max_num_allocs; gf_lock_t lock; #ifdef DEBUG @@ -189,7 +192,7 @@ gf_memdup(const void *src, size_t size) { void *dup_mem = NULL; - dup_mem = GF_MALLOC(size, gf_common_mt_strdup); + dup_mem = GF_MALLOC(size, gf_common_mt_memdup); if (!dup_mem) goto out; @@ -199,6 +202,24 @@ out: return dup_mem; } +#ifdef GF_DISABLE_MEMPOOL + +/* No-op memory pool enough to fit current API without massive redesign. 
*/ + +struct mem_pool { + unsigned long sizeof_type; +}; + +#define mem_pools_init() \ + do { \ + } while (0) +#define mem_pools_fini() \ + do { \ + } while (0) +#define mem_pool_thread_destructor(pool_list) (void)pool_list + +#else /* !GF_DISABLE_MEMPOOL */ + /* kind of 'header' for the actual mem_pool_shared structure, this might make * it possible to dump some more details in a statedump */ struct mem_pool { @@ -206,8 +227,12 @@ struct mem_pool { unsigned long sizeof_type; unsigned long count; /* requested pool size (unused) */ char *name; - gf_atomic_t active; /* current allocations */ - + char *xl_name; + gf_atomic_t active; /* current allocations */ +#ifdef DEBUG + gf_atomic_t hit; /* number of allocations served from pt_pool */ + gf_atomic_t miss; /* number of std allocs due to miss */ +#endif struct list_head owner; /* glusterfs_ctx_t->mempool_list */ glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */ @@ -224,7 +249,10 @@ typedef struct pooled_obj_hdr { struct mem_pool *pool; } pooled_obj_hdr_t; -#define AVAILABLE_SIZE(p2) ((1 << (p2)) - sizeof(pooled_obj_hdr_t)) +/* Each memory block inside a pool has a fixed size that is a power of two. + * However each object will have a header that will reduce the available + * space. */ +#define AVAILABLE_SIZE(p2) ((1UL << (p2)) - sizeof(pooled_obj_hdr_t)) typedef struct per_thread_pool { /* the pool that was used to request this allocation */ @@ -235,24 +263,26 @@ typedef struct per_thread_pool { } per_thread_pool_t; typedef struct per_thread_pool_list { - /* - * These first two members are protected by the global pool lock. When - * a thread first tries to use any pool, we create one of these. We - * link it into the global list using thr_list so the pool-sweeper - * thread can find it, and use pthread_setspecific so this thread can - * find it. When the per-thread destructor runs, we "poison" the pool - * list to prevent further allocations. 
This also signals to the - * pool-sweeper thread that the list should be detached and freed after - * the next time it's swept. - */ + /* thr_list is used to place the TLS pool_list into the active global list + * (pool_threads) or the inactive global list (pool_free_threads). It's + * protected by the global pool_lock. */ struct list_head thr_list; - unsigned int poison; + + /* This lock is used to update poison and the hot/cold lists of members + * of 'pools' array. */ + pthread_spinlock_t lock; + + /* This field is used to mark a pool_list as not being owned by any thread. + * This means that the sweeper thread won't be cleaning objects stored in + * its pools. mem_put() uses it to decide if the object being released is + * placed into its original pool_list or directly destroyed. */ + bool poison; + /* * There's really more than one pool, but the actual number is hidden * in the implementation code so we just make it a single-element array * here. */ - pthread_spinlock_t lock; per_thread_pool_t pools[1]; } per_thread_pool_list_t; @@ -272,11 +302,13 @@ struct mem_pool_shared { }; void -mem_pools_init_early(void); /* basic initialization of memory pools */ -void -mem_pools_init_late(void); /* start the pool_sweeper thread */ +mem_pools_init(void); /* start the pool_sweeper thread */ void mem_pools_fini(void); /* cleanup memory pools */ +void +mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); + +#endif /* GF_DISABLE_MEMPOOL */ struct mem_pool * mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h new file mode 100644 index 00000000000..d45d5b68c91 --- /dev/null +++ b/libglusterfs/src/glusterfs/mem-types.h @@ -0,0 +1,139 @@ +/* + Copyright (c) 2008-2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __MEM_TYPES_H__ +#define __MEM_TYPES_H__ + +enum gf_common_mem_types_ { + gf_common_mt_dnscache6, /* used only in one location */ + gf_common_mt_event_pool, + gf_common_mt_reg, + gf_common_mt_pollfd, /* used only in one location */ + gf_common_mt_fdentry_t, /* used only in one location */ + gf_common_mt_fdtable_t, /* used only in one location */ + gf_common_mt_fd_ctx, /* used only in one location */ + gf_common_mt_gf_dirent_t, + gf_common_mt_inode_t, /* used only in one location */ + gf_common_mt_inode_ctx, /* used only in one location */ + gf_common_mt_list_head, + gf_common_mt_inode_table_t, /* used only in one location */ + gf_common_mt_xlator_t, + gf_common_mt_xlator_list_t, /* used only in one location */ + gf_common_mt_volume_opt_list_t, + gf_common_mt_gf_timer_t, /* used only in one location */ + gf_common_mt_gf_timer_registry_t, /* used only in one location */ + gf_common_mt_auth_handle_t, /* used only in one location */ + gf_common_mt_iobuf, /* used only in one location */ + gf_common_mt_iobuf_arena, /* used only in one location */ + gf_common_mt_iobref, /* used only in one location */ + gf_common_mt_iobuf_pool, /* used only in one location */ + gf_common_mt_iovec, + gf_common_mt_memdup, /* used only in one location */ + gf_common_mt_asprintf, /* used only in one location */ + gf_common_mt_strdup, + gf_common_mt_socket_private_t, /* used only in one location */ + gf_common_mt_ioq, /* used only in one location */ + gf_common_mt_char, + gf_common_mt_rbthash_table_t, /* used only in one location */ + gf_common_mt_rbthash_bucket, /* used only in one location */ + gf_common_mt_mem_pool, /* used only in one location */ + gf_common_mt_rpcsvc_auth_list, /* used only in one location */ + 
gf_common_mt_rpcsvc_t, /* used only in one location */ + gf_common_mt_rpcsvc_program_t, /* used only in one location */ + gf_common_mt_rpcsvc_listener_t, /* used only in one location */ + gf_common_mt_rpcsvc_wrapper_t, /* used only in one location */ + gf_common_mt_rpcclnt_t, /* used only in one location */ + gf_common_mt_rpcclnt_savedframe_t, /* used only in one location */ + gf_common_mt_rpc_trans_t, + gf_common_mt_rpc_trans_pollin_t, /* used only in one location */ + gf_common_mt_rpc_trans_reqinfo_t, /* used only in one location */ + gf_common_mt_glusterfs_graph_t, + gf_common_mt_rdma_private_t, /* used only in one location */ + gf_common_mt_rpc_transport_t, /* used only in one location */ + gf_common_mt_rdma_post_t, /* used only in one location */ + gf_common_mt_qpent, /* used only in one location */ + gf_common_mt_rdma_device_t, /* used only in one location */ + gf_common_mt_rdma_arena_mr, /* used only in one location */ + gf_common_mt_sge, /* used only in one location */ + gf_common_mt_rpcclnt_cb_program_t, /* used only in one location */ + gf_common_mt_libxl_marker_local, /* used only in one location */ + gf_common_mt_graph_buf, /* used only in one location */ + gf_common_mt_trie_trie, /* used only in one location */ + gf_common_mt_trie_data, /* used only in one location */ + gf_common_mt_trie_node, /* used only in one location */ + gf_common_mt_trie_buf, /* used only in one location */ + gf_common_mt_run_argv, /* used only in one location */ + gf_common_mt_run_logbuf, /* used only in one location */ + gf_common_mt_fd_lk_ctx_t, /* used only in one location */ + gf_common_mt_fd_lk_ctx_node_t, /* used only in one location */ + gf_common_mt_buffer_t, /* used only in one location */ + gf_common_mt_circular_buffer_t, /* used only in one location */ + gf_common_mt_eh_t, + gf_common_mt_store_handle_t, /* used only in one location */ + gf_common_mt_store_iter_t, /* used only in one location */ + gf_common_mt_drc_client_t, /* used only in one location */ + 
gf_common_mt_drc_globals_t, /* used only in one location */ + gf_common_mt_groups_t, + gf_common_mt_cliententry_t, /* used only in one location */ + gf_common_mt_clienttable_t, /* used only in one location */ + gf_common_mt_client_t, /* used only in one location */ + gf_common_mt_client_ctx, /* used only in one location */ + gf_common_mt_auxgids, /* used only in one location */ + gf_common_mt_syncopctx, /* used only in one location */ + gf_common_mt_iobrefs, /* used only in one location */ + gf_common_mt_gsync_status_t, + gf_common_mt_uuid_t, + gf_common_mt_mgmt_v3_lock_obj_t, /* used only in one location */ + gf_common_mt_txn_opinfo_obj_t, /* used only in one location */ + gf_common_mt_strfd_t, /* used only in one location */ + gf_common_mt_strfd_data_t, /* used only in one location */ + gf_common_mt_regex_t, /* used only in one location */ + gf_common_mt_ereg, /* used only in one location */ + gf_common_mt_wr, /* used only in one location */ + gf_common_mt_dnscache, /* used only in one location */ + gf_common_mt_dnscache_entry, /* used only in one location */ + gf_common_mt_parser_t, /* used only in one location */ + gf_common_quota_meta_t, + gf_common_mt_rbuf_t, /* used only in one location */ + gf_common_mt_rlist_t, /* used only in one location */ + gf_common_mt_rvec_t, /* used only in one location */ + /* glusterd can load the nfs-xlator dynamically and needs these two */ + gf_common_mt_nfs_netgroups, /* used only in one location */ + gf_common_mt_nfs_exports, /* used only in one location */ + gf_common_mt_gf_brick_spec_t, /* used only in one location */ + gf_common_mt_int, + gf_common_mt_pointer, + gf_common_mt_synctask, /* used only in one location */ + gf_common_mt_syncstack, /* used only in one location */ + gf_common_mt_syncenv, /* used only in one location */ + gf_common_mt_scan_data, /* used only in one location */ + gf_common_list_node, + gf_mt_default_args_t, /* used only in one location */ + gf_mt_default_args_cbk_t, /* used only in one location */ + 
/*used for compound fops*/ + gf_mt_compound_req_t, /* used only in one location */ + gf_mt_compound_rsp_t, /* used only in one location */ + gf_common_mt_tw_ctx, /* used only in one location */ + gf_common_mt_tw_timer_list, + /*lock migration*/ + gf_common_mt_lock_mig, + /* throttle */ + gf_common_mt_tbf_t, /* used only in one location */ + gf_common_mt_tbf_bucket_t, /* used only in one location */ + gf_common_mt_tbf_throttle_t, /* used only in one location */ + gf_common_mt_pthread_t, /* used only in one location */ + gf_common_ping_local_t, /* used only in one location */ + gf_common_volfile_t, + gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */ + gf_common_mt_server_cmdline_t, /* used only in one location */ + gf_common_mt_latency_t, + gf_common_mt_end +}; +#endif diff --git a/libglusterfs/src/monitoring.h b/libglusterfs/src/glusterfs/monitoring.h index 7826d4ec4df..09d9f54e734 100644 --- a/libglusterfs/src/monitoring.h +++ b/libglusterfs/src/glusterfs/monitoring.h @@ -11,7 +11,7 @@ #ifndef __MONITORING_H__ #define __MONITORING_H__ -#include "glusterfs.h" +#include "glusterfs/glusterfs.h" #define GLUSTER_METRICS_DIR "/var/run/gluster/metrics" diff --git a/libglusterfs/src/options.h b/libglusterfs/src/glusterfs/options.h index b0f76d5639c..747b13ba375 100644 --- a/libglusterfs/src/options.h +++ b/libglusterfs/src/glusterfs/options.h @@ -15,8 +15,8 @@ #include <stdint.h> #include <inttypes.h> -#include "xlator.h" -#include "libglusterfs-messages.h" +#include "glusterfs/xlator.h" +#include "glusterfs/libglusterfs-messages.h" /* Add possible new type of option you may need */ typedef enum { GF_OPTION_TYPE_ANY = 0, @@ -190,7 +190,7 @@ DECLARE_INIT_OPT(uint64_t, uint64); DECLARE_INIT_OPT(int64_t, int64); DECLARE_INIT_OPT(uint32_t, uint32); DECLARE_INIT_OPT(int32_t, int32); -DECLARE_INIT_OPT(size_t, size); +DECLARE_INIT_OPT(uint64_t, size); DECLARE_INIT_OPT(uint64_t, size_uint64); DECLARE_INIT_OPT(double, percent); DECLARE_INIT_OPT(double, 
percent_or_size); @@ -264,14 +264,14 @@ DECLARE_INIT_OPT(uint32_t, time); #define DECLARE_RECONF_OPT(type_t, type) \ int xlator_option_reconf_##type(xlator_t *this, dict_t *options, \ - char *key, type_t *val_p); + char *key, int keylen, type_t *val_p); DECLARE_RECONF_OPT(char *, str); DECLARE_RECONF_OPT(uint64_t, uint64); DECLARE_RECONF_OPT(int64_t, int64); DECLARE_RECONF_OPT(uint32_t, uint32); DECLARE_RECONF_OPT(int32_t, int32); -DECLARE_RECONF_OPT(size_t, size); +DECLARE_RECONF_OPT(uint64_t, size); DECLARE_RECONF_OPT(uint64_t, size_uint64); DECLARE_RECONF_OPT(double, percent); DECLARE_RECONF_OPT(double, percent_or_size); @@ -283,56 +283,44 @@ DECLARE_RECONF_OPT(uint32_t, time); #define DEFINE_RECONF_OPT(type_t, type, conv) \ int xlator_option_reconf_##type(xlator_t *this, dict_t *options, \ - char *key, type_t *val_p) \ + char *key, int keylen, type_t *val_p) \ { \ int ret = 0; \ - volume_option_t *opt = NULL; \ - char *def_value = NULL; \ - char *set_value = NULL; \ char *value = NULL; \ xlator_t *old_THIS = NULL; \ \ - opt = xlator_volume_option_get(this, key); \ + volume_option_t *opt = xlator_volume_option_get(this, key); \ if (!opt) { \ gf_msg(this->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ENTRY, \ "unknown option: %s", key); \ - ret = -1; \ - return ret; \ + return -1; \ } \ - def_value = opt->default_value; \ - ret = dict_get_str(options, key, &set_value); \ - \ - if (def_value) \ - value = def_value; \ - if (set_value) \ - value = set_value; \ - if (!value) { \ - gf_msg_trace(this->name, 0, "option %s not set", key); \ - *val_p = (type_t)0; \ - return 0; \ - } \ - if (value == def_value) { \ + ret = dict_get_strn(options, key, keylen, &value); \ + if (ret == 0 && value) { \ + gf_msg(this->name, GF_LOG_INFO, 0, 0, \ + "option %s using set value %s", key, value); \ + } else if (opt->default_value) { \ + value = opt->default_value; \ gf_msg_trace(this->name, 0, "option %s using default value %s", \ key, value); \ } else { \ - gf_msg(this->name, 
GF_LOG_INFO, 0, 0, \ - "option %s using set value %s", key, value); \ + gf_msg_trace(this->name, 0, "option %s not set", key); \ + *val_p = (type_t)0; \ + return 0; \ } \ + \ old_THIS = THIS; \ THIS = this; \ ret = conv(value, val_p); \ THIS = old_THIS; \ if (ret) \ return ret; \ - ret = xlator_option_validate(this, key, value, opt, NULL); \ - return ret; \ + return xlator_option_validate(this, key, value, opt, NULL); \ } #define GF_OPTION_RECONF(key, val, opt, type, err_label) \ do { \ - int val_ret = 0; \ - val_ret = xlator_option_reconf_##type(THIS, opt, key, &(val)); \ - if (val_ret) \ + if (xlator_option_reconf_##type(THIS, opt, key, SLEN(key), &(val))) \ goto err_label; \ } while (0) diff --git a/libglusterfs/src/parse-utils.h b/libglusterfs/src/glusterfs/parse-utils.h index 8653b9dd180..8653b9dd180 100644 --- a/libglusterfs/src/parse-utils.h +++ b/libglusterfs/src/glusterfs/parse-utils.h diff --git a/libglusterfs/src/quota-common-utils.h b/libglusterfs/src/glusterfs/quota-common-utils.h index 7c3ce498427..0096e340756 100644 --- a/libglusterfs/src/quota-common-utils.h +++ b/libglusterfs/src/glusterfs/quota-common-utils.h @@ -11,7 +11,7 @@ #ifndef _QUOTA_COMMON_UTILS_H #define _QUOTA_COMMON_UTILS_H -#include "iatt.h" +#include "glusterfs/iatt.h" #define GF_QUOTA_CONF_VERSION 1.2 #define QUOTA_CONF_HEADER "GlusterFS Quota conf | version: v1.2\n" @@ -39,13 +39,15 @@ gf_boolean_t quota_meta_is_null(const quota_meta_t *meta); int32_t -quota_data_to_meta(data_t *data, char *key, quota_meta_t *meta); +quota_data_to_meta(data_t *data, quota_meta_t *meta); int32_t -quota_dict_get_inode_meta(dict_t *dict, char *key, quota_meta_t *meta); +quota_dict_get_inode_meta(dict_t *dict, char *key, const int keylen, + quota_meta_t *meta); int32_t -quota_dict_get_meta(dict_t *dict, char *key, quota_meta_t *meta); +quota_dict_get_meta(dict_t *dict, char *key, const int keylen, + quota_meta_t *meta); int32_t quota_dict_set_meta(dict_t *dict, char *key, const quota_meta_t *meta, diff 
--git a/libglusterfs/src/rbthash.h b/libglusterfs/src/glusterfs/rbthash.h index dea4648ca5a..4c731de69c2 100644 --- a/libglusterfs/src/rbthash.h +++ b/libglusterfs/src/glusterfs/rbthash.h @@ -10,14 +10,12 @@ #ifndef __RBTHASH_TABLE_H_ #define __RBTHASH_TABLE_H_ -#include "rb.h" -#include "locking.h" -#include "mem-pool.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" -#include <pthread.h> +#include <stdint.h> // for uint32_t +#include "glusterfs/glusterfs.h" // for gf_boolean_t, glusterfs_ctx_t +#include "glusterfs/list.h" // for list_head +#include "glusterfs/locking.h" // for gf_lock_t +struct mem_pool; #define GF_RBTHASH_MEMPOOL 16384 // 1048576 #define GF_RBTHASH "rbthash" diff --git a/libglusterfs/src/refcount.h b/libglusterfs/src/glusterfs/refcount.h index 6a3a73d80e8..cf922dabb05 100644 --- a/libglusterfs/src/refcount.h +++ b/libglusterfs/src/glusterfs/refcount.h @@ -22,7 +22,7 @@ #undef REFCOUNT_NEEDS_LOCK #else #define REFCOUNT_NEEDS_LOCK -#include "locking.h" +#include "glusterfs/locking.h" #endif /* compiler support for __sync_*_and_fetch() */ typedef void (*gf_ref_release_t)(void *data); diff --git a/libglusterfs/src/revision.h b/libglusterfs/src/glusterfs/revision.h index 3c404d30e78..3c404d30e78 100644 --- a/libglusterfs/src/revision.h +++ b/libglusterfs/src/glusterfs/revision.h diff --git a/libglusterfs/src/rot-buffs.h b/libglusterfs/src/glusterfs/rot-buffs.h index 1e91e83ecf0..9dc227d58b8 100644 --- a/libglusterfs/src/rot-buffs.h +++ b/libglusterfs/src/glusterfs/rot-buffs.h @@ -11,9 +11,9 @@ #ifndef __ROT_BUFFS_H #define __ROT_BUFFS_H -#include "list.h" -#include "locking.h" -#include "common-utils.h" +#include "glusterfs/list.h" +#include "glusterfs/locking.h" +#include "glusterfs/common-utils.h" typedef struct rbuf_iovec { struct iovec iov; diff --git a/libglusterfs/src/run.h b/libglusterfs/src/glusterfs/run.h index 76af95fd27f..76af95fd27f 100644 --- a/libglusterfs/src/run.h +++ b/libglusterfs/src/glusterfs/run.h diff --git 
a/libglusterfs/src/stack.h b/libglusterfs/src/glusterfs/stack.h index 8d58e4daab9..536a330d38b 100644 --- a/libglusterfs/src/stack.h +++ b/libglusterfs/src/glusterfs/stack.h @@ -25,14 +25,14 @@ typedef struct call_pool call_pool_t; #include <sys/time.h> -#include "xlator.h" -#include "dict.h" -#include "list.h" -#include "common-utils.h" -#include "lkowner.h" -#include "client_t.h" -#include "libglusterfs-messages.h" -#include "timespec.h" +#include "glusterfs/xlator.h" +#include "glusterfs/dict.h" +#include "glusterfs/list.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/lkowner.h" +#include "glusterfs/client_t.h" +#include "glusterfs/libglusterfs-messages.h" +#include "glusterfs/timespec.h" #define NFS_PID 1 #define LOW_PRIO_PROC_PID -1 @@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame, xlator_t *this, int32_t op_ret, int32_t op_errno, ...); +void +gf_frame_latency_update(call_frame_t *frame); + struct call_pool { union { struct list_head all_frames; @@ -149,8 +152,6 @@ struct _call_stack { } while (0); struct xlator_fops; -void -gf_update_latency(call_frame_t *frame); static inline void FRAME_DESTROY(call_frame_t *frame) @@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame) void *local = NULL; if (frame->root->ctx->measure_latency) - gf_update_latency(frame); + gf_frame_latency_update(frame); list_del_init(&frame->frames); if (frame->local) { @@ -232,8 +233,10 @@ STACK_RESET(call_stack_t *stack) local_type *__local = (frm)->local; \ __local->uid = frm->root->uid; \ __local->gid = frm->root->gid; \ + __local->pid = frm->root->pid; \ frm->root->uid = 0; \ frm->root->gid = 0; \ + frm->root->pid = GF_CLIENT_PID_NO_ROOT_SQUASH; \ } while (0); #define FRAME_SU_UNDO(frm, local_type) \ @@ -241,6 +244,7 @@ STACK_RESET(call_stack_t *stack) local_type *__local = (frm)->local; \ frm->root->uid = __local->uid; \ frm->root->gid = __local->gid; \ + frm->root->pid = __local->pid; \ } while (0); /* NOTE: make sure to keep 
this as an macro, mainly because, we need 'fn' @@ -426,6 +430,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) if (ngrps <= SMALL_GROUP_COUNT) { stack->groups = stack->groups_small; } else { + GF_FREE(stack->groups_large); stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t), gf_common_mt_groups_t); if (!stack->groups_large) @@ -439,6 +444,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) } static inline int +call_stack_groups_capacity(call_stack_t *stack) +{ + return max(stack->ngrps, SMALL_GROUP_COUNT); +} + +static inline int call_frames_count(call_stack_t *call_stack) { call_frame_t *pos; diff --git a/libglusterfs/src/statedump.h b/libglusterfs/src/glusterfs/statedump.h index af653041493..ce082706bdf 100644 --- a/libglusterfs/src/statedump.h +++ b/libglusterfs/src/glusterfs/statedump.h @@ -12,8 +12,8 @@ #define STATEDUMP_H #include <stdarg.h> -#include "inode.h" -#include "strfd.h" +#include "glusterfs/inode.h" +#include "glusterfs/strfd.h" #define GF_DUMP_MAX_BUF_LEN 4096 @@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd); void gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd); +void +gf_latency_statedump_and_reset(char *key, gf_latency_t *lat); #endif /* STATEDUMP_H */ diff --git a/libglusterfs/src/store.h b/libglusterfs/src/glusterfs/store.h index d69e39a7ce3..a1f70c7b840 100644 --- a/libglusterfs/src/store.h +++ b/libglusterfs/src/glusterfs/store.h @@ -10,8 +10,8 @@ #ifndef _GLUSTERD_STORE_H_ #define _GLUSTERD_STORE_H_ -#include "compat.h" -#include "glusterfs.h" +#include "glusterfs/compat.h" +#include "glusterfs/glusterfs.h" struct gf_store_handle_ { char *path; @@ -59,8 +59,8 @@ int32_t gf_store_unlink_tmppath(gf_store_handle_t *shandle); int -gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key, - char **iter_val, gf_store_op_errno_t *store_errno); +gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val, + gf_store_op_errno_t *store_errno); int32_t 
gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value); @@ -69,6 +69,9 @@ int32_t gf_store_save_value(int fd, char *key, char *value); int32_t +gf_store_save_items(int fd, char *items); + +int32_t gf_store_handle_new(const char *path, gf_store_handle_t **handle); int @@ -92,7 +95,7 @@ int32_t gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value); int32_t -gf_store_iter_destroy(gf_store_iter_t *iter); +gf_store_iter_destroy(gf_store_iter_t **iter); char * gf_store_strerror(gf_store_op_errno_t op_errno); diff --git a/libglusterfs/src/strfd.h b/libglusterfs/src/glusterfs/strfd.h index 861cd02e005..861cd02e005 100644 --- a/libglusterfs/src/strfd.h +++ b/libglusterfs/src/glusterfs/strfd.h diff --git a/libglusterfs/src/syncop-utils.h b/libglusterfs/src/glusterfs/syncop-utils.h index 1f3ee403edc..1f3ee403edc 100644 --- a/libglusterfs/src/syncop-utils.h +++ b/libglusterfs/src/glusterfs/syncop-utils.h diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/glusterfs/syncop.h index 88668125d5a..4e9241a32fc 100644 --- a/libglusterfs/src/syncop.h +++ b/libglusterfs/src/glusterfs/syncop.h @@ -11,10 +11,12 @@ #ifndef _SYNCOP_H #define _SYNCOP_H -#include "xlator.h" #include <sys/time.h> #include <pthread.h> #include <ucontext.h> +#include "glusterfs/dict.h" // for dict_t +#include "glusterfs/stack.h" // for call_frame_t, STACK_DESTROY, STACK_... +#include "glusterfs/timer.h" #define SYNCENV_PROC_MAX 16 #define SYNCENV_PROC_MIN 2 @@ -29,9 +31,15 @@ #define SYNCOPCTX_PID 0x00000008 #define SYNCOPCTX_LKOWNER 0x00000010 +#ifdef HAVE_TSAN_API +/* Currently hardcoded within thread context maintained by the sanitizer. 
*/ +#define TSAN_THREAD_NAMELEN 64 +#endif + struct synctask; struct syncproc; struct syncenv; +struct synccond; typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque); @@ -55,9 +63,12 @@ struct synctask { call_frame_t *opframe; synctask_cbk_t synccbk; synctask_fn_t syncfn; - synctask_state_t state; + struct timespec *delta; + gf_timer_t *timer; + struct synccond *synccond; void *opaque; void *stack; + synctask_state_t state; int woken; int slept; int ret; @@ -65,6 +76,13 @@ struct synctask { uid_t uid; gid_t gid; +#ifdef HAVE_TSAN_API + struct { + void *fiber; + char name[TSAN_THREAD_NAMELEN]; + } tsan; +#endif + ucontext_t ctx; struct syncproc *proc; @@ -73,11 +91,18 @@ struct synctask { int done; struct list_head waitq; /* can wait only "once" at a time */ - char btbuf[GF_BACKTRACE_LEN]; }; struct syncproc { pthread_t processor; + +#ifdef HAVE_TSAN_API + struct { + void *fiber; + char name[TSAN_THREAD_NAMELEN]; + } tsan; +#endif + ucontext_t sched; struct syncenv *env; struct synctask *current; @@ -86,19 +111,21 @@ struct syncproc { /* hosts the scheduler thread and framework for executing synctasks */ struct syncenv { struct syncproc proc[SYNCENV_PROC_MAX]; - int procs; + + pthread_mutex_t mutex; + pthread_cond_t cond; struct list_head runq; - int runcount; struct list_head waitq; - int waitcount; + + int procs; + int procs_idle; + + int runcount; int procmin; int procmax; - pthread_mutex_t mutex; - pthread_cond_t cond; - size_t stacksize; int destroy; /* FLAG to mark syncenv is in destroy mode @@ -124,6 +151,13 @@ struct synclock { }; typedef struct synclock synclock_t; +struct synccond { + pthread_mutex_t pmutex; + pthread_cond_t pcond; + struct list_head waitq; +}; +typedef struct synccond synccond_t; + struct syncbarrier { gf_boolean_t initialized; /*Set on successful initialization*/ pthread_mutex_t guard; /* guard the remaining members, pair @cond */ @@ -138,8 +172,19 @@ typedef struct syncbarrier syncbarrier_t; struct syncargs { int 
op_ret; int op_errno; + + /* + * The below 3 iatt structures are used in the fops + * whose callbacks get struct iatt as one of the + * a return arguments. Currently, the maximum number + * of iatt structures returned is 3 for some fops + * such as mknod, copy_file_range, mkdir etc. So + * all the following 3 iatt structures would be used + * for those fops. + */ struct iatt iatt1; struct iatt iatt2; + struct iatt iatt3; dict_t *xattr; struct statvfs statvfs_buf; struct iovec *vector; @@ -209,7 +254,7 @@ struct syncopctx { #define __yield(args) \ do { \ if (args->task) { \ - synctask_yield(args->task); \ + synctask_yield(args->task, NULL); \ } else { \ pthread_mutex_lock(&args->mutex); \ { \ @@ -230,10 +275,16 @@ struct syncopctx { task = synctask_get(); \ stb->task = task; \ if (task) \ - frame = task->opframe; \ + frame = copy_frame(task->opframe); \ else \ frame = syncop_create_frame(THIS); \ \ + if (!frame) { \ + stb->op_ret = -1; \ + stb->op_errno = errno; \ + break; \ + } \ + \ if (task) { \ frame->root->uid = task->uid; \ frame->root->gid = task->gid; \ @@ -245,10 +296,7 @@ struct syncopctx { STACK_WIND_COOKIE(frame, cbk, (void *)stb, subvol, fn_op, params); \ \ __yield(stb); \ - if (task) \ - STACK_RESET(frame->root); \ - else \ - STACK_DESTROY(frame->root); \ + STACK_DESTROY(frame->root); \ } while (0) /* @@ -297,7 +345,9 @@ synctask_join(struct synctask *task); void synctask_wake(struct synctask *task); void -synctask_yield(struct synctask *task); +synctask_yield(struct synctask *task, struct timespec *delta); +void +synctask_sleep(int32_t secs); void synctask_waitfor(struct synctask *task, int count); @@ -331,6 +381,7 @@ syncop_create_frame(xlator_t *this) if (!frame) return NULL; + frame->root->type = GF_OP_TYPE_FOP; opctx = syncopctx_getctx(); if (opctx && (opctx->valid & SYNCOPCTX_PID)) @@ -394,6 +445,24 @@ synclock_trylock(synclock_t *lock); int synclock_unlock(synclock_t *lock); +int32_t +synccond_init(synccond_t *cond); + +void 
+synccond_destroy(synccond_t *cond); + +int +synccond_wait(synccond_t *cond, synclock_t *lock); + +int +synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta); + +void +synccond_signal(synccond_t *cond); + +void +synccond_broadcast(synccond_t *cond); + int syncbarrier_init(syncbarrier_t *barrier); int @@ -488,17 +557,18 @@ syncop_write(xlator_t *subvol, fd_t *fd, const char *buf, int size, int syncop_writev(xlator_t *subvol, fd_t *fd, const struct iovec *vector, int32_t count, off_t offset, struct iobref *iobref, - uint32_t flags, dict_t *xdata_in, dict_t **xdata_out); + uint32_t flags, struct iatt *preiatt, struct iatt *postiatt, + dict_t *xdata_in, dict_t **xdata_out); int syncop_readv(xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags, /* out */ struct iovec **vector, int *count, struct iobref **iobref, - dict_t *xdata_in, dict_t **xdata_out); + struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out); int -syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, dict_t *xdata_in, - dict_t **xdata_out); +syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, struct iatt *preiatt, + struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out); int syncop_truncate(xlator_t *subvol, loc_t *loc, off_t offset, dict_t *xdata_in, @@ -513,8 +583,8 @@ syncop_rmdir(xlator_t *subvol, loc_t *loc, int flags, dict_t *xdata_in, dict_t **xdata_out); int -syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, dict_t *xdata_in, - dict_t **xdata_out); +syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, struct iatt *preiatt, + struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out); int syncop_flush(xlator_t *subvol, fd_t *fd, dict_t *xdata_in, dict_t **xdata_out); @@ -628,11 +698,21 @@ int syncop_icreate(xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *xdata_out); int -syncop_namelink(xlator_t *subvol, loc_t *loc, dict_t *xdata_out); - -int syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc, const char 
*basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata_in, dict_t **xdata_out); +int +syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, struct iatt *stbuf, + struct iatt *preiatt_dst, struct iatt *postiatt_dst, + dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata); + #endif /* _SYNCOP_H */ diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/glusterfs/syscall.h index f691ebdaa53..b6d3ab4f2ad 100644 --- a/libglusterfs/src/syscall.h +++ b/libglusterfs/src/glusterfs/syscall.h @@ -16,6 +16,8 @@ #include <sys/statvfs.h> #include <sys/stat.h> #include <sys/time.h> +#include <sys/socket.h> +#include <stdio.h> /* GF follows the Linux XATTR definition, which differs in Darwin. */ #define GF_XATTR_CREATE 0x1 /* set value, fail if attr already exists */ @@ -94,18 +96,27 @@ int sys_unlink(const char *pathname); int +sys_unlinkat(int dfd, const char *pathname); + +int sys_rmdir(const char *pathname); int sys_symlink(const char *oldpath, const char *newpath); int +sys_symlinkat(const char *oldpath, int dirfd, const char *newpath); + +int sys_rename(const char *oldpath, const char *newpath); int sys_link(const char *oldpath, const char *newpath); int +sys_linkat(int oldfd, const char *oldpath, int newfd, const char *newpath); + +int sys_chmod(const char *path, mode_t mode); int @@ -221,4 +232,47 @@ sys_pread(int fd, void *buf, size_t count, off_t offset); ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t offset); +int +sys_socket(int domain, int type, int protocol); + +int +sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags); + +#ifdef GF_BSD_HOST_OS +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * 
_OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. + */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ +#endif /* GF_BSD_HOST_OS */ + +/* + * According to the man page of copy_file_range, both off_in and off_out are + * pointers to the data type loff_t (i.e. loff_t *). But, freebsd does not + * have (and recognize) loff_t. Since loff_t is 64 bits, use off64_t + * instead. Since it's a pointer type it should be okay. It just needs + * to be a pointer-to-64-bit pointer for both 32- and 64-bit platforms. + * off64_t is recognized by freebsd. + * TODO: In future, when freebsd can recognize loff_t, probably revisit this + * and change the off_in and off_out to (loff_t *). + */ +ssize_t +sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, + size_t len, unsigned int flags); + +int +sys_kill(pid_t pid, int sig); + +#ifdef __FreeBSD__ +int +sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp, + const void *newp, size_t newlen); +#endif + #endif /* __SYSCALL_H__ */ diff --git a/libglusterfs/src/template-component-messages.h b/libglusterfs/src/glusterfs/template-component-messages.h index 4dcdda4abf4..aa7ad3d1baa 100644 --- a/libglusterfs/src/template-component-messages.h +++ b/libglusterfs/src/glusterfs/template-component-messages.h @@ -11,7 +11,7 @@ #ifndef _component_MESSAGES_H_ #define _component_MESSAGES_H_ -#include "glfs-message-id.h" +#include "glusterfs/glfs-message-id.h" /* To add new message IDs, append new identifiers at the end of the list. * diff --git a/libglusterfs/src/throttle-tbf.h b/libglusterfs/src/glusterfs/throttle-tbf.h index b44a0f1fdd5..cccb13c83d9 100644 --- a/libglusterfs/src/throttle-tbf.h +++ b/libglusterfs/src/glusterfs/throttle-tbf.h @@ -8,9 +8,9 @@ cases as published by the Free Software Foundation. 
*/ -#include "list.h" -#include "xlator.h" -#include "locking.h" +#include "glusterfs/list.h" +#include "glusterfs/xlator.h" +#include "glusterfs/locking.h" #ifndef THROTTLE_TBF_H__ #define THROTTLE_TBF_H__ diff --git a/libglusterfs/src/timer.h b/libglusterfs/src/glusterfs/timer.h index 216fc65144a..ae5b2edf451 100644 --- a/libglusterfs/src/timer.h +++ b/libglusterfs/src/glusterfs/timer.h @@ -11,8 +11,8 @@ #ifndef _TIMER_H #define _TIMER_H -#include "glusterfs.h" -#include "xlator.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/xlator.h" #include <sys/time.h> #include <pthread.h> @@ -34,10 +34,11 @@ struct _gf_timer { }; struct _gf_timer_registry { + struct list_head active; + pthread_mutex_t lock; + pthread_cond_t cond; pthread_t th; char fin; - struct list_head active; - gf_lock_t lock; }; typedef struct _gf_timer gf_timer_t; diff --git a/libglusterfs/src/timespec.h b/libglusterfs/src/glusterfs/timespec.h index 73db2d16abe..bb9ab446a5f 100644 --- a/libglusterfs/src/timespec.h +++ b/libglusterfs/src/glusterfs/timespec.h @@ -21,9 +21,13 @@ void timespec_now(struct timespec *ts); void +timespec_now_realtime(struct timespec *ts); +void timespec_adjust_delta(struct timespec *ts, struct timespec delta); void timespec_sub(const struct timespec *begin, const struct timespec *end, struct timespec *res); +int +timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts); #endif /* __INCLUDE_TIMESPEC_H__ */ diff --git a/libglusterfs/src/trie.h b/libglusterfs/src/glusterfs/trie.h index 6d2d8015964..6d2d8015964 100644 --- a/libglusterfs/src/trie.h +++ b/libglusterfs/src/glusterfs/trie.h diff --git a/libglusterfs/src/upcall-utils.h b/libglusterfs/src/glusterfs/upcall-utils.h index 765b8ef1aa6..0de8428c5fc 100644 --- a/libglusterfs/src/upcall-utils.h +++ b/libglusterfs/src/glusterfs/upcall-utils.h @@ -11,9 +11,9 @@ #ifndef _UPCALL_UTILS_H #define _UPCALL_UTILS_H -#include "iatt.h" -#include "compat-uuid.h" -#include "compat.h" +#include 
"glusterfs/iatt.h" +#include "glusterfs/compat-uuid.h" +#include "glusterfs/compat.h" /* Flags sent for cache_invalidation */ #define UP_NLINK 0x00000001 /* update nlink */ diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/glusterfs/xlator.h index 12078bb72e3..4fd3abdaeff 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/glusterfs/xlator.h @@ -11,18 +11,16 @@ #ifndef _XLATOR_H #define _XLATOR_H -#include <stdio.h> -#include <stdint.h> -#include <inttypes.h> - -#include "event-history.h" -#include "logging.h" -#include "common-utils.h" -#include "dict.h" -#include "compat.h" -#include "list.h" -#include "latency.h" -#include "compat-uuid.h" +#include <stdint.h> // for int32_t +#include <sys/types.h> // for off_t, mode_t, off64_t, dev_t +#include "glusterfs/glusterfs-fops.h" // for GF_FOP_MAXVALUE, entrylk_cmd +#include "glusterfs/atomic.h" // for gf_atomic_t +#include "glusterfs/glusterfs.h" // for gf_boolean_t, glusterfs_ctx_t +#include "glusterfs/compat-uuid.h" // for uuid_t +#include "glusterfs/compat.h" +#include "glusterfs/event-history.h" +#include "glusterfs/dict.h" +#include "glusterfs/latency.h" #define FIRST_CHILD(xl) (xl->children->xlator) #define SECOND_CHILD(xl) (xl->children->next->xlator) @@ -34,6 +32,8 @@ #define GF_SET_ATTR_ATIME 0x10 #define GF_SET_ATTR_MTIME 0x20 #define GF_SET_ATTR_CTIME 0x40 +#define GF_ATTR_ATIME_NOW 0x80 +#define GF_ATTR_MTIME_NOW 0x100 #define gf_attr_mode_set(mode) ((mode)&GF_SET_ATTR_MODE) #define gf_attr_uid_set(mode) ((mode)&GF_SET_ATTR_UID) @@ -54,14 +54,14 @@ typedef struct _loc loc_t; typedef int32_t (*event_notify_fn_t)(xlator_t *this, int32_t event, void *data, ...); -#include "list.h" -#include "gf-dirent.h" -#include "stack.h" -#include "iobuf.h" -#include "globals.h" -#include "iatt.h" -#include "options.h" -#include "client_t.h" +#include "glusterfs/list.h" +#include "glusterfs/gf-dirent.h" +#include "glusterfs/stack.h" +#include "glusterfs/iobuf.h" +#include "glusterfs/globals.h" +#include 
"glusterfs/iatt.h" +#include "glusterfs/options.h" +#include "glusterfs/client_t.h" struct _loc { const char *path; @@ -354,6 +354,11 @@ typedef int32_t (*fop_namelink_cbk_t)(call_frame_t *frame, void *cookie, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); +typedef int32_t (*fop_copy_file_range_cbk_t)( + call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + typedef int32_t (*fop_lookup_t)(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); @@ -544,6 +549,11 @@ typedef int32_t (*fop_icreate_t)(call_frame_t *frame, xlator_t *this, typedef int32_t (*fop_namelink_t)(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +typedef int32_t (*fop_copy_file_range_t)(call_frame_t *frame, xlator_t *this, + fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, + dict_t *xdata); /* WARNING: make sure the list is in order with FOP definition in `rpc/xdr/src/glusterfs-fops.x`. 
@@ -609,6 +619,7 @@ struct xlator_fops { fop_put_t put; fop_icreate_t icreate; fop_namelink_t namelink; + fop_copy_file_range_t copy_file_range; /* these entries are used for a typechecking hack in STACK_WIND _only_ */ /* make sure to add _cbk variables only after defining regular fops as @@ -673,6 +684,7 @@ struct xlator_fops { fop_put_cbk_t put_cbk; fop_icreate_cbk_t icreate_cbk; fop_namelink_cbk_t namelink_cbk; + fop_copy_file_range_cbk_t copy_file_range_cbk; }; typedef int32_t (*cbk_forget_t)(xlator_t *this, inode_t *inode); @@ -690,6 +702,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode); typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd); +typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd); + struct xlator_cbks { cbk_forget_t forget; cbk_release_t release; @@ -700,6 +714,8 @@ struct xlator_cbks { cbk_ictxmerge_t ictxmerge; cbk_inodectx_size_t ictxsize; cbk_fdctx_size_t fdctxsize; + cbk_fdclose_t fdclose; + cbk_fdclose_t fdclosedir; }; typedef int32_t (*dumpop_priv_t)(xlator_t *this); @@ -789,7 +805,7 @@ struct _xlator { struct { /* for latency measurement */ - fop_latency_t latencies[GF_FOP_MAXVALUE]; + gf_latency_t latencies[GF_FOP_MAXVALUE]; /* for latency measurement */ fop_metrics_t metrics[GF_FOP_MAXVALUE]; @@ -842,14 +858,16 @@ struct _xlator { /* Flag to understand how this xlator is categorized */ gf_category_t category; -}; -typedef struct { - int32_t (*init)(xlator_t *this); - void (*fini)(xlator_t *this); - int32_t (*reconfigure)(xlator_t *this, dict_t *options); - event_notify_fn_t notify; -} class_methods_t; + /* Variable to save xprt associated for detach brick */ + gf_atomic_t xprtrefcnt; + + /* Flag to notify got CHILD_DOWN event for detach brick */ + uint32_t notify_down; + + /* Flag to avoid throw duplicate PARENT_DOWN event */ + uint32_t parent_down; +}; /* This would be the only structure which needs to be exported by the translators. 
For the backward compatibility, in 4.x series @@ -1023,8 +1041,10 @@ gf_boolean_t loc_is_nameless(loc_t *loc); int xlator_mem_acct_init(xlator_t *xl, int num_types); +void +xlator_mem_acct_unref(struct mem_acct *mem_acct); int -is_gf_log_command(xlator_t *trans, const char *name, char *value); +is_gf_log_command(xlator_t *trans, const char *name, char *value, size_t size); int glusterd_check_log_level(const char *value); int @@ -1069,4 +1089,18 @@ xlator_memrec_free(xlator_t *xl); void xlator_mem_cleanup(xlator_t *this); + +void +handle_default_options(xlator_t *xl, dict_t *options); + +void +gluster_graph_take_reference(xlator_t *tree); + +gf_boolean_t +mgmt_is_multiplexed_daemon(char *name); + +gf_boolean_t +xlator_is_cleanup_starting(xlator_t *this); +int +graph_total_client_xlator(glusterfs_graph_t *graph); #endif /* _XLATOR_H */ diff --git a/libglusterfs/src/graph-print.c b/libglusterfs/src/graph-print.c index 3b984b64508..595d74330a1 100644 --- a/libglusterfs/src/graph-print.c +++ b/libglusterfs/src/graph-print.c @@ -10,10 +10,10 @@ #include <sys/uio.h> -#include "common-utils.h" -#include "xlator.h" -#include "graph-utils.h" -#include "libglusterfs-messages.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/xlator.h" +#include "glusterfs/graph-utils.h" +#include "glusterfs/libglusterfs-messages.h" struct gf_printer { ssize_t (*write)(struct gf_printer *gp, char *buf, size_t len); @@ -36,25 +36,6 @@ gp_write_file(struct gf_printer *gp, char *buf, size_t len) return len; } -static ssize_t -gp_write_buf(struct gf_printer *gp, char *buf, size_t len) -{ - struct iovec *iov = gp->priv; - - if (iov->iov_len < len) { - gf_msg("graph-print", GF_LOG_ERROR, 0, LG_MSG_BUFFER_FULL, - "buffer full"); - - return -1; - } - - memcpy(iov->iov_base, buf, len); - iov->iov_base += len; - iov->iov_len -= len; - - return len; -} - static int gpprintf(struct gf_printer *gp, const char *format, ...) 
{ @@ -152,43 +133,3 @@ glusterfs_graph_print_file(FILE *file, glusterfs_graph_t *graph) return glusterfs_graph_print(&gp, graph); } - -char * -glusterfs_graph_print_buf(glusterfs_graph_t *graph) -{ - FILE *f = NULL; - struct iovec iov = { - 0, - }; - int len = 0; - char *buf = NULL; - struct gf_printer gp = {.write = gp_write_buf, .priv = &iov}; - - f = fopen("/dev/null", "a"); - if (!f) { - gf_msg("graph-print", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, - "cannot open /dev/null"); - - return NULL; - } - len = glusterfs_graph_print_file(f, graph); - fclose(f); - if (len == -1) - return NULL; - - buf = GF_CALLOC(1, len + 1, gf_common_mt_graph_buf); - if (!buf) { - return NULL; - } - iov.iov_base = buf; - iov.iov_len = len; - - len = glusterfs_graph_print(&gp, graph); - if (len == -1) { - GF_FREE(buf); - - return NULL; - } - - return buf; -} diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c index a105c0cd72d..13f298eb3bd 100644 --- a/libglusterfs/src/graph.c +++ b/libglusterfs/src/graph.c @@ -8,16 +8,32 @@ cases as published by the Free Software Foundation. 
*/ -#include "xlator.h" -#include <dlfcn.h> -#include <netdb.h> -#include <fnmatch.h> -#include <stdlib.h> -#include "defaults.h" -#include <unistd.h> -#include "syscall.h" -#include <regex.h> -#include "libglusterfs-messages.h" +#include <stdint.h> // for uint32_t +#include <sys/time.h> // for timeval +#include <errno.h> // for EIO, errno, EINVAL, ENOMEM +#include <fnmatch.h> // for fnmatch, FNM_NOESCAPE +#include <openssl/sha.h> // for SHA256_DIGEST_LENGTH +#include <regex.h> // for regmatch_t, regcomp +#include <stdio.h> // for fclose, fopen, snprintf +#include <stdlib.h> // for NULL, atoi, mkstemp +#include <string.h> // for strcmp, strerror, memcpy +#include <strings.h> // for rindex +#include <sys/stat.h> // for stat +#include <sys/time.h> // for gettimeofday +#include <unistd.h> // for gethostname, getpid +#include "glusterfs/common-utils.h" // for gf_strncpy, gf_time_fmt +#include "glusterfs/defaults.h" +#include "glusterfs/dict.h" // for dict_foreach, dict_set_... +#include "glusterfs/globals.h" // for xlator_t, xlator_list_t +#include "glusterfs/glusterfs.h" // for glusterfs_graph_t, glus... +#include "glusterfs/glusterfs-fops.h" // for GF_EVENT_GRAPH_NEW, GF_... +#include "glusterfs/libglusterfs-messages.h" // for LG_MSG_GRAPH_ERROR, LG_... +#include "glusterfs/list.h" // for list_add, list_del_init +#include "glusterfs/logging.h" // for gf_msg, GF_LOG_ERROR +#include "glusterfs/mem-pool.h" // for GF_FREE, gf_strdup, GF_... 
+#include "glusterfs/mem-types.h" // for gf_common_mt_xlator_list_t +#include "glusterfs/options.h" // for xlator_tree_reconfigure +#include "glusterfs/syscall.h" // for sys_close, sys_stat #if 0 static void @@ -25,7 +41,7 @@ _gf_dump_details (int argc, char **argv) { extern FILE *gf_log_logfile; int i = 0; - char timestr[64]; + char timestr[GF_TIMESTR_SIZE]; time_t utime = 0; pid_t mypid = 0; struct utsname uname_buf = {{0, }, }; @@ -114,6 +130,53 @@ out: return cert_depth; } +xlator_t * +glusterfs_get_last_xlator(glusterfs_graph_t *graph) +{ + xlator_t *trav = graph->first; + if (!trav) + return NULL; + + while (trav->next) + trav = trav->next; + + return trav; +} + +xlator_t * +glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl) +{ + xlator_list_t *unlink = NULL; + xlator_list_t *prev = NULL; + xlator_list_t **tmp = NULL; + xlator_t *next_child = NULL; + xlator_t *xl = NULL; + + for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) { + if ((*tmp)->xlator == cxl) { + unlink = *tmp; + *tmp = (*tmp)->next; + if (*tmp) + next_child = (*tmp)->xlator; + break; + } + prev = *tmp; + } + + if (!prev) + xl = pxl; + else if (prev->xlator) + xl = prev->xlator->graph->last_xl; + + if (xl) + xl->next = next_child; + if (next_child) + next_child->prev = xl; + + GF_FREE(unlink); + return next_child; +} + int glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl) { @@ -181,7 +244,7 @@ glusterfs_graph_insert(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx, ixl->ctx = ctx; ixl->graph = graph; - ixl->options = get_new_dict(); + ixl->options = dict_new(); if (!ixl->options) goto err; @@ -406,39 +469,29 @@ _xlator_check_unknown_options(xlator_t *xl, void *data) dict_foreach(xl->options, _log_if_unknown_option, xl); } -int +static int glusterfs_graph_unknown_options(glusterfs_graph_t *graph) { xlator_foreach(graph->first, _xlator_check_unknown_options, NULL); return 0; } -void -fill_uuid(char *uuid, int size) +static void +fill_uuid(char *uuid, int size, struct timeval tv) { - char 
hostname[256] = { - 0, - }; - struct timeval tv = { + char hostname[50] = { 0, }; - char now_str[64]; - - if (gettimeofday(&tv, NULL) == -1) { - gf_msg("graph", GF_LOG_ERROR, errno, LG_MSG_GETTIMEOFDAY_FAILED, - "gettimeofday: " - "failed"); - } + char now_str[GF_TIMESTR_SIZE]; - if (gethostname(hostname, 256) == -1) { + if (gethostname(hostname, sizeof(hostname) - 1) != 0) { gf_msg("graph", GF_LOG_ERROR, errno, LG_MSG_GETHOSTNAME_FAILED, - "gethostname: " - "failed"); + "gethostname failed"); + hostname[sizeof(hostname) - 1] = '\0'; } - gf_time_fmt(now_str, sizeof now_str, tv.tv_sec, gf_timefmt_dirent); - snprintf(uuid, size, "%s-%d-%s:%" GF_PRI_SUSECONDS, hostname, getpid(), - now_str, tv.tv_usec); + gf_time_fmt_tv(now_str, sizeof now_str, &tv, gf_timefmt_dirent); + snprintf(uuid, size, "%s-%d-%s", hostname, getpid(), now_str); return; } @@ -514,14 +567,13 @@ glusterfs_graph_prepare(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx, } else { ret = glusterfs_graph_settop(graph, volume_name, _gf_false); } - if (!ret) { - goto ok; - } - gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR, - "glusterfs graph settop failed"); - return -1; -ok: + if (ret) { + gf_msg("graph", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ERROR, + "glusterfs graph settop failed"); + errno = EINVAL; + return -1; + } /* XXX: WORM VOLUME */ ret = glusterfs_graph_worm(graph, ctx); @@ -569,7 +621,7 @@ ok: /* XXX: DOB setting */ gettimeofday(&graph->dob, NULL); - fill_uuid(graph->graph_uuid, 128); + fill_uuid(graph->graph_uuid, sizeof(graph->graph_uuid), graph->dob); graph->id = ctx->graph_id++; @@ -1092,6 +1144,8 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph) ret = xlator_tree_free_memacct(graph->first); list_del_init(&graph->list); + pthread_mutex_destroy(&graph->mutex); + pthread_cond_destroy(&graph->child_down_cond); GF_FREE(graph); return ret; @@ -1134,6 +1188,33 @@ out: } int +glusterfs_graph_fini(glusterfs_graph_t *graph) +{ + xlator_t *trav = NULL; + + trav = graph->first; + + while 
(trav) { + if (trav->init_succeeded) { + trav->cleanup_starting = 1; + trav->fini(trav); + if (trav->local_pool) { + mem_pool_destroy(trav->local_pool); + trav->local_pool = NULL; + } + if (trav->itable) { + inode_table_destroy(trav->itable); + trav->itable = NULL; + } + trav->init_succeeded = 0; + } + trav = trav->next; + } + + return 0; +} + +int glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path, glusterfs_graph_t **newgraph) { @@ -1256,3 +1337,544 @@ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path, return 0; } +int +glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx, + glusterfs_graph_t *parent_graph) +{ + if (parent_graph) { + if (parent_graph->first) { + xlator_destroy(parent_graph->first); + } + ctx->active = NULL; + GF_FREE(parent_graph); + parent_graph = NULL; + } + return 0; +} + +void * +glusterfs_graph_cleanup(void *arg) +{ + glusterfs_graph_t *graph = NULL; + glusterfs_ctx_t *ctx = THIS->ctx; + int ret = -1; + graph = arg; + + if (!graph) + return NULL; + + /* To destroy the graph, fitst sent a GF_EVENT_PARENT_DOWN + * Then wait for GF_EVENT_CHILD_DOWN to get on the top + * xl. Once we have GF_EVENT_CHILD_DOWN event, then proceed + * to fini. + * + * During fini call, this will take a last unref on rpc and + * rpc_transport_object. 
+ */ + if (graph->first) + default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first); + + ret = pthread_mutex_lock(&graph->mutex); + if (ret != 0) { + gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED, + "Failed to acquire a lock"); + goto out; + } + /* check and wait for CHILD_DOWN for top xlator*/ + while (graph->used) { + ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex); + if (ret != 0) + gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED, + "cond wait failed "); + } + + ret = pthread_mutex_unlock(&graph->mutex); + if (ret != 0) { + gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED, + "Failed to release a lock"); + } + + /* Though we got a child down on top xlator, we have to wait until + * all the notifier to exit. Because there should not be any threads + * that access xl variables. + */ + pthread_mutex_lock(&ctx->notify_lock); + { + while (ctx->notifying) + pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock); + } + pthread_mutex_unlock(&ctx->notify_lock); + + pthread_mutex_lock(&ctx->cleanup_lock); + { + glusterfs_graph_fini(graph); + glusterfs_graph_destroy(graph); + } + pthread_mutex_unlock(&ctx->cleanup_lock); +out: + return NULL; +} + +glusterfs_graph_t * +glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name, + char *type) +{ + glusterfs_graph_t *parent_graph = NULL; + xlator_t *ixl = NULL; + int ret = -1; + parent_graph = GF_CALLOC(1, sizeof(*parent_graph), + gf_common_mt_glusterfs_graph_t); + if (!parent_graph) + goto out; + + INIT_LIST_HEAD(&parent_graph->list); + + ctx->active = parent_graph; + ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t); + if (!ixl) + goto out; + + ixl->ctx = ctx; + ixl->graph = parent_graph; + ixl->options = dict_new(); + if (!ixl->options) + goto out; + + ixl->name = gf_strdup(name); + if (!ixl->name) + goto out; + + ixl->is_autoloaded = 1; + + if (xlator_set_type(ixl, type) == -1) { + gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, 
LG_MSG_GRAPH_SETUP_FAILED, + "%s (%s) set type failed", name, type); + goto out; + } + + glusterfs_graph_set_first(parent_graph, ixl); + parent_graph->top = ixl; + ixl = NULL; + + gettimeofday(&parent_graph->dob, NULL); + fill_uuid(parent_graph->graph_uuid, 128, parent_graph->dob); + parent_graph->id = ctx->graph_id++; + ret = 0; +out: + if (ixl) + xlator_destroy(ixl); + + if (ret) { + glusterfs_muxsvc_cleanup_parent(ctx, parent_graph); + parent_graph = NULL; + } + return parent_graph; +} + +int +glusterfs_svc_mux_pidfile_cleanup(gf_volfile_t *volfile_obj) +{ + if (!volfile_obj || !volfile_obj->pidfp) + return 0; + + gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", volfile_obj->vol_id); + + lockf(fileno(volfile_obj->pidfp), F_ULOCK, 0); + fclose(volfile_obj->pidfp); + volfile_obj->pidfp = NULL; + + return 0; +} + +int +glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) +{ + xlator_t *last_xl = NULL; + glusterfs_graph_t *graph = NULL; + glusterfs_graph_t *parent_graph = NULL; + pthread_t clean_graph = { + 0, + }; + int ret = -1; + xlator_t *xl = NULL; + + if (!ctx || !ctx->active || !volfile_obj) + goto out; + + pthread_mutex_lock(&ctx->cleanup_lock); + { + parent_graph = ctx->active; + graph = volfile_obj->graph; + if (!graph) + goto unlock; + if (graph->first) + xl = graph->first; + + last_xl = graph->last_xl; + if (last_xl) + last_xl->next = NULL; + if (!xl || xl->cleanup_starting) + goto unlock; + + xl->cleanup_starting = 1; + gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, + "detaching child %s", volfile_obj->vol_id); + + list_del_init(&volfile_obj->volfile_list); + glusterfs_mux_xlator_unlink(parent_graph->top, xl); + glusterfs_svc_mux_pidfile_cleanup(volfile_obj); + parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); + parent_graph->xl_count -= graph->xl_count; + parent_graph->leaf_count -= graph->leaf_count; + parent_graph->id++; + ret = 0; + } +unlock: + pthread_mutex_unlock(&ctx->cleanup_lock); +out: + 
if (!ret) { + list_del_init(&volfile_obj->volfile_list); + if (graph) { + ret = gf_thread_create_detached( + &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean"); + if (ret) { + gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, + LG_MSG_GRAPH_CLEANUP_FAILED, + "%s failed to create clean " + "up thread", + volfile_obj->vol_id); + ret = 0; + } + } + GF_FREE(volfile_obj); + } + return ret; +} + +int +glusterfs_svc_mux_pidfile_setup(gf_volfile_t *volfile_obj, const char *pid_file) +{ + int ret = -1; + FILE *pidfp = NULL; + + if (!pid_file || !volfile_obj) + goto out; + + if (volfile_obj->pidfp) { + ret = 0; + goto out; + } + pidfp = fopen(pid_file, "a+"); + if (!pidfp) { + goto out; + } + volfile_obj->pidfp = pidfp; + + ret = lockf(fileno(pidfp), F_TLOCK, 0); + if (ret) { + ret = 0; + goto out; + } +out: + return ret; +} + +int +glusterfs_svc_mux_pidfile_update(gf_volfile_t *volfile_obj, + const char *pid_file, pid_t pid) +{ + int ret = 0; + FILE *pidfp = NULL; + int old_pid; + + if (!volfile_obj->pidfp) { + ret = glusterfs_svc_mux_pidfile_setup(volfile_obj, pid_file); + if (ret == -1) + goto out; + } + pidfp = volfile_obj->pidfp; + ret = fscanf(pidfp, "%d", &old_pid); + if (ret <= 0) { + goto update; + } + if (old_pid == pid) { + ret = 0; + goto out; + } else { + gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, + "Old pid=%d found in pidfile %s. 
Cleaning the old pid and " + "Updating new pid=%d", + old_pid, pid_file, pid); + } +update: + ret = sys_ftruncate(fileno(pidfp), 0); + if (ret) { + gf_msg("glusterfsd", GF_LOG_ERROR, errno, + LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, + "pidfile %s truncation failed", pid_file); + goto out; + } + + ret = fprintf(pidfp, "%d\n", pid); + if (ret <= 0) { + gf_msg("glusterfsd", GF_LOG_ERROR, errno, + LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed", + pid_file); + goto out; + } + + ret = fflush(pidfp); + if (ret) { + gf_msg("glusterfsd", GF_LOG_ERROR, errno, + LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed", + pid_file); + goto out; + } +out: + return ret; +} + +int +glusterfs_update_mux_pid(dict_t *dict, gf_volfile_t *volfile_obj) +{ + char *file = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO("graph", dict, out); + GF_VALIDATE_OR_GOTO("graph", volfile_obj, out); + + ret = dict_get_str(dict, "pidfile", &file); + if (ret < 0) { + gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, + "Failed to get pidfile from dict for volfile_id=%s", + volfile_obj->vol_id); + } + + ret = glusterfs_svc_mux_pidfile_update(volfile_obj, file, getpid()); + if (ret < 0) { + ret = -1; + gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, + "Failed to update " + "the pidfile for volfile_id=%s", + volfile_obj->vol_id); + + goto out; + } + + if (ret == 1) + gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, + "PID %d updated in pidfile=%s", getpid(), file); + ret = 0; +out: + return ret; +} +int +glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, + char *volfile_id, char *checksum, + dict_t *dict) +{ + glusterfs_graph_t *graph = NULL; + glusterfs_graph_t *parent_graph = NULL; + glusterfs_graph_t *clean_graph = NULL; + int ret = -1; + xlator_t *xl = NULL; + xlator_t *last_xl = NULL; + gf_volfile_t *volfile_obj = NULL; + pthread_t thread_id = { + 0, + }; + + if (!ctx) + goto out; + parent_graph = ctx->active; + 
graph = glusterfs_graph_construct(fp); + if (!graph) { + gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, + "failed to construct the graph"); + goto out; + } + graph->parent_down = 0; + graph->last_xl = glusterfs_get_last_xlator(graph); + + for (xl = graph->first; xl; xl = xl->next) { + if (strcmp(xl->type, "mount/fuse") == 0) { + gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, + LG_MSG_GRAPH_ATTACH_FAILED, + "fuse xlator cannot be specified in volume file"); + goto out; + } + } + + graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph)); + xl = graph->first; + /* TODO memory leaks everywhere need to free graph in case of error */ + if (glusterfs_graph_prepare(graph, ctx, xl->name)) { + gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, + "failed to prepare graph for xlator %s", xl->name); + ret = -1; + goto out; + } else if (glusterfs_graph_init(graph)) { + gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, + "failed to initialize graph for xlator %s", xl->name); + ret = -1; + goto out; + } else if (glusterfs_graph_parent_up(graph)) { + gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, + "failed to link the graphs for xlator %s ", xl->name); + ret = -1; + goto out; + } + + if (!parent_graph) { + parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd", + "debug/io-stats"); + if (!parent_graph) + goto out; + ((xlator_t *)parent_graph->top)->next = xl; + clean_graph = parent_graph; + } else { + last_xl = parent_graph->last_xl; + if (last_xl) + last_xl->next = xl; + xl->prev = last_xl; + } + parent_graph->last_xl = graph->last_xl; + + ret = glusterfs_xlator_link(parent_graph->top, xl); + if (ret) { + gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED, + "parent up notification failed"); + goto out; + } + parent_graph->xl_count += graph->xl_count; + parent_graph->leaf_count += graph->leaf_count; + parent_graph->id++; + + volfile_obj = GF_CALLOC(1, 
sizeof(gf_volfile_t), gf_common_volfile_t); + if (!volfile_obj) { + ret = -1; + goto out; + } + volfile_obj->pidfp = NULL; + snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s", + volfile_id); + + if (strcmp(ctx->cmd_args.process_name, "glustershd") == 0) { + ret = glusterfs_update_mux_pid(dict, volfile_obj); + if (ret == -1) { + GF_FREE(volfile_obj); + goto out; + } + } + + graph->used = 1; + parent_graph->id++; + list_add(&graph->list, &ctx->graphs); + INIT_LIST_HEAD(&volfile_obj->volfile_list); + volfile_obj->graph = graph; + memcpy(volfile_obj->volfile_checksum, checksum, + sizeof(volfile_obj->volfile_checksum)); + list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list); + gf_log_dump_graph(fp, graph); + graph = NULL; + + ret = 0; +out: + if (ret) { + if (graph) { + gluster_graph_take_reference(graph->first); + ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup, + graph, "graph_clean"); + if (ret) { + gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, + LG_MSG_GRAPH_CLEANUP_FAILED, + "%s failed to create clean " + "up thread", + volfile_id); + ret = 0; + } + } + if (clean_graph) + glusterfs_muxsvc_cleanup_parent(ctx, clean_graph); + } + return ret; +} + +int +glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + gf_volfile_t *volfile_obj, char *checksum, + dict_t *dict) +{ + glusterfs_graph_t *oldvolfile_graph = NULL; + glusterfs_graph_t *newvolfile_graph = NULL; + char vol_id[NAME_MAX + 1]; + + int ret = -1; + + if (!ctx) { + gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL, + "ctx is NULL"); + goto out; + } + + /* Change the message id */ + if (!volfile_obj) { + gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL, + "failed to get volfile object"); + goto out; + } + + oldvolfile_graph = volfile_obj->graph; + if (!oldvolfile_graph) { + goto out; + } + + newvolfile_graph = glusterfs_graph_construct(newvolfile_fp); + + if (!newvolfile_graph) { + goto out; + } + newvolfile_graph->last_xl = 
glusterfs_get_last_xlator(newvolfile_graph); + + glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first); + + if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) { + ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id); + if (ret < 0) + goto out; + ret = glusterfs_process_svc_detach(ctx, volfile_obj); + if (ret) { + gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, + LG_MSG_GRAPH_CLEANUP_FAILED, + "Could not detach " + "old graph. Aborting the reconfiguration operation"); + goto out; + } + volfile_obj = NULL; + ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id, + checksum, dict); + goto out; + } + + gf_msg_debug("glusterfsd-mgmt", 0, + "Only options have changed in the" + " new graph"); + + ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph); + if (ret) { + gf_msg_debug("glusterfsd-mgmt", 0, + "Could not reconfigure " + "new options in old graph"); + goto out; + } + memcpy(volfile_obj->volfile_checksum, checksum, + sizeof(volfile_obj->volfile_checksum)); + + ret = 0; +out: + + if (newvolfile_graph) + glusterfs_graph_destroy(newvolfile_graph); + + return ret; +} diff --git a/libglusterfs/src/graph.l b/libglusterfs/src/graph.l index 8af28a43539..b9d4b2b6828 100644 --- a/libglusterfs/src/graph.l +++ b/libglusterfs/src/graph.l @@ -14,35 +14,27 @@ %{ #define YYSTYPE char * -#include "xlator.h" +#include "glusterfs/xlator.h" #include "y.tab.h" #include <string.h> -#define START_STRSIZE 32 static char *text; -static int text_asize; static int text_size; void append_string(const char *str, int size) { - int new_size = text_size + size + 1; - if (new_size > text_asize) { - new_size += START_STRSIZE - 1; - new_size &= -START_STRSIZE; - if (!text) { - text = GF_CALLOC (1, new_size, - gf_common_mt_char); - } else { - text = GF_REALLOC (text, new_size); - } - if (!text) { - return; - } - text_asize = new_size; - } - memcpy(text + text_size, str, size); - text_size += size; - text[text_size] = 0; + int 
new_size = text_size + size + 1; + if (!text) { + text = GF_CALLOC (1, new_size, gf_common_mt_char); + } else { + text = GF_REALLOC (text, new_size); + } + if (!text) { + return; + } + memcpy(text + text_size, str, size); + text_size += size; + text[text_size] = 0; } %} @@ -65,12 +57,14 @@ TYPE [t][y][p][e] \\. { append_string (yytext + 1, yyleng - 1); } \" { if (0) { - yyunput (0, NULL); + yyunput (0, NULL); } BEGIN (INITIAL); graphyylval = text; + text = NULL; + text_size = 0; return STRING_TOK; - } + } } [^ \t\r\n\"\\]+ { graphyylval = gf_strdup (yytext) ; return ID; } [ \t\r\n]+ ; diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y index 3c3f7b5bb82..e63febdc08b 100644 --- a/libglusterfs/src/graph.y +++ b/libglusterfs/src/graph.y @@ -22,11 +22,11 @@ #define RELAX_POISONING -#include "xlator.h" -#include "graph-utils.h" -#include "logging.h" -#include "syscall.h" -#include "libglusterfs-messages.h" +#include "glusterfs/xlator.h" +#include "glusterfs/graph-utils.h" +#include "glusterfs/logging.h" +#include "glusterfs/syscall.h" +#include "glusterfs/libglusterfs-messages.h" static int new_volume (char *name); static int volume_type (char *type); @@ -123,7 +123,7 @@ new_volume (char *name) int ret = 0; if (!name) { - gf_msg_debug ("parser", 0,"Invalid argument name: '%s'", name); + gf_msg_debug ("parser", 0,"Invalid argument name"); ret = -1; goto out; } @@ -164,7 +164,8 @@ new_volume (char *name) goto out; } - curr->options = get_new_dict (); + INIT_LIST_HEAD(&curr->volume_options); + curr->options = dict_new (); if (!curr->options) { GF_FREE (curr->name); @@ -542,6 +543,9 @@ glusterfs_graph_new () INIT_LIST_HEAD (&graph->list); + pthread_mutex_init(&graph->mutex, NULL); + pthread_cond_init(&graph->child_down_cond, NULL); + gettimeofday (&graph->dob, NULL); return graph; diff --git a/libglusterfs/src/hashfn.c b/libglusterfs/src/hashfn.c index 5c4561f1ee8..d2237e99f83 100644 --- a/libglusterfs/src/hashfn.c +++ b/libglusterfs/src/hashfn.c @@ -11,24 
+11,12 @@ #include <stdint.h> #include <stdlib.h> -#include "hashfn.h" - #define get16bits(d) (*((const uint16_t *)(d))) #define DM_DELTA 0x9E3779B9 #define DM_FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ #define DM_PARTROUNDS 6 /* 6 gets complete mixing */ -uint32_t -ReallySimpleHash(char *path, int len) -{ - uint32_t hash = 0; - for (; len > 0; len--) - hash ^= (char)path[len]; - - return hash; -} - /* This is apparently the "fastest hash function for strings". Written by Paul Hsieh <http://www.azillionmonkeys.com/qed/hash.html> diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 089aa6f9b21..dbadf77442d 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -8,21 +8,114 @@ cases as published by the Free Software Foundation. */ -#include "inode.h" -#include "common-utils.h" -#include "statedump.h" +#include "glusterfs/inode.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/statedump.h" #include <pthread.h> #include <sys/types.h> #include <stdint.h> -#include "list.h" -#include <time.h> +#include "glusterfs/list.h" #include <assert.h> -#include "libglusterfs-messages.h" +#include "glusterfs/libglusterfs-messages.h" /* TODO: move latest accessed dentry to list_head of inode */ +// clang-format off +/* + +Details as per Xavi: + + I think we should have 3 lists: active, lru and invalidate. + +We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of +refs, invalidate_sent flag and moving from one list to another must be done +atomically. 
+ +With this information, these are the states that cause a transition: + + refs nlookups inv_sent op + 1 0 0 unref -> refs = 0, active--->destroy + 1 1 0 unref -> refs = 0, active--->lru + 1 1 0 forget -> nlookups = 0, active--->active + *0 1 0 forget -> nlookups = 0, lru--->destroy + *0 1 1 forget -> nlookups = 0, invalidate--->destroy + 0 1 0 ref -> refs = 1, lru--->active + 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active + 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate + 1 1 1 unref -> refs = 0, invalidate--->invalidate + 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active + +(*) technically these combinations cannot happen because a forget sent by the +kernel first calls ref() and then unref(). However it's equivalent. + +overflow means that lru list has grown beyond the limit and the inode needs to +be invalidated. All other combinations do not cause a change in state or are not +possible. + +Based on this, the code could be similar to this: + + ref(inode, inv) + { + if (refs == 0) { + if (inv_sent) { + invalidate_count--; + inv_sent = 0; + } else { + lru_count--; + } + if (inv) { + inv_sent = 1; + invalidate_count++; + list_move(inode, invalidate); + } else { + active_count++; + list_move(inode, active); + } + } + refs++; + } + + unref(inode, clear) + { + if (clear && inv_sent) { + // there is a case of fuse itself sending forget, without + // invalidate, after entry delete, like unlink(), rmdir(). 
+ inv_sent = 0; + invalidate_count--; + active_count++; + list_move(inode, active); + } + refs--; + if ((refs == 0) && !inv_sent) { + active_count--; + if (nlookups == 0) { + destroy(inode); + } else { + lru_count++; + list_move(inode, lru); + } + } + } + + forget(inode) + { + ref(inode, false); + nlookups--; + unref(inode, true); + } + + overflow(inode) + { + ref(inode, true); + invalidator(inode); + unref(inode, false); + } + +*/ +// clang-format on + #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \ { \ int i = 1; \ @@ -37,7 +130,7 @@ } static inode_t * -__inode_unref(inode_t *inode); +__inode_unref(inode_t *inode, bool clear); static int inode_table_prune(inode_table_t *table); @@ -65,27 +158,15 @@ hash_dentry(inode_t *parent, const char *name, int mod) static int hash_gfid(uuid_t uuid, int mod) { - int ret = 0; - - ret = uuid[15] + (uuid[14] << 8); - - return ret; + return ((uuid[15] + (uuid[14] << 8)) % mod); } static void -__dentry_hash(dentry_t *dentry) +__dentry_hash(dentry_t *dentry, const int hash) { inode_table_t *table = NULL; - int hash = 0; - - if (!dentry) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, - "dentry not found"); - return; - } table = dentry->inode->table; - hash = hash_dentry(dentry->parent, dentry->name, table->hashsize); list_del_init(&dentry->hash); list_add(&dentry->hash, &table->name_hash[hash]); @@ -94,49 +175,44 @@ __dentry_hash(dentry_t *dentry) static int __is_dentry_hashed(dentry_t *dentry) { - if (!dentry) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, - "dentry not found"); - return 0; - } - return !list_empty(&dentry->hash); } static void __dentry_unhash(dentry_t *dentry) { - if (!dentry) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, - "dentry not found"); - return; - } - list_del_init(&dentry->hash); } static void -__dentry_unset(dentry_t *dentry) +dentry_destroy(dentry_t *dentry) { - if (!dentry) { - 
gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, - "dentry not found"); + if (!dentry) return; - } + + GF_FREE(dentry->name); + dentry->name = NULL; + mem_put(dentry); + + return; +} + +static dentry_t * +__dentry_unset(dentry_t *dentry) +{ + if (!dentry) + return NULL; __dentry_unhash(dentry); list_del_init(&dentry->inode_list); - GF_FREE(dentry->name); - dentry->name = NULL; - if (dentry->parent) { - __inode_unref(dentry->parent); + __inode_unref(dentry->parent, false); dentry->parent = NULL; } - mem_put(dentry); + return dentry; } static int @@ -156,15 +232,15 @@ __foreach_ancestor_dentry(dentry_t *dentry, ret = per_dentry_fn(dentry, data); if (ret) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PER_DENTRY_FAILED, - "per dentry fn returned %d", ret); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PER_DENTRY_FAILED, + "ret=%d", ret, NULL); goto out; } parent = dentry->parent; if (!parent) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PARENT_DENTRY_NOT_FOUND, - "parent not found"); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PARENT_DENTRY_NOT_FOUND, + NULL); goto out; } @@ -195,22 +271,12 @@ static int __is_dentry_cyclic(dentry_t *dentry) { int ret = 0; - inode_t *inode = NULL; - char *name = "<nul>"; ret = __foreach_ancestor_dentry(dentry, __check_cycle, dentry->inode); if (ret) { - inode = dentry->inode; - - if (dentry->name) - name = dentry->name; - - gf_msg(dentry->inode->table->name, GF_LOG_CRITICAL, 0, - LG_MSG_DENTRY_CYCLIC_LOOP, - "detected cyclic loop " - "formation during inode linkage. 
inode (%s) linking " - "under itself as %s", - uuid_utoa(inode->gfid), name); + gf_smsg(dentry->inode->table->name, GF_LOG_CRITICAL, 0, + LG_MSG_DENTRY_CYCLIC_LOOP, "gfid=%s name=-%s", + uuid_utoa(dentry->inode->gfid), dentry->name, NULL); } return ret; @@ -219,41 +285,19 @@ __is_dentry_cyclic(dentry_t *dentry) static void __inode_unhash(inode_t *inode) { - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); - return; - } - list_del_init(&inode->hash); } static int __is_inode_hashed(inode_t *inode) { - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); - return 0; - } - return !list_empty(&inode->hash); } static void -__inode_hash(inode_t *inode) +__inode_hash(inode_t *inode, const int hash) { - inode_table_t *table = NULL; - int hash = 0; - - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); - return; - } - - table = inode->table; - hash = hash_gfid(inode->gfid, 65536); + inode_table_t *table = inode->table; list_del_init(&inode->hash); list_add(&inode->hash, &table->inode_hash[hash]); @@ -265,12 +309,6 @@ __dentry_search_for_inode(inode_t *inode, uuid_t pargfid, const char *name) dentry_t *dentry = NULL; dentry_t *tmp = NULL; - if (!inode || !name) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "inode || name not found"); - return NULL; - } - /* earlier, just the ino was sent, which could have been 0, now we deal with gfid, and if sent gfid is null or 0, no need to continue with the check */ @@ -296,26 +334,20 @@ __inode_ctx_free(inode_t *inode) xlator_t *xl = NULL; xlator_t *old_THIS = NULL; - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); - return; - } - if (!inode->_ctx) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_CTX_NULL, - "_ctx not found"); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, 
LG_MSG_CTX_NULL, NULL); goto noctx; } - for (index = 0; index < inode->table->xl->graph->xl_count; index++) { + for (index = 0; index < inode->table->ctxcount; index++) { if (inode->_ctx[index].value1 || inode->_ctx[index].value2) { xl = (xlator_t *)(long)inode->_ctx[index].xl_key; - old_THIS = THIS; - THIS = xl; - if (!xl->call_cleanup && xl->cbks->forget) + if (xl && !xl->call_cleanup && xl->cbks->forget) { + old_THIS = THIS; + THIS = xl; xl->cbks->forget(xl, inode); - THIS = old_THIS; + THIS = old_THIS; + } } } @@ -329,12 +361,6 @@ noctx: static void __inode_destroy(inode_t *inode) { - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); - return; - } - __inode_ctx_free(inode); LOCK_DESTROY(&inode->lock); @@ -377,9 +403,6 @@ inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode) static void __inode_activate(inode_t *inode) { - if (!inode) - return; - list_move(&inode->list, &inode->table->active); inode->table->active_size++; } @@ -390,19 +413,13 @@ __inode_passivate(inode_t *inode) dentry_t *dentry = NULL; dentry_t *t = NULL; - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); - return; - } - list_move_tail(&inode->list, &inode->table->lru); inode->table->lru_size++; list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) { if (!__is_dentry_hashed(dentry)) - __dentry_unset(dentry); + dentry_destroy(__dentry_unset(dentry)); } } @@ -412,12 +429,6 @@ __inode_retire(inode_t *inode) dentry_t *dentry = NULL; dentry_t *t = NULL; - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); - return; - } - list_move_tail(&inode->list, &inode->table->purge); inode->table->purge_size++; @@ -425,7 +436,7 @@ __inode_retire(inode_t *inode) list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) { - __dentry_unset(dentry); + dentry_destroy(__dentry_unset(dentry)); } } @@ 
-446,15 +457,11 @@ out: } static inode_t * -__inode_unref(inode_t *inode) +__inode_unref(inode_t *inode, bool clear) { int index = 0; xlator_t *this = NULL; - - if (!inode) - return NULL; - - this = THIS; + uint64_t nlookup = 0; /* * Root inode should always be in active list of inode table. So unrefs @@ -463,6 +470,48 @@ __inode_unref(inode_t *inode) if (__is_root_gfid(inode->gfid)) return inode; + /* + * No need to acquire inode table's lock + * as __inode_unref is called after acquiding + * the inode table's lock. + */ + if (inode->table->cleanup_started && !inode->ref) + /* + * There is a good chance that, the inode + * on which unref came has already been + * zero refed and added to the purge list. + * This can happen when inode table is + * being destroyed (glfs_fini is something + * which destroys the inode table). + * + * Consider a directory 'a' which has a file + * 'b'. Now as part of inode table destruction + * zero refing of inodes does not happen from + * leaf to the root. It happens in the order + * inodes are present in the list. So, in this + * example, the dentry of 'b' would have its + * parent set to the inode of 'a'. So if + * 'a' gets zero refed first (as part of + * inode table cleanup) and then 'b' has to + * zero refed, then dentry_unset is called on + * the dentry of 'b' and it further goes on to + * call inode_unref on b's parent which is 'a'. + * In this situation, GF_ASSERT would be called + * below as the refcount of 'a' has been already set + * to zero. + * + * So return the inode if the inode table cleanup + * has already started and inode refcount is 0. 
+ */ + return inode; + + this = THIS; + + if (clear && inode->in_invalidate_list) { + inode->in_invalidate_list = false; + inode->table->invalidate_size--; + __inode_activate(inode); + } GF_ASSERT(inode->ref); --inode->ref; @@ -473,10 +522,11 @@ __inode_unref(inode_t *inode) inode->_ctx[index].ref--; } - if (!inode->ref) { + if (!inode->ref && !inode->in_invalidate_list) { inode->table->active_size--; - if (inode->nlookup) + nlookup = GF_ATOMIC_GET(inode->nlookup); + if (nlookup) __inode_passivate(inode); else __inode_retire(inode); @@ -486,7 +536,7 @@ __inode_unref(inode_t *inode) } static inode_t * -__inode_ref(inode_t *inode) +__inode_ref(inode_t *inode, bool is_invalidate) { int index = 0; xlator_t *this = NULL; @@ -496,11 +546,6 @@ __inode_ref(inode_t *inode) this = THIS; - if (!inode->ref) { - inode->table->lru_size--; - __inode_activate(inode); - } - /* * Root inode should always be in active list of inode table. So unrefs * on root inode are no-ops. If we do not allow unrefs but allow refs, @@ -512,6 +557,22 @@ __inode_ref(inode_t *inode) if (__is_root_gfid(inode->gfid) && inode->ref) return inode; + if (!inode->ref) { + if (inode->in_invalidate_list) { + inode->in_invalidate_list = false; + inode->table->invalidate_size--; + } else { + inode->table->lru_size--; + } + if (is_invalidate) { + inode->in_invalidate_list = true; + inode->table->invalidate_size++; + list_move_tail(&inode->list, &inode->table->invalidate); + } else { + __inode_activate(inode); + } + } + inode->ref++; index = __inode_get_xl_index(inode, this); @@ -535,7 +596,7 @@ inode_unref(inode_t *inode) pthread_mutex_lock(&table->lock); { - inode = __inode_unref(inode); + inode = __inode_unref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -556,7 +617,7 @@ inode_ref(inode_t *inode) pthread_mutex_lock(&table->lock); { - inode = __inode_ref(inode); + inode = __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -564,16 +625,10 @@ inode_ref(inode_t *inode) } static dentry_t 
* -__dentry_create(inode_t *inode, inode_t *parent, const char *name) +dentry_create(inode_t *inode, inode_t *parent, const char *name) { dentry_t *newd = NULL; - if (!inode || !parent || !name) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "inode || parent || name not found"); - return NULL; - } - newd = mem_get0(parent->table->dentry_pool); if (newd == NULL) { goto out; @@ -589,10 +644,6 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name) goto out; } - if (parent) - newd->parent = __inode_ref(parent); - - list_add(&newd->inode_list, &inode->dentry_list); newd->inode = inode; out: @@ -600,18 +651,10 @@ out: } static inode_t * -__inode_create(inode_table_t *table) +inode_create(inode_table_t *table) { inode_t *newi = NULL; - if (!table) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, - LG_MSG_INODE_TABLE_NOT_FOUND, - "table not " - "found"); - return NULL; - } - newi = mem_get0(table->inode_pool); if (!newi) { goto out; @@ -635,11 +678,7 @@ __inode_create(inode_table_t *table) goto out; } - list_add(&newi->list, &table->lru); - table->lru_size++; - out: - return newi; } @@ -656,14 +695,16 @@ inode_new(inode_table_t *table) return NULL; } - pthread_mutex_lock(&table->lock); - { - inode = __inode_create(table); - if (inode != NULL) { - __inode_ref(inode); + inode = inode_create(table); + if (inode) { + pthread_mutex_lock(&table->lock); + { + list_add(&inode->list, &table->lru); + table->lru_size++; + __inode_ref(inode, false); } + pthread_mutex_unlock(&table->lock); } - pthread_mutex_unlock(&table->lock); return inode; } @@ -680,8 +721,7 @@ inode_new(inode_table_t *table) static inode_t * __inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) { - if (!inode) - return NULL; + uint64_t nlookup = 0; GF_ASSERT(inode->ref >= nref); @@ -693,7 +733,8 @@ __inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) if (!inode->ref) { inode->table->active_size--; - if (inode->nlookup) + nlookup = GF_ATOMIC_GET(inode->nlookup); + 
if (nlookup) __inode_passivate(inode); else __inode_retire(inode); @@ -703,44 +744,30 @@ __inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) } static inode_t * -__inode_lookup(inode_t *inode) +inode_forget_atomic(inode_t *inode, uint64_t nlookup) { - if (!inode) - return NULL; + uint64_t inode_lookup = 0; - inode->nlookup++; - - return inode; -} - -static inode_t * -__inode_forget(inode_t *inode, uint64_t nlookup) -{ if (!inode) return NULL; - GF_ASSERT(inode->nlookup >= nlookup); - - inode->nlookup -= nlookup; - - if (!nlookup) - inode->nlookup = 0; + if (nlookup == 0) { + GF_ATOMIC_INIT(inode->nlookup, 0); + } else { + inode_lookup = GF_ATOMIC_FETCH_SUB(inode->nlookup, nlookup); + GF_ASSERT(inode_lookup >= nlookup); + } return inode; } dentry_t * -__dentry_grep(inode_table_t *table, inode_t *parent, const char *name) +__dentry_grep(inode_table_t *table, inode_t *parent, const char *name, + const int hash) { - int hash = 0; dentry_t *dentry = NULL; dentry_t *tmp = NULL; - if (!table || !name || !parent) - return NULL; - - hash = hash_dentry(parent, name, table->hashsize); - list_for_each_entry(tmp, &table->name_hash[hash], hash) { if (tmp->parent == parent && !strcmp(tmp->name, name)) { @@ -765,15 +792,16 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) return NULL; } + int hash = hash_dentry(parent, name, table->hashsize); + pthread_mutex_lock(&table->lock); { - dentry = __dentry_grep(table, parent, name); - - if (dentry) + dentry = __dentry_grep(table, parent, name, hash); + if (dentry) { inode = dentry->inode; - - if (inode) - __inode_ref(inode); + if (inode) + __inode_ref(inode, false); + } } pthread_mutex_unlock(&table->lock); @@ -792,6 +820,9 @@ inode_resolve(inode_table_t *table, char *path) parent = inode_ref(table->root); str = tmp = gf_strdup(path); + if (str == NULL) { + goto out; + } while (1) { bname = strtok_r(str, "/", &saveptr); @@ -837,17 +868,18 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char 
*name, return ret; } + int hash = hash_dentry(parent, name, table->hashsize); + pthread_mutex_lock(&table->lock); { - dentry = __dentry_grep(table, parent, name); - - if (dentry) + dentry = __dentry_grep(table, parent, name, hash); + if (dentry) { inode = dentry->inode; - - if (inode) { - gf_uuid_copy(gfid, inode->gfid); - *type = inode->ia_type; - ret = 0; + if (inode) { + gf_uuid_copy(gfid, inode->gfid); + *type = inode->ia_type; + ret = 0; + } } } pthread_mutex_unlock(&table->lock); @@ -868,25 +900,14 @@ __is_root_gfid(uuid_t gfid) } inode_t * -__inode_find(inode_table_t *table, uuid_t gfid) +__inode_find(inode_table_t *table, uuid_t gfid, const int hash) { inode_t *inode = NULL; inode_t *tmp = NULL; - int hash = 0; - - if (!table) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, - LG_MSG_INODE_TABLE_NOT_FOUND, - "table not " - "found"); - goto out; - } if (__is_root_gfid(gfid)) return table->root; - hash = hash_gfid(gfid, 65536); - list_for_each_entry(tmp, &table->inode_hash[hash], hash) { if (gf_uuid_compare(tmp->gfid, gfid) == 0) { @@ -895,7 +916,6 @@ __inode_find(inode_table_t *table, uuid_t gfid) } } -out: return inode; } @@ -912,11 +932,13 @@ inode_find(inode_table_t *table, uuid_t gfid) return NULL; } + int hash = hash_gfid(gfid, 65536); + pthread_mutex_lock(&table->lock); { - inode = __inode_find(table, gfid); + inode = __inode_find(table, gfid, hash); if (inode) - __inode_ref(inode); + __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -925,24 +947,16 @@ inode_find(inode_table_t *table, uuid_t gfid) static inode_t * __inode_link(inode_t *inode, inode_t *parent, const char *name, - struct iatt *iatt) + struct iatt *iatt, const int dhash) { dentry_t *dentry = NULL; dentry_t *old_dentry = NULL; inode_t *old_inode = NULL; inode_table_t *table = NULL; inode_t *link_inode = NULL; - - if (!inode) { - errno = EINVAL; - return NULL; - } + char link_uuid_str[64] = {0}, parent_uuid_str[64] = {0}; table = inode->table; - if (!table) { - errno = 
EINVAL; - return NULL; - } if (parent) { /* We should prevent inode linking between different @@ -980,14 +994,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, return NULL; } - old_inode = __inode_find(table, iatt->ia_gfid); + int ihash = hash_gfid(iatt->ia_gfid, 65536); + + old_inode = __inode_find(table, iatt->ia_gfid, ihash); if (old_inode) { link_inode = old_inode; } else { gf_uuid_copy(inode->gfid, iatt->ia_gfid); inode->ia_type = iatt->ia_type; - __inode_hash(inode); + __inode_hash(inode, ihash); } } else { /* @old_inode serves another important purpose - it indicates @@ -1002,40 +1018,40 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, old_inode = inode; } - if (name) { - if (!strcmp(name, ".") || !strcmp(name, "..")) - return link_inode; - - if (strchr(name, '/')) { - GF_ASSERT(!"inode link attempted with '/' in name"); - return NULL; - } + if (name && (!strcmp(name, ".") || !strcmp(name, ".."))) { + return link_inode; } /* use only link_inode beyond this point */ if (parent) { - old_dentry = __dentry_grep(table, parent, name); + old_dentry = __dentry_grep(table, parent, name, dhash); if (!old_dentry || old_dentry->inode != link_inode) { - dentry = __dentry_create(link_inode, parent, name); + dentry = dentry_create(link_inode, parent, name); if (!dentry) { - gf_msg_callingfn( - THIS->name, GF_LOG_ERROR, 0, LG_MSG_DENTRY_CREATE_FAILED, - "dentry create failed on " - "inode %s with parent %s", - uuid_utoa(link_inode->gfid), uuid_utoa(parent->gfid)); + gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, + LG_MSG_DENTRY_CREATE_FAILED, + "dentry create failed on " + "inode %s with parent %s", + uuid_utoa_r(link_inode->gfid, link_uuid_str), + uuid_utoa_r(parent->gfid, parent_uuid_str)); errno = ENOMEM; return NULL; } + + /* dentry linking needs to happen inside lock */ + dentry->parent = __inode_ref(parent, false); + list_add(&dentry->inode_list, &link_inode->dentry_list); + if (old_inode && __is_dentry_cyclic(dentry)) { errno = 
ELOOP; - __dentry_unset(dentry); + dentry_destroy(__dentry_unset(dentry)); return NULL; } - __dentry_hash(dentry); + __dentry_hash(dentry, dhash); if (old_dentry) - __dentry_unset(old_dentry); + dentry_destroy(__dentry_unset(old_dentry)); } } @@ -1045,6 +1061,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, inode_t * inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) { + int hash = 0; inode_table_t *table = NULL; inode_t *linked_inode = NULL; @@ -1056,12 +1073,20 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) table = inode->table; + if (parent && name) { + hash = hash_dentry(parent, name, table->hashsize); + } + + if (name && strchr(name, '/')) { + GF_ASSERT(!"inode link attempted with '/' in name"); + return NULL; + } + pthread_mutex_lock(&table->lock); { - linked_inode = __inode_link(inode, parent, name, iatt); - + linked_inode = __inode_link(inode, parent, name, iatt, hash); if (linked_inode) - __inode_ref(linked_inode); + __inode_ref(linked_inode, false); } pthread_mutex_unlock(&table->lock); @@ -1073,6 +1098,20 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) int inode_lookup(inode_t *inode) { + if (!inode) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, + "inode not found"); + return -1; + } + + GF_ATOMIC_INC(inode->nlookup); + + return 0; +} + +int +inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) +{ inode_table_t *table = NULL; if (!inode) { @@ -1085,15 +1124,17 @@ inode_lookup(inode_t *inode) pthread_mutex_lock(&table->lock); { - __inode_lookup(inode); + __inode_ref_reduce_by_n(inode, nref); } pthread_mutex_unlock(&table->lock); + inode_table_prune(table); + return 0; } int -inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) +inode_forget(inode_t *inode, uint64_t nlookup) { inode_table_t *table = NULL; @@ -1105,11 +1146,7 @@ inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) table = inode->table; - 
pthread_mutex_lock(&table->lock); - { - __inode_ref_reduce_by_n(inode, nref); - } - pthread_mutex_unlock(&table->lock); + inode_forget_atomic(inode, nlookup); inode_table_prune(table); @@ -1117,7 +1154,7 @@ inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) } int -inode_forget(inode_t *inode, uint64_t nlookup) +inode_forget_with_unref(inode_t *inode, uint64_t nlookup) { inode_table_t *table = NULL; @@ -1131,7 +1168,8 @@ inode_forget(inode_t *inode, uint64_t nlookup) pthread_mutex_lock(&table->lock); { - __inode_forget(inode, nlookup); + inode_forget_atomic(inode, nlookup); + __inode_unref(inode, true); } pthread_mutex_unlock(&table->lock); @@ -1189,48 +1227,47 @@ inode_invalidate(inode_t *inode) return ret; } -static void +static dentry_t * __inode_unlink(inode_t *inode, inode_t *parent, const char *name) { dentry_t *dentry = NULL; char pgfid[64] = {0}; char gfid[64] = {0}; - if (!inode || !parent || !name) - return; - dentry = __dentry_search_for_inode(inode, parent->gfid, name); /* dentry NULL for corrupted backend */ if (dentry) { - __dentry_unset(dentry); + dentry = __dentry_unset(dentry); } else { - gf_msg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, - "%s/%s: dentry not found in %s", - uuid_utoa_r(parent->gfid, pgfid), name, - uuid_utoa_r(inode->gfid, gfid)); + gf_smsg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, + "parent-gfid=%s name=%s gfid%s", + uuid_utoa_r(parent->gfid, pgfid), name, + uuid_utoa_r(inode->gfid, gfid), NULL); } + + return dentry; } void inode_unlink(inode_t *inode, inode_t *parent, const char *name) { - inode_table_t *table = NULL; + inode_table_t *table; + dentry_t *dentry; - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); + if (!inode || !parent || !name) return; - } table = inode->table; pthread_mutex_lock(&table->lock); { - __inode_unlink(inode, parent, name); + dentry = __inode_unlink(inode, parent, name); } pthread_mutex_unlock(&table->lock); + 
dentry_destroy(dentry); + inode_table_prune(table); } @@ -1239,6 +1276,9 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, inode_t *dstdir, const char *dstname, inode_t *inode, struct iatt *iatt) { + int hash = 0; + dentry_t *dentry = NULL; + if (!inode) { gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, "inode not found"); @@ -1247,13 +1287,26 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, table = inode->table; + if (dstname && strchr(dstname, '/')) { + GF_ASSERT(!"inode link attempted with '/' in name"); + return -1; + } + + if (dstdir && dstname) { + hash = hash_dentry(dstdir, dstname, table->hashsize); + } + pthread_mutex_lock(&table->lock); { - __inode_link(inode, dstdir, dstname, iatt); - __inode_unlink(inode, srcdir, srcname); + __inode_link(inode, dstdir, dstname, iatt, hash); + /* pick the old dentry */ + dentry = __inode_unlink(inode, srcdir, srcname); } pthread_mutex_unlock(&table->lock); + /* free the old dentry */ + dentry_destroy(dentry); + inode_table_prune(table); return 0; @@ -1314,7 +1367,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name) parent = dentry->parent; if (parent) - __inode_ref(parent); + __inode_ref(parent, false); } pthread_mutex_unlock(&table->lock); @@ -1324,12 +1377,6 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name) static int __inode_has_dentry(inode_t *inode) { - if (!inode) { - gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, - "inode not found"); - return 0; - } - return !list_empty(&inode->dentry_list); } @@ -1338,6 +1385,12 @@ inode_has_dentry(inode_t *inode) { int dentry_present = 0; + if (!inode) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, + "inode not found"); + return 0; + } + LOCK(&inode->lock); { dentry_present = __inode_has_dentry(inode); @@ -1374,10 +1427,8 @@ __inode_path(inode_t *inode, const char *name, char **bufp) i++; /* "/" */ i += 
strlen(trav->name); if (i > PATH_MAX) { - gf_msg(table->name, GF_LOG_CRITICAL, 0, LG_MSG_DENTRY_CYCLIC_LOOP, - "possible infinite " - "loop detected, forcing break. name=(%s)", - name); + gf_smsg(table->name, GF_LOG_CRITICAL, 0, LG_MSG_DENTRY_CYCLIC_LOOP, + "name=%s", name, NULL); ret = -ENOENT; goto out; } @@ -1401,7 +1452,7 @@ __inode_path(inode_t *inode, const char *name, char **bufp) if (name) { len = strlen(name); - strncpy(buf + (i - len), name, len); + memcpy(buf + (i - len), name, len); buf[i - len - 1] = '/'; i -= (len + 1); } @@ -1411,7 +1462,7 @@ __inode_path(inode_t *inode, const char *name, char **bufp) trav = __dentry_search_arbit(itrav)) { itrav = trav->parent; len = strlen(trav->name); - strncpy(buf + (i - len), trav->name, len); + memcpy(buf + (i - len), trav->name, len); buf[i - len - 1] = '/'; i -= (len + 1); } @@ -1490,12 +1541,15 @@ static int inode_table_prune(inode_table_t *table) { int ret = 0; + int ret1 = 0; struct list_head purge = { 0, }; inode_t *del = NULL; inode_t *tmp = NULL; inode_t *entry = NULL; + uint64_t nlookup = 0; + int64_t lru_size = 0; if (!table) return -1; @@ -1504,7 +1558,11 @@ inode_table_prune(inode_table_t *table) pthread_mutex_lock(&table->lock); { - while (table->lru_limit && table->lru_size > (table->lru_limit)) { + if (!table->lru_limit) + goto purge_list; + + lru_size = table->lru_size; + while (lru_size > (table->lru_limit)) { if (list_empty(&table->lru)) { gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_INODE_LIST, @@ -1514,26 +1572,60 @@ inode_table_prune(inode_table_t *table) break; } + lru_size--; entry = list_entry(table->lru.next, inode_t, list); + /* The logic of invalidation is required only if invalidator_fn + is present */ + if (table->invalidator_fn) { + /* check for valid inode with 'nlookup' */ + nlookup = GF_ATOMIC_GET(entry->nlookup); + if (nlookup) { + if (entry->invalidate_sent) { + list_move_tail(&entry->list, &table->lru); + continue; + } + __inode_ref(entry, true); + tmp = 
entry; + break; + } + } table->lru_size--; __inode_retire(entry); - ret++; } + purge_list: list_splice_init(&table->purge, &purge); table->purge_size = 0; } pthread_mutex_unlock(&table->lock); - { - list_for_each_entry_safe(del, tmp, &purge, list) + /* Pick 1 inode for invalidation */ + if (tmp) { + xlator_t *old_THIS = THIS; + THIS = table->invalidator_xl; + ret1 = table->invalidator_fn(table->invalidator_xl, tmp); + THIS = old_THIS; + pthread_mutex_lock(&table->lock); { - list_del_init(&del->list); - __inode_forget(del, 0); - __inode_destroy(del); + if (!ret1) { + tmp->invalidate_sent = true; + __inode_unref(tmp, false); + } else { + /* Move this back to the lru list*/ + __inode_unref(tmp, true); + } } + pthread_mutex_unlock(&table->lock); + } + + /* Just so that if purge list is handled too, then clear it off */ + list_for_each_entry_safe(del, tmp, &purge, list) + { + list_del_init(&del->list); + inode_forget_atomic(del, 0); + __inode_destroy(del); } return ret; @@ -1550,20 +1642,26 @@ __inode_table_init_root(inode_table_t *table) if (!table) return; - root = __inode_create(table); + root = inode_create(table); + + list_add(&root->list, &table->lru); + table->lru_size++; iatt.ia_gfid[15] = 1; iatt.ia_ino = 1; iatt.ia_type = IA_IFDIR; - __inode_link(root, NULL, NULL, &iatt); + __inode_link(root, NULL, NULL, &iatt, 0); table->root = root; } inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl) +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), + xlator_t *invalidator_xl) { inode_table_t *new = NULL; + uint32_t mem_pool_size = lru_limit; int ret = -1; int i = 0; @@ -1575,20 +1673,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl) new->ctxcount = xl->graph->xl_count + 1; new->lru_limit = lru_limit; + new->invalidator_fn = invalidator_fn; + new->invalidator_xl = invalidator_xl; new->hashsize = 14057; /* TODO: Random Number?? */ /* In case FUSE is initing the inode table. 
*/ - if (lru_limit == 0) - lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; - - new->inode_pool = mem_pool_new(inode_t, lru_limit); + if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) + mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; + new->inode_pool = mem_pool_new(inode_t, mem_pool_size); if (!new->inode_pool) goto out; - new->dentry_pool = mem_pool_new(dentry_t, lru_limit); - + new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size); if (!new->dentry_pool) goto out; @@ -1620,6 +1718,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl) INIT_LIST_HEAD(&new->active); INIT_LIST_HEAD(&new->lru); INIT_LIST_HEAD(&new->purge); + INIT_LIST_HEAD(&new->invalidate); ret = gf_asprintf(&new->name, "%s/inode", xl->name); if (-1 == ret) { @@ -1627,6 +1726,8 @@ inode_table_new(size_t lru_limit, xlator_t *xl) ; } + new->cleanup_started = _gf_false; + __inode_table_init_root(new); pthread_mutex_init(&new->lock, NULL); @@ -1649,6 +1750,13 @@ out: return new; } +inode_table_t * +inode_table_new(uint32_t lru_limit, xlator_t *xl) +{ + /* Only fuse for now requires the inode table with invalidator */ + return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); +} + int inode_table_ctx_free(inode_table_t *table) { @@ -1770,6 +1878,7 @@ inode_table_destroy(inode_table_t *inode_table) */ pthread_mutex_lock(&inode_table->lock); { + inode_table->cleanup_started = _gf_true; /* Process lru list first as we need to unset their dentry * entries (the ones which may not be unset during * '__inode_passivate' as they were hashed) which in turn @@ -1782,24 +1891,32 @@ inode_table_destroy(inode_table_t *inode_table) */ while (!list_empty(&inode_table->lru)) { trav = list_first_entry(&inode_table->lru, inode_t, list); - __inode_forget(trav, 0); + inode_forget_atomic(trav, 0); __inode_retire(trav); inode_table->lru_size--; } + /* Same logic for invalidate list */ + while (!list_empty(&inode_table->invalidate)) { + trav = list_first_entry(&inode_table->invalidate, inode_t, list); + 
inode_forget_atomic(trav, 0); + __inode_retire(trav); + inode_table->invalidate_size--; + } + while (!list_empty(&inode_table->active)) { trav = list_first_entry(&inode_table->active, inode_t, list); /* forget and unref the inode to retire and add it to * purge list. By this time there should not be any * inodes present in the active list except for root * inode. Its a ref_leak otherwise. */ - if (trav != inode_table->root) + if (trav && (trav != inode_table->root)) gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_REF_COUNT, "Active inode(%p) with refcount" "(%d) found during cleanup", trav, trav->ref); - __inode_forget(trav, 0); + inode_forget_atomic(trav, 0); __inode_ref_reduce_by_n(trav, 0); } } @@ -2277,6 +2394,7 @@ inode_dump(inode_t *inode, char *prefix) struct list_head fd_list; int ref = 0; char key[GF_DUMP_MAX_BUF_LEN]; + uint64_t nlookup = 0; if (!inode) return; @@ -2289,11 +2407,13 @@ inode_dump(inode_t *inode, char *prefix) } { + nlookup = GF_ATOMIC_GET(inode->nlookup); gf_proc_dump_write("gfid", "%s", uuid_utoa(inode->gfid)); - gf_proc_dump_write("nlookup", "%ld", inode->nlookup); + gf_proc_dump_write("nlookup", "%" PRIu64, nlookup); gf_proc_dump_write("fd-count", "%u", inode->fd_count); gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count); gf_proc_dump_write("ref", "%u", inode->ref); + gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent); gf_proc_dump_write("ia_type", "%d", inode->ia_type); if (inode->_ctx) { inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx), @@ -2367,10 +2487,13 @@ inode_table_dump(inode_table_t *itable, char *prefix) gf_proc_dump_write(key, "%d", itable->lru_size); gf_proc_dump_build_key(key, prefix, "purge_size"); gf_proc_dump_write(key, "%d", itable->purge_size); + gf_proc_dump_build_key(key, prefix, "invalidate_size"); + gf_proc_dump_write(key, "%d", itable->invalidate_size); INODE_DUMP_LIST(&itable->active, key, prefix, "active"); INODE_DUMP_LIST(&itable->lru, key, prefix, 
"lru"); INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); + INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate"); pthread_mutex_unlock(&itable->lock); } @@ -2382,6 +2505,7 @@ inode_dump_to_dict(inode_t *inode, char *prefix, dict_t *dict) char key[GF_DUMP_MAX_BUF_LEN] = { 0, }; + uint64_t nlookup = 0; ret = TRY_LOCK(&inode->lock); if (ret) @@ -2393,7 +2517,8 @@ inode_dump_to_dict(inode_t *inode, char *prefix, dict_t *dict) goto out; snprintf(key, sizeof(key), "%s.nlookup", prefix); - ret = dict_set_uint64(dict, key, inode->nlookup); + nlookup = GF_ATOMIC_GET(inode->nlookup); + ret = dict_set_uint64(dict, key, nlookup); if (ret) goto out; @@ -2404,6 +2529,8 @@ inode_dump_to_dict(inode_t *inode, char *prefix, dict_t *dict) snprintf(key, sizeof(key), "%s.ia_type", prefix); ret = dict_set_int32(dict, key, inode->ia_type); + if (ret) + goto out; out: UNLOCK(&inode->lock); @@ -2417,13 +2544,19 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict) 0, }; int ret = 0; +#ifdef DEBUG inode_t *inode = NULL; int count = 0; - +#endif ret = pthread_mutex_trylock(&itable->lock); if (ret) return; + snprintf(key, sizeof(key), "%s.itable.lru_limit", prefix); + ret = dict_set_uint32(dict, key, itable->lru_limit); + if (ret) + goto out; + snprintf(key, sizeof(key), "%s.itable.active_size", prefix); ret = dict_set_uint32(dict, key, itable->active_size); if (ret) @@ -2439,6 +2572,13 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict) if (ret) goto out; +#ifdef DEBUG + /* Dumping inode details in dictionary and sending it to CLI is not + required as when a developer (or support team) asks for this command + output, they just want to get top level detail of inode table. + If one wants to debug, let them take statedump and debug, this + wouldn't be available in CLI during production setup. 
+ */ list_for_each_entry(inode, &itable->active, list) { snprintf(key, sizeof(key), "%s.itable.active%d", prefix, count++); @@ -2458,6 +2598,7 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict) snprintf(key, sizeof(key), "%s.itable.purge%d", prefix, count++); inode_dump_to_dict(inode, key, dict); } +#endif out: pthread_mutex_unlock(&itable->lock); diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c index c9e0ff35198..4e7d2958764 100644 --- a/libglusterfs/src/iobuf.c +++ b/libglusterfs/src/iobuf.c @@ -8,10 +8,10 @@ cases as published by the Free Software Foundation. */ -#include "iobuf.h" -#include "statedump.h" +#include "glusterfs/iobuf.h" +#include "glusterfs/statedump.h" #include <stdio.h> -#include "libglusterfs-messages.h" +#include "glusterfs/libglusterfs-messages.h" /* TODO: implement destroy margins and prefetching of arenas @@ -21,58 +21,51 @@ (sizeof(gf_iobuf_init_config) / (sizeof(struct iobuf_init_config))) /* Make sure this array is sorted based on pagesize */ -struct iobuf_init_config gf_iobuf_init_config[] = { +static const struct iobuf_init_config gf_iobuf_init_config[] = { /* { pagesize, num_pages }, */ {128, 1024}, {512, 512}, {2 * 1024, 512}, {8 * 1024, 128}, {32 * 1024, 64}, {128 * 1024, 32}, {256 * 1024, 8}, {1 * 1024 * 1024, 2}, }; -int -gf_iobuf_get_arena_index(size_t page_size) +static int +gf_iobuf_get_arena_index(const size_t page_size) { - int i = -1; + int i; for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) { if (page_size <= gf_iobuf_init_config[i].pagesize) - break; + return i; } - if (i >= IOBUF_ARENA_MAX_INDEX) - i = -1; - - return i; + return -1; } -size_t -gf_iobuf_get_pagesize(size_t page_size) +static size_t +gf_iobuf_get_pagesize(const size_t page_size, int *index) { - int i = 0; + int i; size_t size = 0; for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) { size = gf_iobuf_init_config[i].pagesize; - if (page_size <= size) - break; + if (page_size <= size) { + if (index != NULL) + *index = i; + return 
size; + } } - if (i >= IOBUF_ARENA_MAX_INDEX) - size = -1; - - return size; + return -1; } -void +static void __iobuf_arena_init_iobufs(struct iobuf_arena *iobuf_arena) { - int iobuf_cnt = 0; + const int iobuf_cnt = iobuf_arena->page_count; struct iobuf *iobuf = NULL; int offset = 0; int i = 0; - GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); - - iobuf_cnt = iobuf_arena->page_count; - iobuf_arena->iobufs = GF_CALLOC(sizeof(*iobuf), iobuf_cnt, gf_common_mt_iobuf); if (!iobuf_arena->iobufs) @@ -94,27 +87,23 @@ __iobuf_arena_init_iobufs(struct iobuf_arena *iobuf_arena) iobuf++; } -out: return; } -void +static void __iobuf_arena_destroy_iobufs(struct iobuf_arena *iobuf_arena) { int iobuf_cnt = 0; struct iobuf *iobuf = NULL; int i = 0; - GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); - - iobuf_cnt = iobuf_arena->page_count; - if (!iobuf_arena->iobufs) { gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, LG_MSG_IOBUFS_NOT_FOUND, "iobufs not found"); return; } + iobuf_cnt = iobuf_arena->page_count; iobuf = iobuf_arena->iobufs; for (i = 0; i < iobuf_cnt; i++) { GF_ASSERT(GF_ATOMIC_GET(iobuf->ref) == 0); @@ -126,11 +115,10 @@ __iobuf_arena_destroy_iobufs(struct iobuf_arena *iobuf_arena) GF_FREE(iobuf_arena->iobufs); -out: return; } -void +static void __iobuf_arena_destroy(struct iobuf_pool *iobuf_pool, struct iobuf_arena *iobuf_arena) { @@ -149,12 +137,13 @@ out: return; } -struct iobuf_arena * +static struct iobuf_arena * __iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size, int32_t num_iobufs) { struct iobuf_arena *iobuf_arena = NULL; size_t rounded_size = 0; + int index = 0; /* unused */ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); @@ -168,7 +157,7 @@ __iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size, INIT_LIST_HEAD(&iobuf_arena->passive.list); iobuf_arena->iobuf_pool = iobuf_pool; - rounded_size = gf_iobuf_get_pagesize(page_size); + rounded_size = gf_iobuf_get_pagesize(page_size, &index); iobuf_arena->page_size = rounded_size; 
iobuf_arena->page_count = num_iobufs; @@ -179,8 +168,7 @@ __iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (iobuf_arena->mem_base == MAP_FAILED) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_MAPPING_FAILED, - "mapping failed"); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_MAPPING_FAILED, NULL); goto err; } @@ -192,8 +180,7 @@ __iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size, __iobuf_arena_init_iobufs(iobuf_arena); if (!iobuf_arena->iobufs) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_IOBUF_FAILED, - "init failed"); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_IOBUF_FAILED, NULL); goto err; } @@ -208,24 +195,15 @@ out: return NULL; } -struct iobuf_arena * -__iobuf_arena_unprune(struct iobuf_pool *iobuf_pool, size_t page_size) +static struct iobuf_arena * +__iobuf_arena_unprune(struct iobuf_pool *iobuf_pool, const size_t page_size, + const int index) { struct iobuf_arena *iobuf_arena = NULL; struct iobuf_arena *tmp = NULL; - int index = 0; GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); - index = gf_iobuf_get_arena_index(page_size); - if (index == -1) { - gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, - "page_size (%zu) of iobufs in arena being added is " - "greater than max available", - page_size); - return NULL; - } - list_for_each_entry(tmp, &iobuf_pool->purge[index], list) { list_del_init(&tmp->list); @@ -236,55 +214,27 @@ out: return iobuf_arena; } -struct iobuf_arena * -__iobuf_pool_add_arena(struct iobuf_pool *iobuf_pool, size_t page_size, - int32_t num_pages) +static struct iobuf_arena * +__iobuf_pool_add_arena(struct iobuf_pool *iobuf_pool, const size_t page_size, + const int32_t num_pages, const int index) { struct iobuf_arena *iobuf_arena = NULL; - int index = 0; - - index = gf_iobuf_get_arena_index(page_size); - if (index == -1) { - gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, - "page_size (%zu) of iobufs in 
arena being added is " - "greater than max available", - page_size); - return NULL; - } - iobuf_arena = __iobuf_arena_unprune(iobuf_pool, page_size); - - if (!iobuf_arena) - iobuf_arena = __iobuf_arena_alloc(iobuf_pool, page_size, num_pages); + iobuf_arena = __iobuf_arena_unprune(iobuf_pool, page_size, index); if (!iobuf_arena) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, - "arena not found"); - return NULL; + iobuf_arena = __iobuf_arena_alloc(iobuf_pool, page_size, num_pages); + if (!iobuf_arena) { + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, + NULL); + return NULL; + } } list_add(&iobuf_arena->list, &iobuf_pool->arenas[index]); return iobuf_arena; } -struct iobuf_arena * -iobuf_pool_add_arena(struct iobuf_pool *iobuf_pool, size_t page_size, - int32_t num_pages) -{ - struct iobuf_arena *iobuf_arena = NULL; - - GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); - - pthread_mutex_lock(&iobuf_pool->mutex); - { - iobuf_arena = __iobuf_pool_add_arena(iobuf_pool, page_size, num_pages); - } - pthread_mutex_unlock(&iobuf_pool->mutex); - -out: - return iobuf_arena; -} - /* This function destroys all the iobufs and the iobuf_pool */ void iobuf_pool_destroy(struct iobuf_pool *iobuf_pool) @@ -397,14 +347,15 @@ iobuf_pool_new(void) iobuf_pool->mr_list[i] = NULL; } - arena_size = 0; + /* No locking required here + * as no one else can use this pool yet + */ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) { page_size = gf_iobuf_init_config[i].pagesize; num_pages = gf_iobuf_init_config[i].num_pages; - iobuf_pool_add_arena(iobuf_pool, page_size, num_pages); - - arena_size += page_size * num_pages; + if (__iobuf_pool_add_arena(iobuf_pool, page_size, num_pages, i) != NULL) + arena_size += page_size * num_pages; } /* Need an arena to handle all the bigger iobuf requests */ @@ -416,12 +367,10 @@ out: return iobuf_pool; } -void +static void __iobuf_arena_prune(struct iobuf_pool *iobuf_pool, - struct iobuf_arena *iobuf_arena, int index) + struct 
iobuf_arena *iobuf_arena, const int index) { - GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); - /* code flow comes here only if the arena is in purge list and we can * free the arena only if we have at least one arena in 'arenas' list * (ie, at least few iobufs free in arena), that way, there won't @@ -470,23 +419,13 @@ out: return; } -struct iobuf_arena * -__iobuf_select_arena(struct iobuf_pool *iobuf_pool, size_t page_size) +/* Always called under the iobuf_pool mutex lock */ +static struct iobuf_arena * +__iobuf_select_arena(struct iobuf_pool *iobuf_pool, const size_t page_size, + const int index) { struct iobuf_arena *iobuf_arena = NULL; struct iobuf_arena *trav = NULL; - int index = 0; - - GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); - - index = gf_iobuf_get_arena_index(page_size); - if (index == -1) { - gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, - "page_size (%zu) of iobufs in arena being added is " - "greater than max available", - page_size); - return NULL; - } /* look for unused iobuf from the head-most arena */ list_for_each_entry(trav, &iobuf_pool->arenas[index], list) @@ -500,23 +439,25 @@ __iobuf_select_arena(struct iobuf_pool *iobuf_pool, size_t page_size) if (!iobuf_arena) { /* all arenas were full, find the right count to add */ iobuf_arena = __iobuf_pool_add_arena( - iobuf_pool, page_size, gf_iobuf_init_config[index].num_pages); + iobuf_pool, page_size, gf_iobuf_init_config[index].num_pages, + index); } -out: return iobuf_arena; } -struct iobuf * -__iobuf_get(struct iobuf_arena *iobuf_arena, size_t page_size) +/* Always called under the iobuf_pool mutex lock */ +static struct iobuf * +__iobuf_get(struct iobuf_pool *iobuf_pool, const size_t page_size, + const int index) { struct iobuf *iobuf = NULL; - struct iobuf_pool *iobuf_pool = NULL; - int index = 0; - - GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); + struct iobuf_arena *iobuf_arena = NULL; - iobuf_pool = iobuf_arena->iobuf_pool; + /* most eligible arena for picking an 
iobuf */ + iobuf_arena = __iobuf_select_arena(iobuf_pool, page_size, index); + if (!iobuf_arena) + return NULL; list_for_each_entry(iobuf, &iobuf_arena->passive.list, list) break; @@ -533,26 +474,15 @@ __iobuf_get(struct iobuf_arena *iobuf_arena, size_t page_size) iobuf_arena->max_active = iobuf_arena->active_cnt; if (iobuf_arena->passive_cnt == 0) { - index = gf_iobuf_get_arena_index(page_size); - if (index == -1) { - gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, - "page_size (%zu) of" - " iobufs in arena being added is greater " - "than max available", - page_size); - goto out; - } - list_del(&iobuf_arena->list); list_add(&iobuf_arena->list, &iobuf_pool->filled[index]); } -out: return iobuf; } -struct iobuf * -iobuf_get_from_stdalloc(struct iobuf_pool *iobuf_pool, size_t page_size) +static struct iobuf * +iobuf_get_from_stdalloc(struct iobuf_pool *iobuf_pool, const size_t page_size) { struct iobuf *iobuf = NULL; struct iobuf_arena *iobuf_arena = NULL; @@ -598,14 +528,14 @@ struct iobuf * iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size) { struct iobuf *iobuf = NULL; - struct iobuf_arena *iobuf_arena = NULL; size_t rounded_size = 0; + int index = 0; if (page_size == 0) { page_size = iobuf_pool->default_page_size; } - rounded_size = gf_iobuf_get_pagesize(page_size); + rounded_size = gf_iobuf_get_pagesize(page_size, &index); if (rounded_size == -1) { /* make sure to provide the requested buffer with standard memory allocations */ @@ -619,24 +549,26 @@ iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size) iobuf_pool->request_misses++; return iobuf; + } else if (index == -1) { + gf_smsg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, + "page_size=%zu", page_size, NULL); + return NULL; } pthread_mutex_lock(&iobuf_pool->mutex); { - /* most eligible arena for picking an iobuf */ - iobuf_arena = __iobuf_select_arena(iobuf_pool, rounded_size); - if (!iobuf_arena) - goto unlock; - - iobuf = __iobuf_get(iobuf_arena, rounded_size); - if 
(!iobuf) - goto unlock; + iobuf = __iobuf_get(iobuf_pool, rounded_size, index); + if (!iobuf) { + pthread_mutex_unlock(&iobuf_pool->mutex); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND, + NULL); + goto post_unlock; + } iobuf_ref(iobuf); } -unlock: pthread_mutex_unlock(&iobuf_pool->mutex); - +post_unlock: return iobuf; } @@ -676,46 +608,41 @@ struct iobuf * iobuf_get(struct iobuf_pool *iobuf_pool) { struct iobuf *iobuf = NULL; - struct iobuf_arena *iobuf_arena = NULL; + int index = 0; GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); + index = gf_iobuf_get_arena_index(iobuf_pool->default_page_size); + if (index == -1) { + gf_smsg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, + "page_size=%zu", iobuf_pool->default_page_size, NULL); + return NULL; + } + pthread_mutex_lock(&iobuf_pool->mutex); { - /* most eligible arena for picking an iobuf */ - iobuf_arena = __iobuf_select_arena(iobuf_pool, - iobuf_pool->default_page_size); - if (!iobuf_arena) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, - "arena not found"); - goto unlock; - } - - iobuf = __iobuf_get(iobuf_arena, iobuf_pool->default_page_size); + iobuf = __iobuf_get(iobuf_pool, iobuf_pool->default_page_size, index); if (!iobuf) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND, - "iobuf not found"); - goto unlock; + pthread_mutex_unlock(&iobuf_pool->mutex); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND, + NULL); + goto out; } iobuf_ref(iobuf); } -unlock: pthread_mutex_unlock(&iobuf_pool->mutex); out: return iobuf; } -void +static void __iobuf_put(struct iobuf *iobuf, struct iobuf_arena *iobuf_arena) { struct iobuf_pool *iobuf_pool = NULL; int index = 0; - GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); - GF_VALIDATE_OR_GOTO("iobuf", iobuf, out); - iobuf_pool = iobuf_arena->iobuf_pool; index = gf_iobuf_get_arena_index(iobuf_arena->page_size); @@ -751,6 +678,7 @@ __iobuf_put(struct iobuf *iobuf, struct iobuf_arena *iobuf_arena) if 
(iobuf_arena->active_cnt == 0) { list_del(&iobuf_arena->list); list_add_tail(&iobuf_arena->list, &iobuf_pool->purge[index]); + GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); __iobuf_arena_prune(iobuf_pool, iobuf_arena, index); } out: @@ -767,15 +695,14 @@ iobuf_put(struct iobuf *iobuf) iobuf_arena = iobuf->iobuf_arena; if (!iobuf_arena) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, - "arena not found"); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, NULL); return; } iobuf_pool = iobuf_arena->iobuf_pool; if (!iobuf_pool) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, - "iobuf pool not found"); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, "iobuf", + NULL); return; } @@ -1014,14 +941,12 @@ iobuf_size(struct iobuf *iobuf) GF_VALIDATE_OR_GOTO("iobuf", iobuf, out); if (!iobuf->iobuf_arena) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, - "arena not found"); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, NULL); goto out; } if (!iobuf->iobuf_arena->iobuf_pool) { - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, - "pool not found"); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, NULL); goto out; } diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c index afbb6dcad80..ce4b0e8255d 100644 --- a/libglusterfs/src/latency.c +++ b/libglusterfs/src/latency.c @@ -13,41 +13,35 @@ * latencies of FOPs broken down by subvolumes. 
*/ -#include "glusterfs.h" -#include "xlator.h" -#include "common-utils.h" -#include "statedump.h" -#include "libglusterfs-messages.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/statedump.h" -void -gf_update_latency(call_frame_t *frame) +gf_latency_t * +gf_latency_new(size_t n) { - double elapsed; - struct timespec *begin, *end; - - fop_latency_t *lat; - - begin = &frame->begin; - end = &frame->end; + int i = 0; + gf_latency_t *lat = NULL; - if (!(begin->tv_sec && end->tv_sec)) - goto out; + lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t); + if (!lat) + return NULL; - elapsed = (end->tv_sec - begin->tv_sec) * 1e9 + - (end->tv_nsec - begin->tv_nsec); + for (i = 0; i < n; i++) { + gf_latency_reset(lat + i); + } + return lat; +} - if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) { - gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d", - frame->op); +void +gf_latency_update(gf_latency_t *lat, struct timespec *begin, + struct timespec *end) +{ + if (!(begin->tv_sec && end->tv_sec)) { + /*Measure latency might have been enabled/disabled during the op*/ return; } - /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros - set it right anyways for those frames */ - if (!frame->op) - frame->op = frame->root->op; - - lat = &frame->this->stats.interval.latencies[frame->op]; + double elapsed = gf_tsdiff(begin, end); if (lat->max < elapsed) lat->max = elapsed; @@ -57,40 +51,34 @@ gf_update_latency(call_frame_t *frame) lat->total += elapsed; lat->count++; -out: - return; } void -gf_proc_dump_latency_info(xlator_t *xl) +gf_latency_reset(gf_latency_t *lat) { - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char key[GF_DUMP_MAX_BUF_LEN]; - int i; - - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); - gf_proc_dump_add_section("%s", key_prefix); - - for (i = 0; i < GF_FOP_MAXVALUE; i++) { - gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]); - - fop_latency_t *lat = &xl->stats.interval.latencies[i]; + if 
(!lat) + return; + memset(lat, 0, sizeof(*lat)); + lat->min = ULLONG_MAX; + /* make sure 'min' is set to high value, so it would be + properly set later */ +} - /* Doesn't make sense to continue if there are no fops - came in the given interval */ - if (!lat->count) - continue; +void +gf_frame_latency_update(call_frame_t *frame) +{ + gf_latency_t *lat; + /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros + set it right anyways for those frames */ + if (!frame->op) + frame->op = frame->root->op; - gf_proc_dump_write(key, "%.03f,%" PRId64 ",%.03f", - (lat->total / lat->count), lat->count, lat->total); + if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) { + gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d", + frame->op); + return; } - memset(xl->stats.interval.latencies, 0, - sizeof(xl->stats.interval.latencies)); - - /* make sure 'min' is set to high value, so it would be - properly set later */ - for (i = 0; i < GF_FOP_MAXVALUE; i++) { - xl->stats.interval.latencies[i].min = 0xffffffff; - } + lat = &frame->this->stats.interval.latencies[frame->op]; + gf_latency_update(lat, &frame->begin, &frame->end); } diff --git a/libglusterfs/src/libglusterfs-messages.h b/libglusterfs/src/libglusterfs-messages.h deleted file mode 100644 index d40d644fad9..00000000000 --- a/libglusterfs/src/libglusterfs-messages.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. - */ - -#ifndef _LG_MESSAGES_H_ -#define _LG_MESSAGES_H_ - -#include "glfs-message-id.h" - -/* To add new message IDs, append new identifiers at the end of the list. - * - * Never remove a message ID. 
If it's not used anymore, you can rename it or - * leave it as it is, but not delete it. This is to prevent reutilization of - * IDs by other messages. - * - * The component name must match one of the entries defined in - * glfs-message-id.h. - */ - -GLFS_MSGID( - LIBGLUSTERFS, LG_MSG_ASPRINTF_FAILED, LG_MSG_INVALID_ENTRY, - LG_MSG_COUNT_LESS_THAN_ZERO, LG_MSG_COUNT_LESS_THAN_DATA_PAIRS, - LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, LG_MSG_PAIRS_LESS_THAN_COUNT, - LG_MSG_KEY_OR_VALUE_NULL, LG_MSG_FAILED_TO_LOG_DICT, - LG_MSG_NULL_VALUE_IN_DICT, LG_MSG_DIR_OP_FAILED, - LG_MSG_STORE_HANDLE_CREATE_FAILED, LG_MSG_FILE_OP_FAILED, - LG_MSG_FILE_STAT_FAILED, LG_MSG_LOCK_FAILED, LG_MSG_UNLOCK_FAILED, - LG_MSG_DICT_SERIAL_FAILED, LG_MSG_DICT_UNSERIAL_FAILED, LG_MSG_NO_MEMORY, - LG_MSG_VOLUME_ERROR, LG_MSG_SUB_VOLUME_ERROR, LG_MSG_SYNTAX_ERROR, - LG_MSG_BACKTICK_PARSE_FAILED, LG_MSG_BUFFER_ERROR, LG_MSG_STRDUP_ERROR, - LG_MSG_HASH_FUNC_ERROR, LG_MSG_GET_BUCKET_FAILED, LG_MSG_INSERT_FAILED, - LG_MSG_OUT_OF_RANGE, LG_MSG_VALIDATE_RETURNS, LG_MSG_VALIDATE_REC_FAILED, - LG_MSG_RB_TABLE_CREATE_FAILED, LG_MSG_PATH_NOT_FOUND, - LG_MSG_EXPAND_FD_TABLE_FAILED, LG_MSG_MAPPING_FAILED, - LG_MSG_INIT_IOBUF_FAILED, LG_MSG_PAGE_SIZE_EXCEEDED, LG_MSG_ARENA_NOT_FOUND, - LG_MSG_IOBUF_NOT_FOUND, LG_MSG_POOL_NOT_FOUND, LG_MSG_SET_ATTRIBUTE_FAILED, - LG_MSG_READ_ATTRIBUTE_FAILED, LG_MSG_UNMOUNT_FAILED, - LG_MSG_LATENCY_MEASUREMENT_STATE, LG_MSG_NO_PERM, LG_MSG_NO_KEY, - LG_MSG_DICT_NULL, LG_MSG_INIT_TIMER_FAILED, LG_MSG_FD_ANONYMOUS_FAILED, - LG_MSG_FD_CREATE_FAILED, LG_MSG_BUFFER_FULL, LG_MSG_FWRITE_FAILED, - LG_MSG_PRINT_FAILED, LG_MSG_MEM_POOL_DESTROY, - LG_MSG_EXPAND_CLIENT_TABLE_FAILED, LG_MSG_DISCONNECT_CLIENT, - LG_MSG_PIPE_CREATE_FAILED, LG_MSG_SET_PIPE_FAILED, - LG_MSG_REGISTER_PIPE_FAILED, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS, - LG_MSG_INDEX_NOT_FOUND, LG_MSG_EPOLL_FD_CREATE_FAILED, - LG_MSG_SLOT_NOT_FOUND, LG_MSG_STALE_FD_FOUND, LG_MSG_GENERATION_MISMATCH, - LG_MSG_PTHREAD_KEY_CREATE_FAILED, 
LG_MSG_TRANSLATOR_INIT_FAILED, - LG_MSG_UUID_BUF_INIT_FAILED, LG_MSG_LKOWNER_BUF_INIT_FAILED, - LG_MSG_SYNCTASK_INIT_FAILED, LG_MSG_SYNCOPCTX_INIT_FAILED, - LG_MSG_GLOBAL_INIT_FAILED, LG_MSG_PTHREAD_FAILED, LG_MSG_DIR_IS_SYMLINK, - LG_MSG_RESOLVE_HOSTNAME_FAILED, LG_MSG_GETADDRINFO_FAILED, - LG_MSG_GETNAMEINFO_FAILED, LG_MSG_PATH_ERROR, LG_MSG_INET_PTON_FAILED, - LG_MSG_NEGATIVE_NUM_PASSED, LG_MSG_GETHOSTNAME_FAILED, - LG_MSG_RESERVED_PORTS_ERROR, LG_MSG_INVALID_PORT, LG_MSG_INVALID_FAMILY, - LG_MSG_CONVERSION_FAILED, LG_MSG_SKIP_HEADER_FAILED, LG_MSG_INVALID_LOG, - LG_MSG_UTIMES_FAILED, LG_MSG_BACKTRACE_SAVE_FAILED, LG_MSG_INIT_FAILED, - LG_MSG_VALIDATION_FAILED, LG_MSG_GRAPH_ERROR, LG_MSG_UNKNOWN_OPTIONS_FAILED, - LG_MSG_CTX_NULL, LG_MSG_TMPFILE_CREATE_FAILED, LG_MSG_DLOPEN_FAILED, - LG_MSG_LOAD_FAILED, LG_MSG_DLSYM_ERROR, LG_MSG_TREE_NOT_FOUND, - LG_MSG_PER_DENTRY, LG_MSG_DENTRY, LG_MSG_GETIFADDRS_FAILED, - LG_MSG_REGEX_OP_FAILED, LG_MSG_FRAME_ERROR, LG_MSG_SET_PARAM_FAILED, - LG_MSG_GET_PARAM_FAILED, LG_MSG_PREPARE_FAILED, LG_MSG_EXEC_FAILED, - LG_MSG_BINDING_FAILED, LG_MSG_DELETE_FAILED, LG_MSG_GET_ID_FAILED, - LG_MSG_CREATE_FAILED, LG_MSG_PARSE_FAILED, LG_MSG_GETCONTEXT_FAILED, - LG_MSG_UPDATE_FAILED, LG_MSG_QUERY_CALL_BACK_FAILED, - LG_MSG_GET_RECORD_FAILED, LG_MSG_DB_ERROR, LG_MSG_CONNECTION_ERROR, - LG_MSG_NOT_MULTITHREAD_MODE, LG_MSG_SKIP_PATH, LG_MSG_INVALID_FOP, - LG_MSG_QUERY_FAILED, LG_MSG_CLEAR_COUNTER_FAILED, LG_MSG_LOCK_LIST_FAILED, - LG_MSG_UNLOCK_LIST_FAILED, LG_MSG_ADD_TO_LIST_FAILED, LG_MSG_INIT_DB_FAILED, - LG_MSG_DELETE_FROM_LIST_FAILED, LG_MSG_CLOSE_CONNECTION_FAILED, - LG_MSG_INSERT_OR_UPDATE_FAILED, LG_MSG_FIND_OP_FAILED, - LG_MSG_CONNECTION_INIT_FAILED, LG_MSG_COMPLETED_TASK, LG_MSG_WAKE_UP_ZOMBIE, - LG_MSG_REWAITING_TASK, LG_MSG_SLEEP_ZOMBIE, LG_MSG_SWAPCONTEXT_FAILED, - LG_MSG_UNSUPPORTED_PLUGIN, LG_MSG_INVALID_DB_TYPE, LG_MSG_UNDERSIZED_BUF, - LG_MSG_DATA_CONVERSION_ERROR, LG_MSG_DICT_ERROR, LG_MSG_IOBUFS_NOT_FOUND, - 
LG_MSG_ENTRIES_NULL, LG_MSG_FD_NOT_FOUND_IN_FDTABLE, - LG_MSG_REALLOC_FOR_FD_PTR_FAILED, LG_MSG_DICT_SET_FAILED, LG_MSG_NULL_PTR, - LG_MSG_RBTHASH_INIT_BUCKET_FAILED, LG_MSG_ASSERTION_FAILED, - LG_MSG_HOSTNAME_NULL, LG_MSG_INVALID_IPV4_FORMAT, - LG_MSG_CTX_CLEANUP_STARTED, LG_MSG_TIMER_REGISTER_ERROR, - LG_MSG_PTR_HEADER_CORRUPTED, LG_MSG_INVALID_UPLINK, LG_MSG_CLIENT_NULL, - LG_MSG_XLATOR_DOES_NOT_IMPLEMENT, LG_MSG_DENTRY_NOT_FOUND, - LG_MSG_INODE_NOT_FOUND, LG_MSG_INODE_TABLE_NOT_FOUND, - LG_MSG_DENTRY_CREATE_FAILED, LG_MSG_INODE_CONTEXT_FREED, - LG_MSG_UNKNOWN_LOCK_TYPE, LG_MSG_UNLOCK_BEFORE_LOCK, - LG_MSG_LOCK_OWNER_ERROR, LG_MSG_MEMPOOL_PTR_NULL, - LG_MSG_QUOTA_XATTRS_MISSING, LG_MSG_INVALID_STRING, LG_MSG_BIND_REF, - LG_MSG_REF_COUNT, LG_MSG_INVALID_ARG, LG_MSG_VOL_OPTION_ADD, - LG_MSG_XLATOR_OPTION_INVALID, LG_MSG_GETTIMEOFDAY_FAILED, - LG_MSG_GRAPH_INIT_FAILED, LG_MSG_EVENT_NOTIFY_FAILED, - LG_MSG_ACTIVE_GRAPH_NULL, LG_MSG_VOLFILE_PARSE_ERROR, LG_MSG_FD_INODE_NULL, - LG_MSG_INVALID_VOLFILE_ENTRY, LG_MSG_PER_DENTRY_FAILED, - LG_MSG_PARENT_DENTRY_NOT_FOUND, LG_MSG_DENTRY_CYCLIC_LOOP, - LG_MSG_INVALID_POLL_IN, LG_MSG_INVALID_POLL_OUT, LG_MSG_EPOLL_FD_ADD_FAILED, - LG_MSG_EPOLL_FD_DEL_FAILED, LG_MSG_EPOLL_FD_MODIFY_FAILED, - LG_MSG_STARTED_EPOLL_THREAD, LG_MSG_EXITED_EPOLL_THREAD, - LG_MSG_START_EPOLL_THREAD_FAILED, LG_MSG_FALLBACK_TO_POLL, - LG_MSG_QUOTA_CONF_ERROR, LG_MSG_RBTHASH_GET_ENTRY_FAILED, - LG_MSG_RBTHASH_GET_BUCKET_FAILED, LG_MSG_RBTHASH_INSERT_FAILED, - LG_MSG_RBTHASH_INIT_ENTRY_FAILED, LG_MSG_TMPFILE_DELETE_FAILED, - LG_MSG_MEMPOOL_INVALID_FREE, LG_MSG_LOCK_FAILURE, LG_MSG_SET_LOG_LEVEL, - LG_MSG_VERIFY_PLATFORM, LG_MSG_RUNNER_LOG, LG_MSG_LEASEID_BUF_INIT_FAILED, - LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST, - LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED, - LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG, - LG_MSG_XXH64_TO_GFID_FAILED); - -#endif /* !_LG_MESSAGES_H_ */ diff --git 
a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index c5b322a9bdf..5f18cd56cbe 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -92,6 +92,8 @@ args_xattrop_cbk_store args_xattrop_store args_zerofill_cbk_store args_zerofill_store +args_copy_file_range_cbk_store +args_copy_file_range_store bin_to_data call_resume call_resume_keep_stub @@ -127,10 +129,6 @@ cluster_uninodelk cluster_unlink cluster_xattrop cluster_xattrop_cbk -compound_args_cbk_alloc -compound_args_cbk_cleanup -compound_args_cleanup -compound_fop_alloc copy_opts_to_child create_frame data_copy @@ -351,7 +349,10 @@ default_put default_put_cbk default_put_failure_cbk default_put_resume -__dentry_grep +default_copy_file_range +default_copy_file_range_cbk +default_copy_file_range_failure_cbk +default_copy_file_range_resume dht_is_linkfile dict_add dict_addn @@ -374,6 +375,7 @@ dict_get_bin dict_get_double dict_get_gfuuid dict_get_iatt +dict_get_mdata dict_get_int16 dict_get_int32 dict_get_int32n @@ -399,6 +401,7 @@ dict_rename_key dict_reset dict_serialize dict_serialized_length +dict_serialized_length_lk dict_serialize_value_with_delim dict_set dict_setn @@ -410,6 +413,7 @@ dict_set_dynstrn dict_set_dynstr_with_alloc dict_set_gfuuid dict_set_iatt +dict_set_mdata dict_set_int16 dict_set_int32 dict_set_int32n @@ -434,19 +438,20 @@ eh_dump eh_new eh_save_history entry_copy -event_dispatch -event_dispatch_destroy -event_handled -event_pool_destroy -event_pool_new -event_reconfigure_threads -event_register -event_select_on -event_unregister -event_unregister_close +gf_event_dispatch +gf_event_dispatch_destroy +gf_event_handled +gf_event_pool_destroy +gf_event_pool_new +gf_event_reconfigure_threads +gf_event_register +gf_event_select_on +gf_event_unregister +gf_event_unregister_close fd_anonymous fd_anonymous_with_flags fd_bind +fd_close fd_create fd_create_uint64 __fd_ctx_del @@ -471,6 +476,8 @@ fd_unref _fini fop_access_stub fop_create_stub 
+fop_copy_file_range_stub +fop_copy_file_range_cbk_stub fop_discard_stub fop_entrylk_stub fop_enum_to_pri_string @@ -500,6 +507,7 @@ fop_lease_stub fop_link_stub fop_lk_stub fop_log_level +fop_lookup_cbk_stub fop_lookup_stub fop_mkdir_stub fop_mknod_stub @@ -539,14 +547,16 @@ get_checksum_for_path get_file_mtime get_host_name get_mem_size -get_new_dict get_path_name -get_struct_variable -get_vol_type get_xlator_by_name get_xlator_by_type gf_array_insertionsort gf_asprintf +gf_async +gf_async_adjust_threads +gf_async_ctrl +gf_async_init +gf_async_fini gf_backtrace_save gf_bits_count gf_bits_index @@ -575,6 +585,7 @@ gf_dirent_free gf_dirent_orig_offset gf_dm_hashfn gf_dnscache_init +gf_dnscache_deinit gf_errno_to_error gf_error_to_errno _gf_event @@ -683,10 +694,10 @@ gf_store_read_and_tokenize gf_store_rename_tmppath gf_store_retrieve_value gf_store_save_value +gf_store_save_items gf_store_unlink_tmppath gf_store_unlock gf_string2boolean -gf_string2bytesize gf_string2bytesize_int64 gf_string2bytesize_uint64 gf_string2double @@ -704,7 +715,10 @@ gf_strTrim gf_strstr gf_thread_cleanup_xint gf_thread_create +gf_thread_vcreate gf_thread_create_detached +gf_thread_set_name +gf_thread_set_vname gf_timer_call_after gf_timer_call_cancel gf_timer_registry_destroy @@ -784,6 +798,7 @@ __inode_find inode_find inode_find_directory_name inode_forget +inode_forget_with_unref inode_from_path inode_grep inode_grep_for_gfid @@ -808,6 +823,7 @@ inode_table_destroy_all inode_table_dump inode_table_dump_to_dict inode_table_new +inode_table_with_invalidator __inode_table_set_lru_limit inode_table_set_lru_limit inode_unlink @@ -860,8 +876,7 @@ mem_get0 mem_pool_destroy mem_pool_new_fn mem_pools_fini -mem_pools_init_early -mem_pools_init_late +mem_pools_init mem_put mkdir_p next_token @@ -924,6 +939,12 @@ syncbarrier_destroy syncbarrier_init syncbarrier_wait syncbarrier_wake +synccond_init +synccond_destroy +synccond_wait +synccond_timedwait +synccond_signal +synccond_broadcast 
syncenv_destroy syncenv_new synclock_destroy @@ -934,6 +955,7 @@ synclock_unlock syncop_access syncop_close syncop_create +syncop_copy_file_range syncopctx_getctx syncopctx_setfsgid syncopctx_setfsgroups @@ -1000,6 +1022,7 @@ synctask_new synctask_new1 synctask_set synctask_setid +synctask_sleep synctask_wake synctask_yield sys_access @@ -1007,6 +1030,7 @@ sys_chmod sys_chown sys_close sys_closedir +sys_copy_file_range sys_creat sys_fallocate sys_fchmod @@ -1024,6 +1048,7 @@ sys_futimes sys_lchown sys_lgetxattr sys_link +sys_linkat sys_llistxattr sys_lremovexattr sys_lseek @@ -1047,15 +1072,24 @@ sys_rmdir sys_stat sys_statvfs sys_symlink +sys_symlinkat sys_truncate sys_unlink +sys_unlinkat sys_utimensat sys_write sys_writev +sys_socket +sys_accept +sys_kill +sys_sysctl tbf_init tbf_throttle timespec_now +timespec_now_realtime timespec_sub +timespec_adjust_delta +timespec_cmp token_iter_init trap trie_add @@ -1076,6 +1110,7 @@ xlator_foreach xlator_foreach_depth_first xlator_init xlator_mem_acct_init +xlator_mem_acct_unref xlator_notify xlator_option_info_list xlator_option_init_bool @@ -1086,10 +1121,12 @@ xlator_option_init_percent xlator_option_init_percent_or_size xlator_option_init_size xlator_option_init_size_uint64 +xlator_option_init_size_int64 xlator_option_init_str xlator_option_init_time xlator_option_init_uint32 xlator_option_init_uint64 +xlator_option_init_int64 xlator_option_init_xlator xlator_option_reconf_bool xlator_option_reconf_int32 @@ -1098,10 +1135,12 @@ xlator_option_reconf_percent xlator_option_reconf_percent_or_size xlator_option_reconf_size xlator_option_reconf_size_uint64 +xlator_option_reconf_size_int64 xlator_option_reconf_str xlator_option_reconf_time xlator_option_reconf_uint32 xlator_option_reconf_uint64 +xlator_option_reconf_int64 xlator_option_reconf_xlator xlator_options_validate xlator_options_validate_list @@ -1118,6 +1157,7 @@ xlator_volume_option_get xlator_volume_option_get_list xlator_memrec_free xlator_mem_cleanup 
+gluster_graph_take_reference default_fops gf_fop_list gf_upcall_list @@ -1133,4 +1173,21 @@ gf_replace_old_iatt_in_dict gf_replace_new_iatt_in_dict gf_changelog_init gf_changelog_register_generic -gf_gfid_generate_from_xxh64
\ No newline at end of file +gf_gfid_generate_from_xxh64 +find_xlator_option_in_cmd_args_t +gf_d_type_from_ia_type +glusterfs_graph_fini +glusterfs_process_svc_attach_volfp +glusterfs_mux_volfile_reconfigure +glusterfs_process_svc_detach +mgmt_is_multiplexed_daemon +xlator_is_cleanup_starting +gf_nanosleep +gf_syncfs +graph_total_client_xlator +get_xattrs_to_heal +gf_latency_statedump_and_reset +gf_latency_new +gf_latency_reset +gf_latency_update +gf_frame_latency_update diff --git a/libglusterfs/src/locking.c b/libglusterfs/src/locking.c index 89cbaa00c8b..7577054e33a 100644 --- a/libglusterfs/src/locking.c +++ b/libglusterfs/src/locking.c @@ -15,7 +15,7 @@ #include <unistd.h> #define LOCKING_IMPL -#include "locking.h" +#include "glusterfs/locking.h" int use_spinlocks = 0; diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c index 61909ef4dae..a930d3e3b63 100644 --- a/libglusterfs/src/logging.c +++ b/libglusterfs/src/logging.c @@ -17,6 +17,7 @@ #include <string.h> #include <stdlib.h> #include <syslog.h> +#include <sys/resource.h> #ifdef HAVE_BACKTRACE #include <execinfo.h> @@ -26,7 +27,7 @@ #include <sys/stat.h> -#include "syscall.h" +#include "glusterfs/syscall.h" #define GF_JSON_MSG_LENGTH 8192 #define GF_SYSLOG_CEE_FORMAT \ @@ -34,22 +35,19 @@ #define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf" #define GF_LOG_BACKTRACE_DEPTH 5 #define GF_LOG_BACKTRACE_SIZE 4096 -#define GF_LOG_TIMESTR_SIZE 256 #define GF_MAX_SLOG_PAIR_COUNT 100 -#include "xlator.h" -#include "logging.h" -#include "defaults.h" -#include "glusterfs.h" -#include "timer.h" -#include "libglusterfs-messages.h" +#include "glusterfs/logging.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/timer.h" +#include "glusterfs/libglusterfs-messages.h" /* Do not replace gf_log in TEST_LOG with gf_msg, as there is a slight chance * that it could lead to an infinite recursion.*/ #define TEST_LOG(__msg, __args...) 
\ gf_log("logging-infra", GF_LOG_DEBUG, __msg, ##__args); -void +static void gf_log_flush_timeout_cbk(void *data); int @@ -58,72 +56,54 @@ gf_log_inject_timer_event(glusterfs_ctx_t *ctx); static void gf_log_flush_extra_msgs(glusterfs_ctx_t *ctx, uint32_t new); -static char *gf_level_strings[] = {"", /* NONE */ - "M", /* EMERGENCY */ - "A", /* ALERT */ - "C", /* CRITICAL */ - "E", /* ERROR */ - "W", /* WARNING */ - "N", /* NOTICE */ - "I", /* INFO */ - "D", /* DEBUG */ - "T", /* TRACE */ - ""}; - -/* Ideally this should get moved to logging.h */ -struct _msg_queue { - struct list_head msgs; -}; - -struct _log_msg { - const char *msg; - struct list_head queue; +static int +log_buf_init(log_buf_t *buf, const char *domain, const char *file, + const char *function, int32_t line, gf_loglevel_t level, + int errnum, uint64_t msgid, char **appmsgstr, int graph_id); +static void +gf_log_rotate(glusterfs_ctx_t *ctx); + +static char gf_level_strings[] = { + ' ', /* NONE */ + 'M', /* EMERGENCY */ + 'A', /* ALERT */ + 'C', /* CRITICAL */ + 'E', /* ERROR */ + 'W', /* WARNING */ + 'N', /* NOTICE */ + 'I', /* INFO */ + 'D', /* DEBUG */ + 'T', /* TRACE */ }; void gf_log_logrotate(int signum) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) { - ctx->log.logrotate = 1; - ctx->log.cmd_history_logrotate = 1; + if (THIS->ctx) { + THIS->ctx->log.logrotate = 1; + THIS->ctx->log.cmd_history_logrotate = 1; } } void gf_log_enable_syslog(void) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - ctx->log.gf_log_syslog = 1; + if (THIS->ctx) + THIS->ctx->log.gf_log_syslog = 1; } void gf_log_disable_syslog(void) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - ctx->log.gf_log_syslog = 0; + if (THIS->ctx) + THIS->ctx->log.gf_log_syslog = 0; } gf_loglevel_t gf_log_get_loglevel(void) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - return ctx->log.loglevel; + if (THIS->ctx) + return THIS->ctx->log.loglevel; else /* return 
global defaults (see gf_log_globals_init) */ return GF_LOG_INFO; @@ -139,12 +119,8 @@ gf_log_set_loglevel(glusterfs_ctx_t *ctx, gf_loglevel_t level) int gf_log_get_localtime(void) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - return ctx->log.localtime; + if (THIS->ctx) + return THIS->ctx->log.localtime; else /* return global defaults (see gf_log_globals_init) */ return 0; @@ -153,22 +129,15 @@ gf_log_get_localtime(void) void gf_log_set_localtime(int on_off) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - ctx->log.localtime = on_off; + if (THIS->ctx) + THIS->ctx->log.localtime = on_off; } void gf_log_flush(void) { - xlator_t *this = NULL; - glusterfs_ctx_t *ctx = NULL; - - this = THIS; - ctx = this->ctx; + xlator_t *this = THIS; + glusterfs_ctx_t *ctx = this->ctx; if (ctx && ctx->log.logger == gf_logger_glusterlog) { pthread_mutex_lock(&ctx->log.logfile_mutex); @@ -205,54 +174,19 @@ gf_log_set_xl_loglevel(void *this, gf_loglevel_t level) * * care needs to be taken to configure and start daemons based on the versions * that supports these features */ -gf_log_format_t -gf_log_get_logformat(void) -{ - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - return ctx->log.logformat; - else - /* return global defaluts (see gf_log_globals_init) */ - return gf_logformat_withmsgid; -} void gf_log_set_logformat(gf_log_format_t format) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - ctx->log.logformat = format; -} - -gf_log_logger_t -gf_log_get_logger(void) -{ - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - return ctx->log.logger; - else - /* return global defaluts (see gf_log_globals_init) */ - return gf_logger_glusterlog; + if (THIS->ctx) + THIS->ctx->log.logformat = format; } void gf_log_set_logger(gf_log_logger_t logger) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - ctx->log.logger = logger; + if (THIS->ctx) + THIS->ctx->log.logger = logger; } gf_loglevel_t 
@@ -296,21 +230,11 @@ gf_log_set_log_flush_timeout(uint32_t timeout) THIS->ctx->log.timeout = timeout; } -log_buf_t * -log_buf_new() -{ - log_buf_t *buf = NULL; - - buf = mem_get0(THIS->ctx->logbuf_pool); - - return buf; -} - /* If log_buf_init() fails (indicated by a return value of -1), * call log_buf_destroy() to clean up memory allocated in heap and to return * the log_buf_t object back to its memory pool. */ -int +static int log_buf_init(log_buf_t *buf, const char *domain, const char *file, const char *function, int32_t line, gf_loglevel_t level, int errnum, uint64_t msgid, char **appmsgstr, int graph_id) @@ -349,7 +273,7 @@ out: return ret; } -int +static int log_buf_destroy(log_buf_t *buf) { if (!buf) @@ -387,18 +311,16 @@ gf_log_rotate(glusterfs_ctx_t *ctx) fd = sys_open(ctx->log.filename, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR); if (fd < 0) { - gf_msg("logrotate", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, - "failed to open " - "logfile"); + gf_smsg("logrotate", GF_LOG_ERROR, errno, + LG_MSG_OPEN_LOGFILE_FAILED, NULL); return; } new_logfile = fdopen(fd, "a"); if (!new_logfile) { - gf_msg("logrotate", GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED, - "failed to open logfile" - " %s", - ctx->log.filename); + gf_smsg("logrotate", GF_LOG_CRITICAL, errno, + LG_MSG_OPEN_LOGFILE_FAILED, "filename=%s", + ctx->log.filename, NULL); sys_close(fd); return; } @@ -507,7 +429,7 @@ out: * * @return: void */ -void +static void gf_openlog(const char *ident, int option, int facility) { int _option = option; @@ -551,7 +473,7 @@ gf_openlog(const char *ident, int option, int facility) * buf = "I/O error\u001bon /tmp/bar file" * */ -char * +static char * _json_escape(const char *str, char *buf, size_t len) { static const unsigned char json_exceptions[UCHAR_MAX + 1] = { @@ -630,7 +552,7 @@ _json_escape(const char *str, char *buf, size_t len) * * @return: void */ -void +static void gf_syslog(int facility_priority, char *format, ...) 
{ char *msg = NULL; @@ -680,12 +602,10 @@ gf_log_globals_init(void *data, gf_loglevel_t level) int gf_log_init(void *data, const char *file, const char *ident) { - glusterfs_ctx_t *ctx = NULL; + glusterfs_ctx_t *ctx = data; int fd = -1; struct stat buf; - ctx = data; - if (ctx == NULL) { fprintf(stderr, "ERROR: ctx is NULL\n"); return -1; @@ -720,6 +640,13 @@ gf_log_init(void *data, const char *file, const char *ident) GF_FREE(ctx->log.filename); ctx->log.filename = NULL; + /* close and reopen logfile for log rotate */ + if (ctx->log.logfile) { + fclose(ctx->log.logfile); + ctx->log.logfile = NULL; + ctx->log.gf_log_logfile = NULL; + } + if (strcmp(file, "-") == 0) { int dupfd = -1; @@ -744,6 +671,25 @@ gf_log_init(void *data, const char *file, const char *ident) return -1; } } else { + /* Also create parent dir */ + char *logdir = gf_strdup(file); + if (!logdir) { + return -1; + } + char *tmp_index = rindex(logdir, '/'); + if (tmp_index) { + tmp_index[0] = '\0'; + } + if (mkdir_p(logdir, 0755, true)) { + /* EEXIST is handled in mkdir_p() itself */ + gf_smsg("logging", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR, + "logdir=%s", logdir, "errno=%s", strerror(errno), NULL); + GF_FREE(logdir); + return -1; + } + /* no need of this variable */ + GF_FREE(logdir); + ctx->log.filename = gf_strdup(file); if (!ctx->log.filename) { fprintf(stderr, @@ -781,12 +727,8 @@ gf_log_init(void *data, const char *file, const char *ident) void set_sys_log_level(gf_loglevel_t level) { - glusterfs_ctx_t *ctx = NULL; - - ctx = THIS->ctx; - - if (ctx) - ctx->log.sys_log_level = level; + if (THIS->ctx) + THIS->ctx->log.sys_log_level = level; } /* Check if we should be logging @@ -796,21 +738,17 @@ set_sys_log_level(gf_loglevel_t level) static gf_boolean_t skip_logging(xlator_t *this, gf_loglevel_t level) { - gf_boolean_t ret = _gf_false; - gf_loglevel_t existing_level = GF_LOG_NONE; + gf_loglevel_t existing_level = this->loglevel ? 
this->loglevel + : this->ctx->log.loglevel; + if (level > existing_level) { + return _gf_true; + } if (level == GF_LOG_NONE) { - ret = _gf_true; - goto out; + return _gf_true; } - existing_level = this->loglevel ? this->loglevel : this->ctx->log.loglevel; - if (level > existing_level) { - ret = _gf_true; - goto out; - } -out: - return ret; + return _gf_false; } int @@ -818,24 +756,19 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function, int line, gf_loglevel_t level, const char *fmt, ...) { const char *basename = NULL; - xlator_t *this = NULL; - char *str1 = NULL; - char *str2 = NULL; + xlator_t *this = THIS; + char *logline = NULL; char *msg = NULL; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char *callstr = NULL; struct timeval tv = { 0, }; - size_t len = 0; int ret = 0; va_list ap; - glusterfs_ctx_t *ctx = NULL; - - this = THIS; - ctx = this->ctx; + glusterfs_ctx_t *ctx = this->ctx; if (!ctx) goto out; @@ -843,18 +776,6 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function, if (skip_logging(this, level)) goto out; - static char *level_strings[] = {"", /* NONE */ - "M", /* EMERGENCY */ - "A", /* ALERT */ - "C", /* CRITICAL */ - "E", /* ERROR */ - "W", /* WARNING */ - "N", /* NOTICE */ - "I", /* INFO */ - "D", /* DEBUG */ - "T", /* TRACE */ - ""}; - if (!domain || !file || !function || !fmt) { fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__, __PRETTY_FUNCTION__, __LINE__); @@ -871,6 +792,13 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function, * to avoid allocating memory from the heap*/ callstr = gf_backtrace_save(NULL); + va_start(ap, fmt); + ret = vasprintf(&msg, fmt, ap); + va_end(ap); + if (-1 == ret) { + goto out; + } + if (ctx->log.log_control_file_found) { int priority; /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and @@ -881,12 +809,8 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function, priority = 
level - 1; } - va_start(ap, fmt); - vasprintf(&str2, fmt, ap); - va_end(ap); - gf_syslog(priority, "[%s:%d:%s] %s %d-%s: %s", basename, line, function, - callstr, ((this->graph) ? this->graph->id : 0), domain, str2); + callstr, ((this->graph) ? this->graph->id : 0), domain, msg); goto out; } @@ -894,42 +818,24 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function, ret = gettimeofday(&tv, NULL); if (-1 == ret) goto out; - va_start(ap, fmt); - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr), - ".%" GF_PRI_SUSECONDS, tv.tv_usec); - ret = gf_asprintf(&str1, "[%s] %s [%s:%d:%s] %s %d-%s: ", timestr, - level_strings[level], basename, line, function, callstr, - ((this->graph) ? this->graph->id : 0), domain); - if (-1 == ret) { - goto out; - } + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); - ret = vasprintf(&str2, fmt, ap); + ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %s %d-%s: %s\n", timestr, + gf_level_strings[level], basename, line, function, + callstr, ((this->graph) ? 
this->graph->id : 0), domain, + msg); if (-1 == ret) { goto out; } - va_end(ap); - - len = strlen(str1); - msg = GF_MALLOC(len + strlen(str2) + 1, gf_common_mt_char); - if (!msg) { - ret = -1; - goto out; - } - - strcpy(msg, str1); - strcpy(msg + len, str2); - pthread_mutex_lock(&ctx->log.logfile_mutex); { if (ctx->log.logfile) { - fprintf(ctx->log.logfile, "%s\n", msg); + fputs(logline, ctx->log.logfile); fflush(ctx->log.logfile); } else if (ctx->log.loglevel >= level) { - fprintf(stderr, "%s\n", msg); + fputs(logline, stderr); fflush(stderr); } @@ -938,25 +844,22 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function, and trace logs */ if (ctx->log.gf_log_syslog && level && (level <= ctx->log.sys_log_level)) - syslog((level - 1), "%s\n", msg); + syslog((level - 1), "%s", logline); #endif } pthread_mutex_unlock(&ctx->log.logfile_mutex); out: - GF_FREE(msg); - - GF_FREE(str1); - FREE(str2); + GF_FREE(logline); - va_end(ap); + FREE(msg); return ret; } -int +static int _gf_msg_plain_internal(gf_loglevel_t level, const char *msg) { xlator_t *this = NULL; @@ -1113,10 +1016,12 @@ _gf_msg_backtrace_nomem(gf_loglevel_t level, int stacksize) goto out; bt_size = backtrace(array, ((stacksize <= 200) ? stacksize : 200)); + if (!bt_size) + goto out; pthread_mutex_lock(&ctx->log.logfile_mutex); { fd = ctx->log.logfile ? 
fileno(ctx->log.logfile) : fileno(stderr); - if (bt_size && (fd != -1)) { + if (fd != -1) { /* print to the file fd, to prevent any allocations from backtrace_symbols */ @@ -1182,12 +1087,13 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, char msg[2048] = { 0, }; - char timestr[GF_LOG_TIMESTR_SIZE] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; glusterfs_ctx_t *ctx = NULL; int wlen = 0; int priority; + struct rusage r_usage; this = THIS; ctx = this->ctx; @@ -1209,25 +1115,23 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, ret = gettimeofday(&tv, NULL); if (-1 == ret) goto out; - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - ret = snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr), - ".%" GF_PRI_SUSECONDS, tv.tv_usec); - if (-1 == ret) { - goto out; - } + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); /* TODO: Currently we print in the enhanced format, with a message ID * of 0. Need to enhance this to support format as configured */ - ret = snprintf(msg, sizeof msg, - "[%s] %s [MSGID: %" PRIu64 - "]" - " [%s:%d:%s] %s: no memory " - "available for size (%" GF_PRI_SIZET - ")" - " [call stack follows]\n", - timestr, gf_level_strings[level], (uint64_t)0, basename, - line, function, domain, size); - if (-1 == ret) { + wlen = snprintf( + msg, sizeof msg, + "[%s] %c [MSGID: %" PRIu64 + "]" + " [%s:%d:%s] %s: no memory " + "available for size (%" GF_PRI_SIZET + ") current memory usage in kilobytes %ld" + " [call stack follows]\n", + timestr, gf_level_strings[level], (uint64_t)0, basename, line, function, + domain, size, + (!getrusage(RUSAGE_SELF, &r_usage) ? 
r_usage.ru_maxrss : 0)); + if (-1 == wlen) { + ret = -1; goto out; } @@ -1257,8 +1161,6 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, goto out; } - wlen = strlen(msg); - /* write directly to the fd to prevent out of order * message and stack */ ret = sys_write(fd, msg, wlen); @@ -1372,93 +1274,77 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file, int errnum, uint64_t msgid, char **appmsgstr, char *callstr, struct timeval tv, int graph_id, gf_log_format_t fmt) { - char timestr[GF_LOG_TIMESTR_SIZE] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char *header = NULL; char *footer = NULL; - char *msg = NULL; - size_t hlen = 0, flen = 0, mlen = 0; int ret = 0; /* rotate if required */ gf_log_rotate(ctx); /* format the time stamp */ - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr), - ".%" GF_PRI_SUSECONDS, tv.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); - /* generate header and footer */ + /* generate footer */ + if (errnum) { + ret = gf_asprintf(&footer, " [%s]\n", strerror(errnum)); + } else { + ret = gf_asprintf(&footer, " \n"); + } + if (-1 == ret) { + goto err; + } + + /* generate message, inc. 
the header */ if (fmt == gf_logformat_traditional) { if (!callstr) { ret = gf_asprintf(&header, - "[%s] %s [%s:%d:%s]" - " %d-%s: ", + "[%s] %c [%s:%d:%s]" + " %d-%s: %s", timestr, gf_level_strings[level], file, line, - function, graph_id, domain); + function, graph_id, domain, *appmsgstr); } else { ret = gf_asprintf(&header, - "[%s] %s [%s:%d:%s] %s" - " %d-%s: ", + "[%s] %c [%s:%d:%s] %s" + " %d-%s: %s", timestr, gf_level_strings[level], file, line, - function, callstr, graph_id, domain); - } - if (-1 == ret) { - goto err; + function, callstr, graph_id, domain, *appmsgstr); } } else { /* gf_logformat_withmsgid */ /* CEE log format unsupported in logger_glusterlog, so just * print enhanced log format */ if (!callstr) { ret = gf_asprintf(&header, - "[%s] %s [MSGID: %" PRIu64 + "[%s] %c [MSGID: %" PRIu64 "]" - " [%s:%d:%s] %d-%s: ", + " [%s:%d:%s] %d-%s: %s", timestr, gf_level_strings[level], msgid, file, - line, function, graph_id, domain); + line, function, graph_id, domain, *appmsgstr); } else { ret = gf_asprintf(&header, - "[%s] %s [MSGID: %" PRIu64 + "[%s] %c [MSGID: %" PRIu64 "]" - " [%s:%d:%s] %s %d-%s: ", + " [%s:%d:%s] %s %d-%s: %s", timestr, gf_level_strings[level], msgid, file, - line, function, callstr, graph_id, domain); - } - if (-1 == ret) { - goto err; + line, function, callstr, graph_id, domain, + *appmsgstr); } } - - if (errnum) { - ret = gf_asprintf(&footer, " [%s]", strerror(errnum)); - if (-1 == ret) { - goto err; - } - } - - /* generate the full message to log */ - hlen = strlen(header); - flen = footer ? 
strlen(footer) : 0; - mlen = strlen(*appmsgstr); - msg = GF_MALLOC(hlen + flen + mlen + 1, gf_common_mt_char); - if (!msg) { - ret = -1; + if (-1 == ret) { goto err; } - strcpy(msg, header); - strcpy(msg + hlen, *appmsgstr); - if (footer) - strcpy(msg + hlen + mlen, footer); + /* send the full message to log */ pthread_mutex_lock(&ctx->log.logfile_mutex); { if (ctx->log.logfile) { - fprintf(ctx->log.logfile, "%s\n", msg); + fprintf(ctx->log.logfile, "%s%s", header, footer); fflush(ctx->log.logfile); } else if (ctx->log.loglevel >= level) { - fprintf(stderr, "%s\n", msg); + fprintf(stderr, "%s%s", header, footer); fflush(stderr); } @@ -1466,8 +1352,9 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file, /* We want only serious logs in 'syslog', not our debug * and trace logs */ if (ctx->log.gf_log_syslog && level && - (level <= ctx->log.sys_log_level)) - syslog((level - 1), "%s\n", msg); + (level <= ctx->log.sys_log_level)) { + syslog((level - 1), "%s%s", header, footer); + } #endif } @@ -1478,7 +1365,6 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file, ret = 0; err: - GF_FREE(msg); GF_FREE(header); GF_FREE(footer); @@ -1494,39 +1380,34 @@ gf_syslog_log_repetitions(const char *domain, const char *file, int graph_id) { int priority; - char timestr_latest[256] = { + char timestr_latest[GF_TIMESTR_SIZE] = { 0, }; - char timestr_oldest[256] = { + char timestr_oldest[GF_TIMESTR_SIZE] = { 0, }; SET_LOG_PRIO(level, priority); - gf_time_fmt(timestr_latest, sizeof timestr_latest, latest.tv_sec, - gf_timefmt_FT); - snprintf(timestr_latest + strlen(timestr_latest), - sizeof(timestr_latest) - strlen(timestr_latest), - ".%" GF_PRI_SUSECONDS, latest.tv_usec); - - gf_time_fmt(timestr_oldest, sizeof timestr_oldest, oldest.tv_sec, - gf_timefmt_FT); - snprintf(timestr_oldest + strlen(timestr_oldest), - sizeof(timestr_oldest) - strlen(timestr_oldest), - ".%" GF_PRI_SUSECONDS, oldest.tv_usec); + gf_time_fmt_tv(timestr_latest, 
sizeof timestr_latest, &latest, + gf_timefmt_FT); + gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest, + gf_timefmt_FT); if (errnum) { syslog(priority, "The message \"[MSGID: %" PRIu64 "] [%s:%d:%s] " - "%d-%s: %s [%s] \" repeated %d times between %s and %s", + "%d-%s: %s [%s] \" repeated %d times between %s" + " and %s", msgid, file, line, function, graph_id, domain, *appmsgstr, strerror(errnum), refcount, timestr_oldest, timestr_latest); } else { syslog(priority, "The message \"[MSGID: %" PRIu64 "] [%s:%d:%s] " - "%d-%s: %s \" repeated %d times between %s and %s", + "%d-%s: %s \" repeated %d times between %s" + " and %s", msgid, file, line, function, graph_id, domain, *appmsgstr, refcount, timestr_oldest, timestr_latest); } @@ -1542,13 +1423,10 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain, struct timeval latest, int graph_id) { int ret = 0; - size_t hlen = 0; - size_t flen = 0; - size_t mlen = 0; - char timestr_latest[256] = { + char timestr_latest[GF_TIMESTR_SIZE] = { 0, }; - char timestr_oldest[256] = { + char timestr_oldest[GF_TIMESTR_SIZE] = { 0, }; char errstr[256] = { @@ -1556,65 +1434,45 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain, }; char *header = NULL; char *footer = NULL; - char *msg = NULL; if (!ctx) goto err; gf_log_rotate(ctx); - gf_time_fmt(timestr_latest, sizeof timestr_latest, latest.tv_sec, - gf_timefmt_FT); - snprintf(timestr_latest + strlen(timestr_latest), - sizeof(timestr_latest) - strlen(timestr_latest), - ".%" GF_PRI_SUSECONDS, latest.tv_usec); - - gf_time_fmt(timestr_oldest, sizeof timestr_oldest, oldest.tv_sec, - gf_timefmt_FT); - snprintf(timestr_oldest + strlen(timestr_oldest), - sizeof(timestr_oldest) - strlen(timestr_oldest), - ".%" GF_PRI_SUSECONDS, oldest.tv_usec); - ret = gf_asprintf(&header, - "The message \"%s [MSGID: %" PRIu64 + "The message \"%c [MSGID: %" PRIu64 "]" - " [%s:%d:%s] %d-%s: ", + " [%s:%d:%s] %d-%s: %s", gf_level_strings[level], msgid, file, 
line, function, - graph_id, domain); - if (-1 == ret) + graph_id, domain, *appmsgstr); + if (-1 == ret) { goto err; + } + + gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest, + gf_timefmt_FT); + + gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest, + gf_timefmt_FT); if (errnum) snprintf(errstr, sizeof(errstr) - 1, " [%s]", strerror(errnum)); - ret = gf_asprintf(&footer, - "%s\" repeated %d times between" - " [%s] and [%s]", + ret = gf_asprintf(&footer, "%s\" repeated %d times between [%s] and [%s]", errstr, refcount, timestr_oldest, timestr_latest); - if (-1 == ret) - goto err; - - /* generate the full message to log */ - hlen = strlen(header); - flen = strlen(footer); - mlen = strlen(*appmsgstr); - msg = GF_MALLOC(hlen + flen + mlen + 1, gf_common_mt_char); - if (!msg) { + if (-1 == ret) { ret = -1; goto err; } - strcpy(msg, header); - strcpy(msg + hlen, *appmsgstr); - strcpy(msg + hlen + mlen, footer); - pthread_mutex_lock(&ctx->log.logfile_mutex); { if (ctx->log.logfile) { - fprintf(ctx->log.logfile, "%s\n", msg); + fprintf(ctx->log.logfile, "%s%s\n", header, footer); fflush(ctx->log.logfile); } else if (ctx->log.loglevel >= level) { - fprintf(stderr, "%s\n", msg); + fprintf(stderr, "%s%s\n", header, footer); fflush(stderr); } @@ -1623,7 +1481,7 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain, * and trace logs */ if (ctx->log.gf_log_syslog && level && (level <= ctx->log.sys_log_level)) - syslog((level - 1), "%s\n", msg); + syslog((level - 1), "%s%s\n", header, footer); #endif } @@ -1634,7 +1492,6 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain, ret = 0; err: - GF_FREE(msg); GF_FREE(header); GF_FREE(footer); @@ -1650,9 +1507,7 @@ gf_log_print_with_repetitions(glusterfs_ctx_t *ctx, const char *domain, struct timeval latest, int graph_id) { int ret = -1; - gf_log_logger_t logger = 0; - - logger = ctx->log.logger; + gf_log_logger_t logger = ctx->log.logger; switch (logger) { case 
gf_logger_syslog: @@ -1662,6 +1517,11 @@ gf_log_print_with_repetitions(glusterfs_ctx_t *ctx, const char *domain, appmsgstr, callstr, refcount, oldest, latest, graph_id); break; } + /* NOTE: If syslog control file is absent, which is another + * way to control logging to syslog, then we will fall through + * to the gluster log. The ideal way to do things would be to + * not have the extra control file check */ + case gf_logger_glusterlog: ret = gf_glusterlog_log_repetitions( ctx, domain, file, function, line, level, errnum, msgid, @@ -1986,7 +1846,7 @@ _gf_msg_internal(const char *domain, const char *file, const char *function, if (size == 0) { flush_logged_msg = _gf_true; goto unlock; - } else if ((ctx->log.lru_cur_size + 1) > size) { + } else if (((ctx->log.lru_cur_size + 1) > size) && (first)) { /* If the list is full, flush the lru msg to disk and also * release it after unlock, and ... * */ @@ -2003,7 +1863,7 @@ _gf_msg_internal(const char *domain, const char *file, const char *function, /* create a new list element, initialise and enqueue it. * Additionally, this being the first occurrence of the msg, * log it directly to disk after unlock. 
*/ - buf_new = log_buf_new(); + buf_new = mem_get0(THIS->ctx->logbuf_pool); if (!buf_new) { ret = -1; goto unlock; @@ -2055,23 +1915,11 @@ _gf_msg(const char *domain, const char *file, const char *function, int ret = 0; char *msgstr = NULL; va_list ap; - xlator_t *this = NULL; + xlator_t *this = THIS; glusterfs_ctx_t *ctx = NULL; - char callstr[GF_LOG_BACKTRACE_SIZE] = { - 0, - }; - int passcallstr = 0; + char *callstr = NULL; int log_inited = 0; - /* in args check */ - if (!domain || !file || !function || !fmt) { - fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__, - __PRETTY_FUNCTION__, __LINE__); - return -1; - } - - this = THIS; - if (this == NULL) return -1; @@ -2085,22 +1933,12 @@ _gf_msg(const char *domain, const char *file, const char *function, if (skip_logging(this, level)) goto out; - if (trace) { - ret = _gf_msg_backtrace(GF_LOG_BACKTRACE_DEPTH, callstr, - GF_LOG_BACKTRACE_SIZE); - if (ret >= 0) - passcallstr = 1; - else - ret = 0; - } - - pthread_mutex_lock(&ctx->log.logfile_mutex); - { - if (ctx->log.logfile) { - log_inited = 1; - } + /* in args check */ + if (!domain || !file || !function || !fmt) { + fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__, + __PRETTY_FUNCTION__, __LINE__); + return -1; } - pthread_mutex_unlock(&ctx->log.logfile_mutex); /* form the message */ va_start(ap, fmt); @@ -2109,15 +1947,35 @@ _gf_msg(const char *domain, const char *file, const char *function, /* log */ if (ret != -1) { + if (trace) { + callstr = GF_MALLOC(GF_LOG_BACKTRACE_SIZE, gf_common_mt_char); + if (callstr == NULL) + return -1; + + ret = _gf_msg_backtrace(GF_LOG_BACKTRACE_DEPTH, callstr, + GF_LOG_BACKTRACE_SIZE); + if (ret < 0) { + GF_FREE(callstr); + callstr = NULL; + } + } + + pthread_mutex_lock(&ctx->log.logfile_mutex); + { + if (ctx->log.logfile) { + log_inited = 1; + } + } + pthread_mutex_unlock(&ctx->log.logfile_mutex); + if (!log_inited && ctx->log.gf_log_syslog) { ret = gf_log_syslog( ctx, domain, file, function, 
line, level, errnum, msgid, - &msgstr, (passcallstr ? callstr : NULL), + &msgstr, (callstr ? callstr : NULL), (this->graph) ? this->graph->id : 0, gf_logformat_traditional); } else { ret = _gf_msg_internal(domain, file, function, line, level, errnum, - msgid, &msgstr, - (passcallstr ? callstr : NULL), + msgid, &msgstr, (callstr ? callstr : NULL), (this->graph) ? this->graph->id : 0); } } else { @@ -2125,7 +1983,8 @@ _gf_msg(const char *domain, const char *file, const char *function, * are undefined, be safe */ msgstr = NULL; } - + if (callstr) + GF_FREE(callstr); FREE(msgstr); out: @@ -2141,23 +2000,18 @@ _gf_log(const char *domain, const char *file, const char *function, int line, const char *basename = NULL; FILE *new_logfile = NULL; va_list ap; - char timestr[GF_LOG_TIMESTR_SIZE] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; struct timeval tv = { 0, }; - char *str1 = NULL; - char *str2 = NULL; + char *logline = NULL; char *msg = NULL; - size_t len = 0; int ret = 0; int fd = -1; - xlator_t *this = NULL; - glusterfs_ctx_t *ctx = NULL; - - this = THIS; - ctx = this->ctx; + xlator_t *this = THIS; + glusterfs_ctx_t *ctx = this->ctx; if (!ctx) goto out; @@ -2165,18 +2019,6 @@ _gf_log(const char *domain, const char *file, const char *function, int line, if (skip_logging(this, level)) goto out; - static char *level_strings[] = {"", /* NONE */ - "M", /* EMERGENCY */ - "A", /* ALERT */ - "C", /* CRITICAL */ - "E", /* ERROR */ - "W", /* WARNING */ - "N", /* NOTICE */ - "I", /* INFO */ - "D", /* DEBUG */ - "T", /* TRACE */ - ""}; - if (!domain || !file || !function || !fmt) { fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__, __PRETTY_FUNCTION__, __LINE__); @@ -2189,6 +2031,13 @@ _gf_log(const char *domain, const char *file, const char *function, int line, else basename = file; + va_start(ap, fmt); + ret = vasprintf(&msg, fmt, ap); + va_end(ap); + if (-1 == ret) { + goto err; + } + if (ctx->log.log_control_file_found) { int priority; /* treat GF_LOG_TRACE 
and GF_LOG_NONE as LOG_DEBUG and @@ -2199,12 +2048,8 @@ _gf_log(const char *domain, const char *file, const char *function, int line, priority = level - 1; } - va_start(ap, fmt); - vasprintf(&str2, fmt, ap); - va_end(ap); - gf_syslog(priority, "[%s:%d:%s] %d-%s: %s", basename, line, function, - ((this->graph) ? this->graph->id : 0), domain, str2); + ((this->graph) ? this->graph->id : 0), domain, msg); goto err; } @@ -2213,16 +2058,17 @@ _gf_log(const char *domain, const char *file, const char *function, int line, fd = sys_open(ctx->log.filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR); if (fd < 0) { - gf_msg("logrotate", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, - "failed to open logfile"); + gf_smsg("logrotate", GF_LOG_ERROR, errno, + LG_MSG_OPEN_LOGFILE_FAILED, NULL); return -1; } sys_close(fd); new_logfile = fopen(ctx->log.filename, "a"); if (!new_logfile) { - gf_msg("logrotate", GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED, - "failed to open logfile %s", ctx->log.filename); + gf_smsg("logrotate", GF_LOG_CRITICAL, errno, + LG_MSG_OPEN_LOGFILE_FAILED, "filename=%s", + ctx->log.filename, NULL); goto log; } @@ -2240,41 +2086,23 @@ log: ret = gettimeofday(&tv, NULL); if (-1 == ret) goto out; - va_start(ap, fmt); - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr), - ".%" GF_PRI_SUSECONDS, tv.tv_usec); - ret = gf_asprintf(&str1, "[%s] %s [%s:%d:%s] %d-%s: ", timestr, - level_strings[level], basename, line, function, - ((this->graph) ? this->graph->id : 0), domain); - if (-1 == ret) { - goto err; - } + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); - ret = vasprintf(&str2, fmt, ap); + ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %d-%s: %s\n", timestr, + gf_level_strings[level], basename, line, function, + ((this->graph) ? 
this->graph->id : 0), domain, msg); if (-1 == ret) { goto err; } - va_end(ap); - - len = strlen(str1); - msg = GF_MALLOC(len + strlen(str2) + 1, gf_common_mt_char); - if (!msg) { - goto err; - } - - strcpy(msg, str1); - strcpy(msg + len, str2); - pthread_mutex_lock(&ctx->log.logfile_mutex); { if (ctx->log.logfile) { - fprintf(ctx->log.logfile, "%s\n", msg); + fputs(logline, ctx->log.logfile); fflush(ctx->log.logfile); } else if (ctx->log.loglevel >= level) { - fprintf(stderr, "%s\n", msg); + fputs(logline, stderr); fflush(stderr); } @@ -2283,21 +2111,18 @@ log: and trace logs */ if (ctx->log.gf_log_syslog && level && (level <= ctx->log.sys_log_level)) - syslog((level - 1), "%s\n", msg); + syslog((level - 1), "%s", logline); #endif } pthread_mutex_unlock(&ctx->log.logfile_mutex); err: - GF_FREE(msg); + GF_FREE(logline); - GF_FREE(str1); - - FREE(str2); + FREE(msg); out: - va_end(ap); return (0); } @@ -2306,47 +2131,31 @@ _gf_log_eh(const char *function, const char *fmt, ...) { int ret = -1; va_list ap; - char *str1 = NULL; - char *str2 = NULL; + char *logline = NULL; char *msg = NULL; xlator_t *this = NULL; this = THIS; - ret = gf_asprintf(&str1, "[%d] %s: ", ((this->graph) ? this->graph->id : 0), - function); - if (-1 == ret) { - goto out; - } - va_start(ap, fmt); - - ret = vasprintf(&str2, fmt, ap); + ret = vasprintf(&msg, fmt, ap); + va_end(ap); if (-1 == ret) { goto out; } - msg = GF_MALLOC(strlen(str1) + strlen(str2) + 1, gf_common_mt_char); - if (!msg) { - ret = -1; + ret = gf_asprintf(&logline, "[%d] %s: %s", + ((this->graph) ? 
this->graph->id : 0), function, msg); + if (-1 == ret) { goto out; } - strcpy(msg, str1); - strcat(msg, str2); - - ret = eh_save_history(this->history, msg); - if (ret < 0) - GF_FREE(msg); + ret = eh_save_history(this->history, logline); out: - GF_FREE(str1); - - /* Use FREE instead of GF_FREE since str2 was allocated by vasprintf */ - if (str2) - FREE(str2); + GF_FREE(logline); - va_end(ap); + FREE(msg); return ret; } @@ -2365,8 +2174,8 @@ gf_cmd_log_init(const char *filename) return -1; if (!filename) { - gf_msg(this->name, GF_LOG_CRITICAL, 0, LG_MSG_INVALID_ENTRY, - "gf_cmd_log_init: no filename specified\n"); + gf_smsg(this->name, GF_LOG_CRITICAL, 0, LG_MSG_FILENAME_NOT_SPECIFIED, + "gf_cmd_log_init", NULL); return -1; } @@ -2383,17 +2192,15 @@ gf_cmd_log_init(const char *filename) fd = sys_open(ctx->log.cmd_log_filename, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR); if (fd < 0) { - gf_msg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED, - "failed to open cmd_log_file"); + gf_smsg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_OPEN_LOGFILE_FAILED, + "cmd_log_file", NULL); return -1; } ctx->log.cmdlogfile = fdopen(fd, "a"); if (!ctx->log.cmdlogfile) { - gf_msg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED, - "gf_cmd_log_init: failed to open logfile \"%s\" " - "\n", - ctx->log.cmd_log_filename); + gf_smsg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_OPEN_LOGFILE_FAILED, + "gf_cmd_log_init: %s", ctx->log.cmd_log_filename, NULL); sys_close(fd); return -1; } @@ -2404,14 +2211,12 @@ int gf_cmd_log(const char *domain, const char *fmt, ...) { va_list ap; - char timestr[64]; + char timestr[GF_TIMESTR_SIZE]; struct timeval tv = { 0, }; - char *str1 = NULL; - char *str2 = NULL; + char *logline = NULL; char *msg = NULL; - size_t len = 0; int ret = 0; int fd = -1; glusterfs_ctx_t *ctx = NULL; @@ -2433,31 +2238,19 @@ gf_cmd_log(const char *domain, const char *fmt, ...) 
if (ret == -1) goto out; va_start(ap, fmt); - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - snprintf(timestr + strlen(timestr), GF_LOG_TIMESTR_SIZE - strlen(timestr), - ".%" GF_PRI_SUSECONDS, tv.tv_usec); - - ret = gf_asprintf(&str1, "[%s] %s : ", timestr, domain); - if (ret == -1) { - goto out; - } - - ret = vasprintf(&str2, fmt, ap); + ret = vasprintf(&msg, fmt, ap); + va_end(ap); if (ret == -1) { goto out; } - va_end(ap); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); - len = strlen(str1); - msg = GF_MALLOC(len + strlen(str2) + 1, gf_common_mt_char); - if (!msg) { + ret = gf_asprintf(&logline, "[%s] %s : %s\n", timestr, domain, msg); + if (ret == -1) { goto out; } - strcpy(msg, str1); - strcpy(msg + len, str2); - /* close and reopen cmdlogfile fd for in case of log rotate*/ if (ctx->log.cmd_history_logrotate) { ctx->log.cmd_history_logrotate = 0; @@ -2470,43 +2263,37 @@ gf_cmd_log(const char *domain, const char *fmt, ...) fd = sys_open(ctx->log.cmd_log_filename, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR); if (fd < 0) { - gf_msg(THIS->name, GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED, - "failed to open " - "logfile \"%s\" \n", - ctx->log.cmd_log_filename); + gf_smsg(THIS->name, GF_LOG_CRITICAL, errno, + LG_MSG_OPEN_LOGFILE_FAILED, "name=%s", + ctx->log.cmd_log_filename, NULL); ret = -1; goto out; } ctx->log.cmdlogfile = fdopen(fd, "a"); if (!ctx->log.cmdlogfile) { - gf_msg(THIS->name, GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED, - "failed to open logfile \"%s\"" - " \n", - ctx->log.cmd_log_filename); + gf_smsg(THIS->name, GF_LOG_CRITICAL, errno, + LG_MSG_OPEN_LOGFILE_FAILED, "name=%s", + ctx->log.cmd_log_filename, NULL); ret = -1; sys_close(fd); goto out; } } - fprintf(ctx->log.cmdlogfile, "%s\n", msg); + fputs(logline, ctx->log.cmdlogfile); fflush(ctx->log.cmdlogfile); out: - GF_FREE(msg); - - GF_FREE(str1); - - FREE(str2); + GF_FREE(logline); - va_end(ap); + FREE(msg); return ret; } static int 
-_do_slog_format(const char *event, va_list inp, char **msg) +_do_slog_format(int errnum, const char *event, va_list inp, char **msg) { va_list valist_tmp; int i = 0; @@ -2519,10 +2306,13 @@ _do_slog_format(const char *event, va_list inp, char **msg) char format_char = '%'; char *tmp1 = NULL; char *tmp2 = NULL; + char temp_sep[3] = ""; - ret = gf_asprintf(&tmp2, "%s", event); - if (ret == -1) + tmp2 = gf_strdup(""); + if (!tmp2) { + ret = -1; goto out; + } /* Hardcoded value for max key value pairs, exits early */ /* from loop if found NULL */ @@ -2570,22 +2360,45 @@ _do_slog_format(const char *event, va_list inp, char **msg) (void)va_arg(inp, void *); } - ret = gf_asprintf(&tmp2, "%s\t%s", tmp1, buffer); + ret = gf_asprintf(&tmp2, "%s%s{%s}", tmp1, temp_sep, buffer); if (ret < 0) goto out; GF_FREE(buffer); buffer = NULL; } else { - ret = gf_asprintf(&tmp2, "%s\t%s", tmp1, fmt); + ret = gf_asprintf(&tmp2, "%s%s{%s}", tmp1, temp_sep, fmt); if (ret < 0) goto out; } + /* Set seperator for next iteration */ + temp_sep[0] = ','; + temp_sep[1] = ' '; + temp_sep[2] = 0; + GF_FREE(tmp1); tmp1 = NULL; } + tmp1 = gf_strdup(tmp2); + if (!tmp1) { + ret = -1; + goto out; + } + GF_FREE(tmp2); + tmp2 = NULL; + + if (errnum) { + ret = gf_asprintf(&tmp2, "%s [%s%s{errno=%d}, {error=%s}]", event, tmp1, + temp_sep, errnum, strerror(errnum)); + } else { + ret = gf_asprintf(&tmp2, "%s [%s]", event, tmp1); + } + + if (ret == -1) + goto out; + *msg = gf_strdup(tmp2); if (!*msg) ret = -1; @@ -2611,36 +2424,19 @@ _gf_smsg(const char *domain, const char *file, const char *function, va_list valist; char *msg = NULL; int ret = 0; + xlator_t *this = THIS; - va_start(valist, event); - ret = _do_slog_format(event, valist, &msg); - if (ret == -1) - goto out; - - ret = _gf_msg(domain, file, function, line, level, errnum, trace, msgid, - "%s", msg); - -out: - va_end(valist); - if (msg) - GF_FREE(msg); - return ret; -} - -int -_gf_slog(const char *domain, const char *file, const char *function, int 
line, - gf_loglevel_t level, const char *event, ...) -{ - va_list valist; - char *msg = NULL; - int ret = 0; + if (skip_logging(this, level)) + return ret; va_start(valist, event); - ret = _do_slog_format(event, valist, &msg); + ret = _do_slog_format(errnum, event, valist, &msg); if (ret == -1) goto out; - ret = _gf_log(domain, file, function, line, level, "%s", msg); + /* Pass errnum as zero since it is already formated as required */ + ret = _gf_msg(domain, file, function, line, level, 0, trace, msgid, "%s", + msg); out: va_end(valist); diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c index cff96323867..2d5a12b0a00 100644 --- a/libglusterfs/src/mem-pool.c +++ b/libglusterfs/src/mem-pool.c @@ -8,14 +8,14 @@ cases as published by the Free Software Foundation. */ -#include "mem-pool.h" -#include "logging.h" -#include "xlator.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/common-utils.h" // for GF_ASSERT, gf_thread_cr... +#include "glusterfs/globals.h" // for xlator_t, THIS #include <stdlib.h> #include <stdarg.h> #include "unittest/unittest.h" -#include "libglusterfs-messages.h" +#include "glusterfs/libglusterfs-messages.h" void gf_mem_acct_enable_set(void *data) @@ -35,61 +35,101 @@ gf_mem_acct_enable_set(void *data) return; } -int -gf_mem_set_acct_info(xlator_t *xl, char **alloc_ptr, size_t size, uint32_t type, - const char *typestr) +static void * +gf_mem_header_prepare(struct mem_header *header, size_t size) { - void *ptr = NULL; - struct mem_header *header = NULL; + void *ptr; - if (!alloc_ptr) - return -1; + header->size = size; - ptr = *alloc_ptr; + ptr = header + 1; - GF_ASSERT(xl != NULL); + /* data follows in this gap of 'size' bytes */ + *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC; - GF_ASSERT(xl->mem_acct != NULL); + return ptr; +} - GF_ASSERT(type <= xl->mem_acct->num_types); +static void * +gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header, + size_t size, uint32_t type, const char *typestr) +{ 
+ struct mem_acct_rec *rec = NULL; + bool new_ref = false; - LOCK(&xl->mem_acct->rec[type].lock); - { - if (!xl->mem_acct->rec[type].typestr) - xl->mem_acct->rec[type].typestr = typestr; - xl->mem_acct->rec[type].size += size; - xl->mem_acct->rec[type].num_allocs++; - xl->mem_acct->rec[type].total_allocs++; - xl->mem_acct->rec[type].max_size = max(xl->mem_acct->rec[type].max_size, - xl->mem_acct->rec[type].size); - xl->mem_acct->rec[type].max_num_allocs = max( - xl->mem_acct->rec[type].max_num_allocs, - xl->mem_acct->rec[type].num_allocs); - } - UNLOCK(&xl->mem_acct->rec[type].lock); + if (mem_acct != NULL) { + GF_ASSERT(type <= mem_acct->num_types); - GF_ATOMIC_INC(xl->mem_acct->refcnt); + rec = &mem_acct->rec[type]; + LOCK(&rec->lock); + { + if (!rec->typestr) { + rec->typestr = typestr; + } + rec->size += size; + new_ref = (rec->num_allocs == 0); + rec->num_allocs++; + rec->total_allocs++; + rec->max_size = max(rec->max_size, rec->size); + rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs); + +#ifdef DEBUG + list_add(&header->acct_list, &rec->obj_list); +#endif + } + UNLOCK(&rec->lock); + + /* We only take a reference for each memory type used, not for each + * allocation. This minimizes the use of atomic operations. 
*/ + if (new_ref) { + GF_ATOMIC_INC(mem_acct->refcnt); + } + } - header = (struct mem_header *)ptr; header->type = type; - header->size = size; - header->mem_acct = xl->mem_acct; + header->mem_acct = mem_acct; header->magic = GF_MEM_HEADER_MAGIC; + return gf_mem_header_prepare(header, size); +} + +static void * +gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header, + size_t size) +{ + struct mem_acct_rec *rec = NULL; + + if (mem_acct != NULL) { + rec = &mem_acct->rec[header->type]; + LOCK(&rec->lock); + { + rec->size += size - header->size; + rec->total_allocs++; + rec->max_size = max(rec->max_size, rec->size); + #ifdef DEBUG - INIT_LIST_HEAD(&header->acct_list); - LOCK(&xl->mem_acct->rec[type].lock); - { - list_add(&header->acct_list, &(xl->mem_acct->rec[type].obj_list)); - } - UNLOCK(&xl->mem_acct->rec[type].lock); + /* The old 'header' already was present in 'obj_list', but + * realloc() could have changed its address. We need to remove + * the old item from the list and add the new one. This can be + * done this way because list_move() doesn't use the pointers + * to the old location (which are not valid anymore) already + * present in the list, it simply overwrites them. */ + list_move(&header->acct_list, &rec->obj_list); #endif - ptr += sizeof(struct mem_header); - /* data follows in this gap of 'size' bytes */ - *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC; + } + UNLOCK(&rec->lock); + } - *alloc_ptr = ptr; - return 0; + return gf_mem_header_prepare(header, size); +} + +static bool +gf_mem_acct_enabled(void) +{ + xlator_t *x = THIS; + /* Low-level __gf_xxx() may be called + before ctx is initialized. 
*/ + return x->ctx && x->ctx->mem_acct_enable; } void * @@ -97,10 +137,10 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr) { size_t tot_size = 0; size_t req_size = 0; - char *ptr = NULL; + void *ptr = NULL; xlator_t *xl = NULL; - if (!THIS->ctx->mem_acct_enable) + if (!gf_mem_acct_enabled()) return CALLOC(nmemb, size); xl = THIS; @@ -114,19 +154,18 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr) gf_msg_nomem("", GF_LOG_ALERT, tot_size); return NULL; } - gf_mem_set_acct_info(xl, &ptr, req_size, type, typestr); - return (void *)ptr; + return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr); } void * __gf_malloc(size_t size, uint32_t type, const char *typestr) { size_t tot_size = 0; - char *ptr = NULL; + void *ptr = NULL; xlator_t *xl = NULL; - if (!THIS->ctx->mem_acct_enable) + if (!gf_mem_acct_enabled()) return MALLOC(size); xl = THIS; @@ -138,84 +177,32 @@ __gf_malloc(size_t size, uint32_t type, const char *typestr) gf_msg_nomem("", GF_LOG_ALERT, tot_size); return NULL; } - gf_mem_set_acct_info(xl, &ptr, size, type, typestr); - return (void *)ptr; + return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr); } void * __gf_realloc(void *ptr, size_t size) { size_t tot_size = 0; - char *new_ptr; - struct mem_header *old_header = NULL; - struct mem_header *new_header = NULL; - struct mem_header tmp_header; + struct mem_header *header = NULL; - if (!THIS->ctx->mem_acct_enable) + if (!gf_mem_acct_enabled()) return REALLOC(ptr, size); REQUIRE(NULL != ptr); - old_header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE); - GF_ASSERT(old_header->magic == GF_MEM_HEADER_MAGIC); - tmp_header = *old_header; - -#ifdef DEBUG - int type = 0; - size_t copy_size = 0; - - /* Making these changes for realloc is not straightforward. 
So - * I am simulating realloc using calloc and free - */ - - type = tmp_header.type; - new_ptr = __gf_calloc(1, size, type, - tmp_header.mem_acct->rec[type].typestr); - if (new_ptr) { - copy_size = (size > tmp_header.size) ? tmp_header.size : size; - memcpy(new_ptr, ptr, copy_size); - __gf_free(ptr); - } - - /* This is not quite what the man page says should happen */ - return new_ptr; -#endif + header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE); + GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC); tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE; - new_ptr = realloc(old_header, tot_size); - if (!new_ptr) { + header = realloc(header, tot_size); + if (!header) { gf_msg_nomem("", GF_LOG_ALERT, tot_size); return NULL; } - /* - * We used to pass (char **)&ptr as the second - * argument after the value of realloc was saved - * in ptr, but the compiler warnings complained - * about the casting to and forth from void ** to - * char **. - * TBD: it would be nice to adjust the memory accounting info here, - * but calling gf_mem_set_acct_info here is wrong because it bumps - * up counts as though this is a new allocation - which it's not. - * The consequence of doing nothing here is only that the sizes will be - * wrong, but at least the counts won't be. 
- uint32_t type = 0; - xlator_t *xl = NULL; - type = header->type; - xl = (xlator_t *) header->xlator; - gf_mem_set_acct_info (xl, &new_ptr, size, type, NULL); - */ - - new_header = (struct mem_header *)new_ptr; - *new_header = tmp_header; - new_header->size = size; - - new_ptr += sizeof(struct mem_header); - /* data follows in this gap of 'size' bytes */ - *(uint32_t *)(new_ptr + size) = GF_MEM_TRAILER_MAGIC; - - return (void *)new_ptr; + return gf_mem_update_acct_info(header->mem_acct, header, size); } int @@ -300,14 +287,30 @@ __gf_mem_invalidate(void *ptr) } #endif /* DEBUG */ +/* Coverity taint NOTE: pointers passed to free, would operate on +pointer-GF_MEM_HEADER_SIZE content and if the pointer was used for any IO +related purpose, the pointer stands tainted, and hence coverity would consider +access to the said region as tainted. The following directive to coverity hence +sanitizes the pointer, thus removing any taint to the same within this function. +If the pointer is accessed outside the scope of this function without any +checks on content read from an IO operation, taints will still be reported, and +needs appropriate addressing. 
*/ + +/* coverity[ +tainted_data_sanitize : arg-0 ] */ +static void +gf_free_sanitize(void *s) +{ +} + void __gf_free(void *free_ptr) { void *ptr = NULL; struct mem_acct *mem_acct; struct mem_header *header = NULL; + bool last_ref = false; - if (!THIS->ctx->mem_acct_enable) { + if (!gf_mem_acct_enabled()) { FREE(free_ptr); return; } @@ -315,6 +318,7 @@ __gf_free(void *free_ptr) if (!free_ptr) return; + gf_free_sanitize(free_ptr); ptr = free_ptr - GF_MEM_HEADER_SIZE; header = (struct mem_header *)ptr; @@ -336,16 +340,18 @@ __gf_free(void *free_ptr) mem_acct->rec[header->type].num_allocs--; /* If all the instances are freed up then ensure typestr is set * to NULL */ - if (!mem_acct->rec[header->type].num_allocs) + if (!mem_acct->rec[header->type].num_allocs) { + last_ref = true; mem_acct->rec[header->type].typestr = NULL; + } #ifdef DEBUG list_del(&header->acct_list); #endif } UNLOCK(&mem_acct->rec[header->type].lock); - if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { - FREE(mem_acct); + if (last_ref) { + xlator_mem_acct_unref(mem_acct); } free: @@ -356,11 +362,30 @@ free: FREE(ptr); } -#define POOL_SMALLEST 7 /* i.e. 128 */ -#define POOL_LARGEST 20 /* i.e. 
1048576 */ -#define NPOOLS (POOL_LARGEST - POOL_SMALLEST + 1) +#if defined(GF_DISABLE_MEMPOOL) + +struct mem_pool * +mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, + unsigned long count, char *name) +{ + struct mem_pool *new; + + new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool); + if (!new) + return NULL; + + new->sizeof_type = sizeof_type; + return new; +} + +void +mem_pool_destroy(struct mem_pool *pool) +{ + GF_FREE(pool); +} + +#else /* !GF_DISABLE_MEMPOOL */ -static pthread_key_t pool_key; static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER; static struct list_head pool_threads; static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER; @@ -368,23 +393,18 @@ static struct list_head pool_free_threads; static struct mem_pool_shared pools[NPOOLS]; static size_t pool_list_size; -#if !defined(GF_DISABLE_MEMPOOL) +static __thread per_thread_pool_list_t *thread_pool_list = NULL; + #define N_COLD_LISTS 1024 #define POOL_SWEEP_SECS 30 -static unsigned long sweep_times; -static unsigned long sweep_usecs; -static unsigned long frees_to_system; - typedef struct { - struct list_head death_row; pooled_obj_hdr_t *cold_lists[N_COLD_LISTS]; unsigned int n_cold_lists; } sweep_state_t; enum init_state { GF_MEMPOOL_INIT_NONE = 0, - GF_MEMPOOL_INIT_PREINIT, GF_MEMPOOL_INIT_EARLY, GF_MEMPOOL_INIT_LATE, GF_MEMPOOL_INIT_DESTROY @@ -395,39 +415,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; static unsigned int init_count = 0; static pthread_t sweeper_tid; -void +static bool collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list) { unsigned int i; per_thread_pool_t *pt_pool; - if (pool_list->poison) { - list_del(&pool_list->thr_list); - list_add(&pool_list->thr_list, &state->death_row); - return; - } - - if (state->n_cold_lists >= N_COLD_LISTS) { - return; - } - (void)pthread_spin_lock(&pool_list->lock); + for (i = 0; i < NPOOLS; ++i) { pt_pool = &pool_list->pools[i]; if (pt_pool->cold_list) { + if 
(state->n_cold_lists >= N_COLD_LISTS) { + (void)pthread_spin_unlock(&pool_list->lock); + return true; + } state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; } pt_pool->cold_list = pt_pool->hot_list; pt_pool->hot_list = NULL; - if (state->n_cold_lists >= N_COLD_LISTS) { - /* We'll just catch up on a future pass. */ - break; - } } + (void)pthread_spin_unlock(&pool_list->lock); + + return false; } -void +static void free_obj_list(pooled_obj_hdr_t *victim) { pooled_obj_hdr_t *next; @@ -436,78 +450,101 @@ free_obj_list(pooled_obj_hdr_t *victim) next = victim->next; free(victim); victim = next; - ++frees_to_system; } } -void * +static void * pool_sweeper(void *arg) { sweep_state_t state; per_thread_pool_list_t *pool_list; - per_thread_pool_list_t *next_pl; - per_thread_pool_t *pt_pool; - unsigned int i; - struct timeval begin_time; - struct timeval end_time; - struct timeval elapsed; + uint32_t i; + bool pending; /* * This is all a bit inelegant, but the point is to avoid doing * expensive things (like freeing thousands of objects) while holding a - * global lock. Thus, we split each iteration into three passes, with + * global lock. Thus, we split each iteration into two passes, with * only the first and fastest holding the lock. */ + pending = true; + for (;;) { - sleep(POOL_SWEEP_SECS); + /* If we know there's pending work to do (or it's the first run), we + * do collect garbage more often. */ + sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS); + (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); - INIT_LIST_HEAD(&state.death_row); state.n_cold_lists = 0; + pending = false; /* First pass: collect stuff that needs our attention. 
*/ - (void)gettimeofday(&begin_time, NULL); (void)pthread_mutex_lock(&pool_lock); - list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list) + list_for_each_entry(pool_list, &pool_threads, thr_list) { - collect_garbage(&state, pool_list); - } - (void)pthread_mutex_unlock(&pool_lock); - (void)gettimeofday(&end_time, NULL); - timersub(&end_time, &begin_time, &elapsed); - sweep_usecs += elapsed.tv_sec * 1000000 + elapsed.tv_usec; - sweep_times += 1; - - /* Second pass: free dead pools. */ - (void)pthread_mutex_lock(&pool_free_lock); - list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list) - { - for (i = 0; i < NPOOLS; ++i) { - pt_pool = &pool_list->pools[i]; - free_obj_list(pt_pool->cold_list); - free_obj_list(pt_pool->hot_list); - pt_pool->hot_list = pt_pool->cold_list = NULL; + if (collect_garbage(&state, pool_list)) { + pending = true; } - list_del(&pool_list->thr_list); - list_add(&pool_list->thr_list, &pool_free_threads); } - (void)pthread_mutex_unlock(&pool_free_lock); + (void)pthread_mutex_unlock(&pool_lock); - /* Third pass: free cold objects from live pools. */ + /* Second pass: free cold objects from live pools. */ for (i = 0; i < state.n_cold_lists; ++i) { free_obj_list(state.cold_lists[i]); } (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); } + + return NULL; } void -pool_destructor(void *arg) +mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) { - per_thread_pool_list_t *pool_list = arg; + per_thread_pool_t *pt_pool; + uint32_t i; - /* The pool-sweeper thread will take it from here. */ - pool_list->poison = 1; + if (pool_list == NULL) { + pool_list = thread_pool_list; + } + + /* The current thread is terminating. None of the allocated objects will + * be used again. We can directly destroy them here instead of delaying + * it until the next sweeper loop. */ + if (pool_list != NULL) { + /* Remove pool_list from the global list to avoid that sweeper + * could touch it. 
*/ + pthread_mutex_lock(&pool_lock); + list_del(&pool_list->thr_list); + pthread_mutex_unlock(&pool_lock); + + /* We need to protect hot/cold changes from potential mem_put() calls + * that reference this pool_list. Once poison is set to true, we are + * sure that no one else will touch hot/cold lists. The only possible + * race is when at the same moment a mem_put() is adding a new item + * to the hot list. We protect from that by taking pool_list->lock. + * After that we don't need the lock to destroy the hot/cold lists. */ + pthread_spin_lock(&pool_list->lock); + pool_list->poison = true; + pthread_spin_unlock(&pool_list->lock); + + for (i = 0; i < NPOOLS; i++) { + pt_pool = &pool_list->pools[i]; + + free_obj_list(pt_pool->hot_list); + pt_pool->hot_list = NULL; + + free_obj_list(pt_pool->cold_list); + pt_pool->cold_list = NULL; + } + + pthread_mutex_lock(&pool_free_lock); + list_add(&pool_list->thr_list, &pool_free_threads); + pthread_mutex_unlock(&pool_free_lock); + + thread_pool_list = NULL; + } } static __attribute__((constructor)) void @@ -530,46 +567,30 @@ mem_pools_preinit(void) pool_list_size = sizeof(per_thread_pool_list_t) + sizeof(per_thread_pool_t) * (NPOOLS - 1); - init_done = GF_MEMPOOL_INIT_PREINIT; + init_done = GF_MEMPOOL_INIT_EARLY; } -/* Use mem_pools_init_early() function for basic initialization. There will be - * no cleanup done by the pool_sweeper thread until mem_pools_init_late() has - * been called. Calling mem_get() will be possible after this function has - * setup the basic structures. */ -void -mem_pools_init_early(void) +static __attribute__((destructor)) void +mem_pools_postfini(void) { - pthread_mutex_lock(&init_mutex); - /* Use a pthread_key destructor to clean up when a thread exits. 
+ /* TODO: This function should destroy all per thread memory pools that + * are still alive, but this is not possible right now because glibc + * starts calling destructors as soon as exit() is called, and + * gluster doesn't ensure that all threads have been stopped before + * calling exit(). Existing threads would crash when they try to use + * memory or they terminate if we destroy things here. * - * We won't increase init_count here, that is only done when the - * pool_sweeper thread is started too. - */ - if (init_done == GF_MEMPOOL_INIT_PREINIT || - init_done == GF_MEMPOOL_INIT_DESTROY) { - /* key has not been created yet */ - if (pthread_key_create(&pool_key, pool_destructor) != 0) { - gf_log("mem-pool", GF_LOG_CRITICAL, - "failed to initialize mem-pool key"); - } - - init_done = GF_MEMPOOL_INIT_EARLY; - } else { - gf_log("mem-pool", GF_LOG_CRITICAL, - "incorrect order of mem-pool initialization " - "(init_done=%d)", - init_done); - } - - pthread_mutex_unlock(&init_mutex); + * When we propertly terminate all threads, we can add the needed + * code here. Till then we need to leave the memory allocated. Most + * probably this function will be executed on process termination, + * so the memory will be released anyway by the system. */ } -/* Call mem_pools_init_late() once threading has been configured completely. - * This prevent the pool_sweeper thread from getting killed once the main() - * thread exits during deamonizing. */ +/* Call mem_pools_init() once threading has been configured completely. This + * prevent the pool_sweeper thread from getting killed once the main() thread + * exits during deamonizing. */ void -mem_pools_init_late(void) +mem_pools_init(void) { pthread_mutex_lock(&init_mutex); if ((init_count++) == 0) { @@ -588,56 +609,27 @@ mem_pools_fini(void) switch (init_count) { case 0: /* - * If init_count is already zero (as e.g. 
if somebody called - * this before mem_pools_init_late) then the sweeper was - * probably never even started so we don't need to stop it. - * Even if there's some crazy circumstance where there is a - * sweeper but init_count is still zero, that just means we'll - * leave it running. Not perfect, but far better than any - * known alternative. + * If init_count is already zero (as e.g. if somebody called this + * before mem_pools_init) then the sweeper was probably never even + * started so we don't need to stop it. Even if there's some crazy + * circumstance where there is a sweeper but init_count is still + * zero, that just means we'll leave it running. Not perfect, but + * far better than any known alternative. */ break; case 1: { - per_thread_pool_list_t *pool_list; - per_thread_pool_list_t *next_pl; - unsigned int i; - - /* if only mem_pools_init_early() was called, sweeper_tid will - * be invalid and the functions will error out. That is not - * critical. In all other cases, the sweeper_tid will be valid - * and the thread gets stopped. */ + /* if mem_pools_init() was not called, sweeper_tid will be invalid + * and the functions will error out. That is not critical. In all + * other cases, the sweeper_tid will be valid and the thread gets + * stopped. */ (void)pthread_cancel(sweeper_tid); (void)pthread_join(sweeper_tid, NULL); - /* Need to clean the pool_key to prevent further usage of the - * per_thread_pool_list_t structure that is stored for each - * thread. - * This also prevents calling pool_destructor() when a thread - * exits, so there is no chance on a use-after-free of the - * per_thread_pool_list_t structure. 
*/ - (void)pthread_key_delete(pool_key); - - /* free all objects from all pools */ - list_for_each_entry_safe(pool_list, next_pl, &pool_threads, - thr_list) - { - for (i = 0; i < NPOOLS; ++i) { - free_obj_list(pool_list->pools[i].hot_list); - free_obj_list(pool_list->pools[i].cold_list); - pool_list->pools[i].hot_list = NULL; - pool_list->pools[i].cold_list = NULL; - } - - list_del(&pool_list->thr_list); - FREE(pool_list); - } - - list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads, - thr_list) - { - list_del(&pool_list->thr_list); - FREE(pool_list); - } + /* There could be threads still running in some cases, so we can't + * destroy pool_lists in use. We can also not destroy unused + * pool_lists because some allocated objects may still be pointing + * to them. */ + mem_pool_thread_destructor(NULL); init_done = GF_MEMPOOL_INIT_DESTROY; /* Fall through. */ @@ -648,26 +640,36 @@ mem_pools_fini(void) pthread_mutex_unlock(&init_mutex); } -#else void -mem_pools_init_early(void) -{ -} -void -mem_pools_init_late(void) -{ -} -void -mem_pools_fini(void) +mem_pool_destroy(struct mem_pool *pool) { + if (!pool) + return; + + /* remove this pool from the owner (glusterfs_ctx_t) */ + LOCK(&pool->ctx->lock); + { + list_del(&pool->owner); + } + UNLOCK(&pool->ctx->lock); + + /* free this pool, but keep the mem_pool_shared */ + GF_FREE(pool); + + /* + * Pools are now permanent, so the mem_pool->pool is kept around. All + * of the objects *in* the pool will eventually be freed via the + * pool-sweeper thread, and this way we don't have to add a lot of + * reference-counting complexity. 
+ */ } -#endif struct mem_pool * mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, unsigned long count, char *name) { - unsigned int i; + unsigned long extra_size, size; + unsigned int power; struct mem_pool *new = NULL; struct mem_pool_shared *pool = NULL; @@ -677,20 +679,33 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, return NULL; } - for (i = 0; i < NPOOLS; ++i) { - if (sizeof_type <= AVAILABLE_SIZE(pools[i].power_of_two)) { - pool = &pools[i]; - break; - } - } - - if (!pool) { + /* This is the overhead we'll have because of memory accounting for each + * memory block. */ + extra_size = sizeof(pooled_obj_hdr_t); + + /* We need to compute the total space needed to hold the data type and + * the header. Given that the smallest block size we have in the pools + * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST). + * However, since this value is only needed to compute its rounded + * logarithm in base 2, and this only depends on the highest bit set, + * we can simply do a bitwise or with the minimum size. We need to + * subtract 1 for correct handling of sizes that are exactly a power + * of 2. */ + size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL); + + /* We compute the logarithm in base 2 rounded up of the resulting size. + * This value will identify which pool we need to use from the pools of + * powers of 2. This is equivalent to finding the position of the highest + * bit set. 
*/ + power = sizeof(size) * 8 - __builtin_clzl(size); + if (power > POOL_LARGEST) { gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "invalid argument"); return NULL; } + pool = &pools[power - POOL_SMALLEST]; - new = GF_CALLOC(sizeof(struct mem_pool), 1, gf_common_mt_mem_pool); + new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool); if (!new) return NULL; @@ -698,8 +713,13 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, new->sizeof_type = sizeof_type; new->count = count; new->name = name; + new->xl_name = THIS->name; new->pool = pool; GF_ATOMIC_INIT(new->active, 0); +#ifdef DEBUG + GF_ATOMIC_INIT(new->hit, 0); + GF_ATOMIC_INIT(new->miss, 0); +#endif INIT_LIST_HEAD(&new->owner); LOCK(&ctx->lock); @@ -711,36 +731,13 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, return new; } -void * -mem_get0(struct mem_pool *mem_pool) -{ - void *ptr = NULL; - - if (!mem_pool) { - gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, - "invalid argument"); - return NULL; - } - - ptr = mem_get(mem_pool); - if (ptr) { -#if defined(GF_DISABLE_MEMPOOL) - memset(ptr, 0, mem_pool->sizeof_type); -#else - memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two)); -#endif - } - - return ptr; -} - per_thread_pool_list_t * mem_get_pool_list(void) { per_thread_pool_list_t *pool_list; unsigned int i; - pool_list = pthread_getspecific(pool_key); + pool_list = thread_pool_list; if (pool_list) { return pool_list; } @@ -754,7 +751,7 @@ mem_get_pool_list(void) (void)pthread_mutex_unlock(&pool_free_lock); if (!pool_list) { - pool_list = CALLOC(pool_list_size, 1); + pool_list = MALLOC(pool_list_size); if (!pool_list) { return NULL; } @@ -768,74 +765,114 @@ mem_get_pool_list(void) } } + /* There's no need to take pool_list->lock, because this is already an + * atomic operation and we don't need to synchronize it with any change + * in hot/cold lists. 
*/ + pool_list->poison = false; + (void)pthread_mutex_lock(&pool_lock); - pool_list->poison = 0; list_add(&pool_list->thr_list, &pool_threads); (void)pthread_mutex_unlock(&pool_lock); - (void)pthread_setspecific(pool_key, pool_list); + thread_pool_list = pool_list; + + /* Ensure that all memory objects associated to the new pool_list are + * destroyed when the thread terminates. */ + gf_thread_needs_cleanup(); + return pool_list; } -pooled_obj_hdr_t * -mem_get_from_pool(per_thread_pool_t *pt_pool) +static pooled_obj_hdr_t * +mem_get_from_pool(struct mem_pool *mem_pool) { + per_thread_pool_list_t *pool_list; + per_thread_pool_t *pt_pool; pooled_obj_hdr_t *retval; +#ifdef DEBUG + gf_boolean_t hit = _gf_true; +#endif + + pool_list = mem_get_pool_list(); + if (!pool_list || pool_list->poison) { + return NULL; + } + + pt_pool = &pool_list->pools[mem_pool->pool->power_of_two - POOL_SMALLEST]; + + (void)pthread_spin_lock(&pool_list->lock); retval = pt_pool->hot_list; if (retval) { - GF_ATOMIC_INC(pt_pool->parent->allocs_hot); pt_pool->hot_list = retval->next; - return retval; + (void)pthread_spin_unlock(&pool_list->lock); + GF_ATOMIC_INC(pt_pool->parent->allocs_hot); + } else { + retval = pt_pool->cold_list; + if (retval) { + pt_pool->cold_list = retval->next; + (void)pthread_spin_unlock(&pool_list->lock); + GF_ATOMIC_INC(pt_pool->parent->allocs_cold); + } else { + (void)pthread_spin_unlock(&pool_list->lock); + GF_ATOMIC_INC(pt_pool->parent->allocs_stdc); + retval = malloc(1 << pt_pool->parent->power_of_two); +#ifdef DEBUG + hit = _gf_false; +#endif + } } - retval = pt_pool->cold_list; - if (retval) { - GF_ATOMIC_INC(pt_pool->parent->allocs_cold); - pt_pool->cold_list = retval->next; - return retval; + if (retval != NULL) { + retval->pool = mem_pool; + retval->power_of_two = mem_pool->pool->power_of_two; +#ifdef DEBUG + if (hit == _gf_true) + GF_ATOMIC_INC(mem_pool->hit); + else + GF_ATOMIC_INC(mem_pool->miss); +#endif + retval->magic = GF_MEM_HEADER_MAGIC; + 
retval->pool_list = pool_list; } - GF_ATOMIC_INC(pt_pool->parent->allocs_stdc); - return malloc(1 << pt_pool->parent->power_of_two); + return retval; } +#endif /* GF_DISABLE_MEMPOOL */ + void * -mem_get(struct mem_pool *mem_pool) +mem_get0(struct mem_pool *mem_pool) { + void *ptr = mem_get(mem_pool); + if (ptr) { #if defined(GF_DISABLE_MEMPOOL) - return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool); + memset(ptr, 0, mem_pool->sizeof_type); #else - per_thread_pool_list_t *pool_list; - per_thread_pool_t *pt_pool; - pooled_obj_hdr_t *retval; + memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two)); +#endif + } + + return ptr; +} +void * +mem_get(struct mem_pool *mem_pool) +{ if (!mem_pool) { gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "invalid argument"); return NULL; } - pool_list = mem_get_pool_list(); - if (!pool_list || pool_list->poison) { - return NULL; - } - - (void)pthread_spin_lock(&pool_list->lock); - pt_pool = &pool_list->pools[mem_pool->pool->power_of_two - POOL_SMALLEST]; - retval = mem_get_from_pool(pt_pool); - +#if defined(GF_DISABLE_MEMPOOL) + return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool); +#else + pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool); if (!retval) { - (void)pthread_spin_unlock(&pool_list->lock); return NULL; } - retval->magic = GF_MEM_HEADER_MAGIC; - retval->pool = mem_pool; - retval->pool_list = pool_list; - retval->power_of_two = mem_pool->pool->power_of_two; - (void)pthread_spin_unlock(&pool_list->lock); - GF_ATOMIC_INC(mem_pool->active); return retval + 1; @@ -863,40 +900,33 @@ mem_put(void *ptr) /* Not one of ours; don't touch it. 
*/ return; } + + if (!hdr->pool_list) { + gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL, + LG_MSG_INVALID_ARG, + "invalid argument hdr->pool_list NULL"); + return; + } + pool_list = hdr->pool_list; pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST]; - GF_ATOMIC_DEC(hdr->pool->active); + if (hdr->pool) + GF_ATOMIC_DEC(hdr->pool->active); - (void)pthread_spin_lock(&pool_list->lock); hdr->magic = GF_MEM_INVALID_MAGIC; - hdr->next = pt_pool->hot_list; - pt_pool->hot_list = hdr; - GF_ATOMIC_INC(pt_pool->parent->frees_to_list); - (void)pthread_spin_unlock(&pool_list->lock); -#endif /* GF_DISABLE_MEMPOOL */ -} -void -mem_pool_destroy(struct mem_pool *pool) -{ - if (!pool) - return; - - /* remove this pool from the owner (glusterfs_ctx_t) */ - LOCK(&pool->ctx->lock); - { - list_del(&pool->owner); + (void)pthread_spin_lock(&pool_list->lock); + if (!pool_list->poison) { + hdr->next = pt_pool->hot_list; + pt_pool->hot_list = hdr; + (void)pthread_spin_unlock(&pool_list->lock); + GF_ATOMIC_INC(pt_pool->parent->frees_to_list); + } else { + /* If the owner thread of this element has terminated, we simply + * release its memory. */ + (void)pthread_spin_unlock(&pool_list->lock); + free(hdr); } - UNLOCK(&pool->ctx->lock); - - /* free this pool, but keep the mem_pool_shared */ - GF_FREE(pool); - - /* - * Pools are now permanent, so the mem_pool->pool is kept around. All - * of the objects *in* the pool will eventually be freed via the - * pool-sweeper thread, and this way we don't have to add a lot of - * reference-counting complexity. - */ +#endif /* GF_DISABLE_MEMPOOL */ } diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h deleted file mode 100644 index 4f6ca4e0e84..00000000000 --- a/libglusterfs/src/mem-types.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - Copyright (c) 2008-2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. 
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __MEM_TYPES_H__ -#define __MEM_TYPES_H__ - -enum gf_common_mem_types_ { - gf_common_mt_call_stub_t, - gf_common_mt_dnscache6, - gf_common_mt_data_pair_t, - gf_common_mt_data_t, - gf_common_mt_dict_t, - gf_common_mt_event_pool, - gf_common_mt_reg, - gf_common_mt_pollfd, - gf_common_mt_epoll_event, - gf_common_mt_fdentry_t, - gf_common_mt_fdtable_t, - gf_common_mt_fd_t, - gf_common_mt_fd_ctx, - gf_common_mt_gf_dirent_t, - gf_common_mt_glusterfs_ctx_t, - gf_common_mt_dentry_t, - gf_common_mt_inode_t, - gf_common_mt_inode_ctx, - gf_common_mt_list_head, - gf_common_mt_inode_table_t, - gf_common_mt_xlator_t, - gf_common_mt_xlator_list_t, - gf_common_mt_log_msg, - gf_common_mt_client_log, - gf_common_mt_volume_opt_list_t, - gf_common_mt_gf_hdr_common_t, - gf_common_mt_call_frame_t, - gf_common_mt_call_stack_t, - gf_common_mt_gf_timer_t, - gf_common_mt_gf_timer_registry_t, - gf_common_mt_transport, - gf_common_mt_transport_msg, - gf_common_mt_auth_handle_t, - gf_common_mt_iobuf, - gf_common_mt_iobuf_arena, - gf_common_mt_iobref, - gf_common_mt_iobuf_pool, - gf_common_mt_iovec, - gf_common_mt_memdup, - gf_common_mt_asprintf, - gf_common_mt_strdup, - gf_common_mt_socket_private_t, - gf_common_mt_ioq, - gf_common_mt_transport_t, - gf_common_mt_socket_local_t, - gf_common_mt_char, - gf_common_mt_rbthash_table_t, - gf_common_mt_rbthash_bucket, - gf_common_mt_mem_pool, - gf_common_mt_long, - gf_common_mt_rpcsvc_auth_list, - gf_common_mt_rpcsvc_t, - gf_common_mt_rpcsvc_conn_t, - gf_common_mt_rpcsvc_program_t, - gf_common_mt_rpcsvc_listener_t, - gf_common_mt_rpcsvc_wrapper_t, - gf_common_mt_rpcsvc_stage_t, - gf_common_mt_rpcclnt_t, - gf_common_mt_rpcclnt_savedframe_t, - gf_common_mt_rpc_trans_t, - 
gf_common_mt_rpc_trans_pollin_t, - gf_common_mt_rpc_trans_handover_t, - gf_common_mt_rpc_trans_reqinfo_t, - gf_common_mt_rpc_trans_rsp_t, - gf_common_mt_glusterfs_graph_t, - gf_common_mt_rdma_private_t, - gf_common_mt_rdma_ioq_t, - gf_common_mt_rpc_transport_t, - gf_common_mt_rdma_local_t, - gf_common_mt_rdma_post_t, - gf_common_mt_qpent, - gf_common_mt_rdma_device_t, - gf_common_mt_rdma_context_t, - gf_common_mt_sge, - gf_common_mt_rpcclnt_cb_program_t, - gf_common_mt_libxl_marker_local, - gf_common_mt_graph_buf, - gf_common_mt_trie_trie, - gf_common_mt_trie_data, - gf_common_mt_trie_node, - gf_common_mt_trie_buf, - gf_common_mt_trie_end, - gf_common_mt_run_argv, - gf_common_mt_run_logbuf, - gf_common_mt_fd_lk_ctx_t, - gf_common_mt_fd_lk_ctx_node_t, - gf_common_mt_buffer_t, - gf_common_mt_circular_buffer_t, - gf_common_mt_eh_t, - gf_common_mt_store_handle_t, - gf_common_mt_store_iter_t, - gf_common_mt_drc_client_t, - gf_common_mt_drc_globals_t, - gf_common_mt_drc_rbtree_node_t, - gf_common_mt_iov_base_t, - gf_common_mt_groups_t, - gf_common_mt_cliententry_t, - gf_common_mt_clienttable_t, - gf_common_mt_client_t, - gf_common_mt_client_ctx, - gf_common_mt_lock_table, - gf_common_mt_locker, - gf_common_mt_auxgids, - gf_common_mt_syncopctx, - gf_common_mt_iobrefs, - gf_common_mt_gsync_status_t, - gf_common_mt_uuid_t, - gf_common_mt_mgmt_v3_lock_obj_t, - gf_common_mt_txn_opinfo_obj_t, - gf_common_mt_strfd_t, - gf_common_mt_strfd_data_t, - gf_common_mt_regex_t, - gf_common_mt_ereg, - gf_common_mt_wr, - gf_common_mt_rdma_arena_mr, - gf_common_mt_dnscache = 115, - gf_common_mt_dnscache_entry = 116, - gf_common_mt_parser_t, - gf_common_quota_meta_t, - /*related to gfdb library*/ - gfdb_mt_time_t, - gf_mt_sql_cbk_args_t, - gf_mt_gfdb_query_record_t, - gf_mt_gfdb_link_info_t, - gf_mt_gfdb_db_operations_t, - gf_mt_sql_connection_t, - gf_mt_sql_conn_node_t, - gf_mt_db_conn_node_t, - gf_mt_db_connection_t, - gfdb_mt_db_record_t, - /*related to gfdb library*/ - 
gf_common_mt_rbuf_t, - gf_common_mt_rlist_t, - gf_common_mt_rvec_t, - /* glusterd can load the nfs-xlator dynamically and needs these two */ - gf_common_mt_nfs_netgroups, - gf_common_mt_nfs_exports, - gf_common_mt_gf_brick_spec_t, - gf_common_mt_gf_timer_entry_t, - gf_common_mt_int, - gf_common_mt_pointer, - gf_common_mt_synctask, - gf_common_mt_syncstack, - gf_common_mt_syncenv, - gf_common_mt_scan_data, - gf_common_list_node, - gf_mt_default_args_t, - gf_mt_default_args_cbk_t, - /*used for compound fops*/ - gf_mt_compound_req_t, - gf_mt_compound_rsp_t, - gf_common_mt_tw_ctx, - gf_common_mt_tw_timer_list, - /*lock migration*/ - gf_common_mt_lock_mig, - /* throttle */ - gf_common_mt_tbf_t, - gf_common_mt_tbf_bucket_t, - gf_common_mt_tbf_throttle_t, - gf_common_mt_pthread_t, - gf_common_ping_local_t, - gf_common_volfile_t, - gf_common_mt_mgmt_v3_lock_timer_t, - gf_common_mt_server_cmdline_t, - gf_common_mt_end -}; -#endif diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c index 041b70acf7a..fbb68dc8622 100644 --- a/libglusterfs/src/monitoring.c +++ b/libglusterfs/src/monitoring.c @@ -8,9 +8,9 @@ cases as published by the Free Software Foundation. 
*/ -#include "monitoring.h" -#include "xlator.h" -#include "syscall.h" +#include "glusterfs/monitoring.h" +#include "glusterfs/xlator.h" +#include "glusterfs/syscall.h" #include <stdlib.h> @@ -34,10 +34,7 @@ dump_mem_acct_details(xlator_t *xl, int fd) mem_rec = &xl->mem_acct->rec[i]; if (mem_rec->num_allocs == 0) continue; - dprintf(fd, - "# %s, %" GF_PRI_SIZET ", %u, %" GF_PRI_SIZET - ", %u," - " %u\n", + dprintf(fd, "# %s, %" PRIu64 ", %u, %" PRIu64 ", %u, %" PRIu64 "\n", mem_rec->typestr, mem_rec->size, mem_rec->num_allocs, mem_rec->max_size, mem_rec->max_num_allocs, mem_rec->total_allocs); @@ -84,45 +81,47 @@ dump_latency_and_count(xlator_t *xl, int fd) uint64_t cbk; uint64_t count; - if (xl->winds) - dprintf(fd, "%s.total.pending-winds.count %lu\n", xl->name, xl->winds); + if (xl->winds) { + dprintf(fd, "%s.total.pending-winds.count %" PRIu64 "\n", xl->name, + xl->winds); + } /* Need 'fuse' data, and don't need all the old graph info */ if ((xl != xl->ctx->master) && (xl->ctx->active != xl->graph)) return; count = GF_ATOMIC_GET(xl->stats.total.count); - dprintf(fd, "%s.total.fop-count %lu\n", xl->name, count); + dprintf(fd, "%s.total.fop-count %" PRIu64 "\n", xl->name, count); count = GF_ATOMIC_GET(xl->stats.interval.count); - dprintf(fd, "%s.interval.fop-count %lu\n", xl->name, count); + dprintf(fd, "%s.interval.fop-count %" PRIu64 "\n", xl->name, count); GF_ATOMIC_INIT(xl->stats.interval.count, 0); for (index = 0; index < GF_FOP_MAXVALUE; index++) { fop = GF_ATOMIC_GET(xl->stats.total.metrics[index].fop); if (fop) { - dprintf(fd, "%s.total.%s.count %lu\n", xl->name, gf_fop_list[index], - fop); + dprintf(fd, "%s.total.%s.count %" PRIu64 "\n", xl->name, + gf_fop_list[index], fop); } fop = GF_ATOMIC_GET(xl->stats.interval.metrics[index].fop); if (fop) { - dprintf(fd, "%s.interval.%s.count %lu\n", xl->name, + dprintf(fd, "%s.interval.%s.count %" PRIu64 "\n", xl->name, gf_fop_list[index], fop); } cbk = GF_ATOMIC_GET(xl->stats.interval.metrics[index].cbk); if 
(cbk) { - dprintf(fd, "%s.interval.%s.fail_count %lu\n", xl->name, + dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name, gf_fop_list[index], cbk); } - if (xl->stats.interval.latencies[index].count != 0.0) { + if (xl->stats.interval.latencies[index].count != 0) { dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name, gf_fop_list[index], - (xl->stats.interval.latencies[index].total / + (((double)xl->stats.interval.latencies[index].total) / xl->stats.interval.latencies[index].count)); - dprintf(fd, "%s.interval.%s.max %lf\n", xl->name, + dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name, gf_fop_list[index], xl->stats.interval.latencies[index].max); - dprintf(fd, "%s.interval.%s.min %lf\n", xl->name, + dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name, gf_fop_list[index], xl->stats.interval.latencies[index].min); } @@ -136,9 +135,9 @@ dump_latency_and_count(xlator_t *xl, int fd) static inline void dump_call_stack_details(glusterfs_ctx_t *ctx, int fd) { - dprintf(fd, "total.stack.count %lu\n", + dprintf(fd, "total.stack.count %" PRIu64 "\n", GF_ATOMIC_GET(ctx->pool->total_count)); - dprintf(fd, "total.stack.in-flight %lu\n", ctx->pool->cnt); + dprintf(fd, "total.stack.in-flight %" PRIu64 "\n", ctx->pool->cnt); } static inline void @@ -150,11 +149,12 @@ dump_dict_details(glusterfs_ctx_t *ctx, int fd) total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used); total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used); - dprintf(fd, "total.dict.max-pairs-per %lu\n", + dprintf(fd, "total.dict.max-pairs-per %" PRIu64 "\n", GF_ATOMIC_GET(ctx->stats.max_dict_pairs)); - dprintf(fd, "total.dict.pairs-used %lu\n", total_pairs); - dprintf(fd, "total.dict.used %lu\n", total_dicts); - dprintf(fd, "total.dict.average-pairs %lu\n", (total_pairs / total_dicts)); + dprintf(fd, "total.dict.pairs-used %" PRIu64 "\n", total_pairs); + dprintf(fd, "total.dict.used %" PRIu64 "\n", total_dicts); + dprintf(fd, "total.dict.average-pairs %" PRIu64 "\n", + (total_pairs / 
total_dicts)); } static void @@ -228,12 +228,22 @@ gf_monitor_metrics(glusterfs_ctx_t *ctx) { int ret = -1; int fd = 0; - char *filepath, *dumppath; + char *filepath = NULL, *dumppath = NULL; + + gf_msg_trace("monitoring", 0, "received monitoring request (sig:USR2)"); dumppath = ctx->config.metrics_dumppath; if (dumppath == NULL) { dumppath = GLUSTER_METRICS_DIR; } + ret = mkdir_p(dumppath, 0755, true); + if (ret) { + /* EEXIST is handled in mkdir_p() itself */ + gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR, + "failed to create metrics dir %s (%s)", dumppath, + strerror(errno)); + return NULL; + } ret = gf_asprintf(&filepath, "%s/gmetrics.XXXXXX", dumppath); if (ret < 0) { diff --git a/libglusterfs/src/options.c b/libglusterfs/src/options.c index a96828447fc..f6b5aa0ea23 100644 --- a/libglusterfs/src/options.c +++ b/libglusterfs/src/options.c @@ -10,9 +10,9 @@ #include <fnmatch.h> -#include "xlator.h" -#include "defaults.h" -#include "libglusterfs-messages.h" +#include "glusterfs/xlator.h" +#include "glusterfs/defaults.h" +#include "glusterfs/libglusterfs-messages.h" #define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL) @@ -25,7 +25,8 @@ xlator_option_validate_path(xlator_t *xl, const char *key, const char *value, if (strstr(value, "../")) { snprintf(errstr, 256, "invalid path given '%s'", value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -35,7 +36,8 @@ xlator_option_validate_path(xlator_t *xl, const char *key, const char *value, "option %s %s: '%s' is not an " "absolute path name", key, value, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -59,7 +61,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value, if (gf_string2longlong(value, &inputll) != 0) { 
snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"", value, key); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -67,7 +70,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value, if ((inputll == 0) && (gf_string2ulonglong(value, &uinputll) != 0)) { snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"", value, key); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -87,8 +91,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value, "'%lld' in 'option %s %s' is smaller than " "minimum value '%.0f'", inputll, key, value, opt->min); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", - errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } } else if (opt->validate == GF_OPT_VALIDATE_MAX) { @@ -97,8 +101,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value, "'%lld' in 'option %s %s' is greater than " "maximum value '%.0f'", inputll, key, value, opt->max); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", - errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } } else if ((inputll < opt->min) || (inputll > opt->max)) { @@ -106,7 +110,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value, "'%lld' in 'option %s %s' is out of range " "[%.0f - %.0f]", inputll, key, value, opt->min, opt->max); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s", + errstr, NULL); goto out; } @@ -121,15 +126,16 @@ static int xlator_option_validate_sizet(xlator_t *xl, const char *key, const 
char *value, volume_option_t *opt, char **op_errstr) { - size_t size = 0; + uint64_t size = 0; int ret = 0; char errstr[256]; /* Check the range */ - if (gf_string2bytesize_size(value, &size) != 0) { + if (gf_string2bytesize_uint64(value, &size) != 0) { snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"", value, key); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); ret = -1; goto out; } @@ -144,11 +150,11 @@ xlator_option_validate_sizet(xlator_t *xl, const char *key, const char *value, if ((size < opt->min) || (size > opt->max)) { snprintf(errstr, 256, - "'%" GF_PRI_SIZET - "' in 'option %s %s' " - "is out of range [%.0f - %.0f]", + "'%" PRIu64 + "' in 'option %s %s' is out of range [%.0f - %.0f]", size, key, value, opt->min, opt->max); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s", + errstr, NULL); ret = -1; } @@ -172,7 +178,8 @@ xlator_option_validate_bool(xlator_t *xl, const char *key, const char *value, if (gf_string2boolean(value, &is_valid) != 0) { snprintf(errstr, 256, "option %s %s: '%s' is not a valid boolean value", key, value, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -207,7 +214,8 @@ xlator_option_validate_xlator(xlator_t *xl, const char *key, const char *value, if (!xlopt) { snprintf(errstr, 256, "option %s %s: '%s' is not a valid volume name", key, value, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -218,7 +226,7 @@ out: return ret; } -void +static void set_error_str(char *errstr, size_t len, volume_option_t *opt, const char *key, const char *value) { @@ 
-240,18 +248,15 @@ set_error_str(char *errstr, size_t len, volume_option_t *opt, const char *key, return; } -int +static int is_all_whitespaces(const char *value) { int i = 0; - size_t len = 0; if (value == NULL) return -1; - len = strlen(value); - - for (i = 0; i < len; i++) { + for (i = 0; value[i] != '\0'; i++) { if (value[i] == ' ') continue; else @@ -267,9 +272,6 @@ xlator_option_validate_str(xlator_t *xl, const char *key, const char *value, { int ret = -1; int i = 0; - char errstr[4096] = { - 0, - }; /* Check if the '*str' is valid */ if (GF_OPTION_LIST_EMPTY(opt)) { @@ -309,9 +311,11 @@ xlator_option_validate_str(xlator_t *xl, const char *key, const char *value, out: if (ret) { + char errstr[4096]; set_error_str(errstr, sizeof(errstr), opt, key, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); if (op_errstr) *op_errstr = gf_strdup(errstr); } @@ -330,7 +334,8 @@ xlator_option_validate_percent(xlator_t *xl, const char *key, const char *value, if (gf_string2percent(value, &percent) != 0) { snprintf(errstr, 256, "invalid percent format \"%s\" in \"option %s\"", value, key); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -338,7 +343,8 @@ xlator_option_validate_percent(xlator_t *xl, const char *key, const char *value, snprintf(errstr, 256, "'%lf' in 'option %s %s' is out of range [0 - 100]", percent, key, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s", + errstr, NULL); goto out; } @@ -384,8 +390,8 @@ xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key, "'%lf' in 'option %s %s' is out" " of range [0 - 100]", size, key, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", - errstr); + 
gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, + "error=%s", errstr, NULL); goto out; } ret = 0; @@ -400,8 +406,8 @@ xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key, " %s' should not be fractional value. Use " "valid unsigned integer value.", size, key, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", - errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -419,8 +425,8 @@ xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key, "'%lf' in 'option %s %s'" " is out of range [%.0f - %.0f]", size, key, value, opt->min, opt->max); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", - errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s", + errstr, NULL); goto out; } ret = 0; @@ -431,7 +437,8 @@ xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key, snprintf(errstr, 256, "invalid number format \"%s\" in \"option %s\"", value, key); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", errstr, + NULL); out: if (ret && op_errstr) @@ -453,7 +460,8 @@ xlator_option_validate_time(xlator_t *xl, const char *key, const char *value, "invalid time format \"%s\" in " "\"option %s\"", value, key); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -472,7 +480,8 @@ xlator_option_validate_time(xlator_t *xl, const char *key, const char *value, "' in 'option %s %s' is " "out of range [%.0f - %.0f]", input_time, key, value, opt->min, opt->max); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s", + errstr, NULL); goto out; } @@ -495,7 +504,8 @@ xlator_option_validate_double(xlator_t *xl, const char *key, const char 
*value, if (gf_string2double(value, &input) != 0) { snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"", value, key); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } @@ -515,8 +525,8 @@ xlator_option_validate_double(xlator_t *xl, const char *key, const char *value, "'%f' in 'option %s %s' is smaller than " "minimum value '%f'", input, key, value, opt->min); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", - errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } } else if (opt->validate == GF_OPT_VALIDATE_MAX) { @@ -525,8 +535,8 @@ xlator_option_validate_double(xlator_t *xl, const char *key, const char *value, "'%f' in 'option %s %s' is greater than " "maximum value '%f'", input, key, value, opt->max); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", - errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); goto out; } } else if ((input < opt->min) || (input > opt->max)) { @@ -534,7 +544,8 @@ xlator_option_validate_double(xlator_t *xl, const char *key, const char *value, "'%f' in 'option %s %s' is out of range " "[%f - %f]", input, key, value, opt->min, opt->max); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s", + errstr, NULL); goto out; } @@ -552,12 +563,11 @@ xlator_option_validate_addr(xlator_t *xl, const char *key, const char *value, int ret = -1; char errstr[256]; - if (!valid_internet_address((char *)value, _gf_false)) { - snprintf(errstr, 256, - "option %s %s: '%s' is not a valid internet-address," - " it does not conform to standards.", - key, value, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + if (!valid_internet_address((char *)value, _gf_false, _gf_false)) { + 
snprintf(errstr, 256, "option %s %s: Can not parse %s address", key, + value, value); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); if (op_errstr) *op_errstr = gf_strdup(errstr); } @@ -583,9 +593,6 @@ xlator_option_validate_addr_list(xlator_t *xl, const char *key, char *addr_list = NULL; char *addr = NULL; char *dir = NULL; - char errstr[4096] = { - 0, - }; dup_val = gf_strdup(value); if (!dup_val) @@ -595,7 +602,7 @@ xlator_option_validate_addr_list(xlator_t *xl, const char *key, /* Possible old format, handle it for back-ward compatibility */ addr_tok = strtok_r(dup_val, ",", &save_ptr); while (addr_tok) { - if (!valid_internet_address(addr_tok, _gf_true)) + if (!valid_internet_address(addr_tok, _gf_true, _gf_true)) goto out; addr_tok = strtok_r(NULL, ",", &save_ptr); @@ -630,7 +637,7 @@ xlator_option_validate_addr_list(xlator_t *xl, const char *key, if (addr_tok == NULL) goto out; while (addr_tok) { - if (!valid_internet_address(addr_tok, _gf_true)) + if (!valid_internet_address(addr_tok, _gf_true, _gf_true)) goto out; addr_tok = strtok_r(NULL, "|", &save_ptr); @@ -646,11 +653,13 @@ xlator_option_validate_addr_list(xlator_t *xl, const char *key, out: if (ret) { + char errstr[4096]; snprintf(errstr, sizeof(errstr), "option %s %s: '%s' is not " "a valid internet-address-list", key, value, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); if (op_errstr) *op_errstr = gf_strdup(errstr); } @@ -668,9 +677,6 @@ xlator_option_validate_mntauth(xlator_t *xl, const char *key, const char *value, char *dup_val = NULL; char *addr_tok = NULL; char *save_ptr = NULL; - char errstr[4096] = { - 0, - }; dup_val = gf_strdup(value); if (!dup_val) @@ -689,11 +695,13 @@ xlator_option_validate_mntauth(xlator_t *xl, const char *key, const char *value, out: if (ret) { + char errstr[4096]; snprintf(errstr, sizeof(errstr), "option %s 
%s: '%s' is not " "a valid mount-auth-address", key, value, value); - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", + errstr, NULL); if (op_errstr) *op_errstr = gf_strdup(errstr); } @@ -706,12 +714,12 @@ out: static int gf_validate_size(const char *sizestr, volume_option_t *opt) { - size_t value = 0; + uint64_t value = 0; int ret = 0; GF_ASSERT(opt); - if (gf_string2bytesize_size(sizestr, &value) != 0 || value < opt->min || + if (gf_string2bytesize_uint64(sizestr, &value) != 0 || value < opt->min || value % 512) { ret = -1; goto out; @@ -759,20 +767,16 @@ validate_list_elements(const char *string, volume_option_t *opt, key = strtok_r(str_ptr, ":", &substr_sav); if (!key || (key_validator && key_validator(key))) { ret = -1; - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY, - "invalid list '%s', key " - "'%s' not valid.", - string, key); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY, + "list=%s", string, "key=%s", key ? 
key : "", NULL); goto out; } value = strtok_r(NULL, ":", &substr_sav); if (!value || (value_validator && value_validator(value, opt))) { ret = -1; - gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY, - "invalid list '%s', " - "value '%s' not valid.", - string, key); + gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY, + "list=%s", string, "value=%s", key, NULL); goto out; } @@ -877,8 +881,8 @@ xlator_option_validate(xlator_t *xl, char *key, char *value, }; if (opt->type > GF_OPTION_TYPE_MAX) { - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, - "unknown option type '%d'", opt->type); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_UNKNOWN_OPTION_TYPE, + "type=%d", opt->type, NULL); goto out; } @@ -894,7 +898,6 @@ xlator_volume_option_get_list(volume_opt_list_t *vol_list, const char *key) { volume_option_t *opt = NULL; volume_opt_list_t *opt_list = NULL; - volume_option_t *found = NULL; int index = 0; int i = 0; char *cmp_key = NULL; @@ -911,13 +914,12 @@ xlator_volume_option_get_list(volume_opt_list_t *vol_list, const char *key) if (!cmp_key) break; if (fnmatch(cmp_key, key, FNM_NOESCAPE) == 0) { - found = &opt[index]; - goto out; + return &opt[index]; } } } -out: - return found; + + return NULL; } volume_option_t * @@ -961,18 +963,16 @@ xl_opt_validate(dict_t *dict, char *key, data_t *value, void *data) ret = xlator_option_validate(xl, key, value->data, opt, &errstr); if (ret) - gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_VALIDATE_RETURNS, - "validate of %s returned %d", key, ret); + gf_smsg(xl->name, GF_LOG_WARNING, 0, LG_MSG_VALIDATE_RETURNS, "key=%s", + key, "ret=%d", ret, NULL); if (errstr) /* possible small leak of previously set stub->errstr */ stub->errstr = errstr; if (fnmatch(opt->key[0], key, FNM_NOESCAPE) != 0) { - gf_msg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_INVALID_ENTRY, - "option '%s' is deprecated, preferred is '%s', " - "continuing with correction", - key, opt->key[0]); + gf_smsg(xl->name, GF_LOG_DEBUG, 0, 
LG_MSG_OPTION_DEPRECATED, "key=%s", + key, "preferred=%s", opt->key[0], NULL); dict_set(dict, opt->key[0], value); dict_del(dict, key); } @@ -1040,9 +1040,8 @@ xlator_validate_rec(xlator_t *xlator, char **op_errstr) while (trav) { if (xlator_validate_rec(trav->xlator, op_errstr)) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_VALIDATE_REC_FAILED, - "validate_rec " - "failed"); + gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_VALIDATE_REC_FAILED, + NULL); goto out; } @@ -1066,8 +1065,8 @@ xlator_validate_rec(xlator_t *xlator, char **op_errstr) THIS = old_THIS; if (ret) { - gf_msg(xlator->name, GF_LOG_INFO, 0, LG_MSG_INVALID_ENTRY, "%s", - *op_errstr); + gf_smsg(xlator->name, GF_LOG_INFO, 0, LG_MSG_INVALID_ENTRY, "%s", + *op_errstr, NULL); goto out; } @@ -1123,6 +1122,7 @@ xlator_reconfigure_rec(xlator_t *old_xl, xlator_t *new_xl) THIS = old_xl; xlator_init_lock(); + handle_default_options(old_xl, new_xl->options); ret = old_xl->reconfigure(old_xl, new_xl->options); xlator_init_unlock(); @@ -1200,18 +1200,18 @@ pc_or_size(char *in, double *out) { double pc = 0; int ret = 0; - size_t size = 0; + uint64_t size = 0; if (gf_string2percent(in, &pc) == 0) { if (pc > 100.0) { - ret = gf_string2bytesize_size(in, &size); + ret = gf_string2bytesize_uint64(in, &size); if (!ret) *out = size; } else { *out = pc; } } else { - ret = gf_string2bytesize_size(in, &size); + ret = gf_string2bytesize_uint64(in, &size); if (!ret) *out = size; } @@ -1223,7 +1223,7 @@ DEFINE_INIT_OPT(uint64_t, uint64, gf_string2uint64); DEFINE_INIT_OPT(int64_t, int64, gf_string2int64); DEFINE_INIT_OPT(uint32_t, uint32, gf_string2uint32); DEFINE_INIT_OPT(int32_t, int32, gf_string2int32); -DEFINE_INIT_OPT(size_t, size, gf_string2bytesize_size); +DEFINE_INIT_OPT(uint64_t, size, gf_string2bytesize_uint64); DEFINE_INIT_OPT(uint64_t, size_uint64, gf_string2bytesize_uint64); DEFINE_INIT_OPT(double, percent, gf_string2percent); DEFINE_INIT_OPT(double, percent_or_size, pc_or_size); @@ -1238,7 +1238,7 @@ 
DEFINE_RECONF_OPT(uint64_t, uint64, gf_string2uint64); DEFINE_RECONF_OPT(int64_t, int64, gf_string2int64); DEFINE_RECONF_OPT(uint32_t, uint32, gf_string2uint32); DEFINE_RECONF_OPT(int32_t, int32, gf_string2int32); -DEFINE_RECONF_OPT(size_t, size, gf_string2bytesize_size); +DEFINE_RECONF_OPT(uint64_t, size, gf_string2bytesize_uint64); DEFINE_RECONF_OPT(uint64_t, size_uint64, gf_string2bytesize_uint64); DEFINE_RECONF_OPT(double, percent, gf_string2percent); DEFINE_RECONF_OPT(double, percent_or_size, pc_or_size); diff --git a/libglusterfs/src/parse-utils.c b/libglusterfs/src/parse-utils.c index d3fd0963507..4531d5f0170 100644 --- a/libglusterfs/src/parse-utils.c +++ b/libglusterfs/src/parse-utils.c @@ -14,13 +14,11 @@ #include <regex.h> #include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include "parse-utils.h" -#include "mem-pool.h" -#include "common-utils.h" -#include "libglusterfs-messages.h" +#include "glusterfs/parse-utils.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/libglusterfs-messages.h" /** * parser_init: Initialize a parser with the a string to parse and diff --git a/libglusterfs/src/quota-common-utils.c b/libglusterfs/src/quota-common-utils.c index 7e271ad7d07..804e2f0ad4b 100644 --- a/libglusterfs/src/quota-common-utils.c +++ b/libglusterfs/src/quota-common-utils.c @@ -8,12 +8,12 @@ cases as published by the Free Software Foundation. 
*/ -#include "dict.h" -#include "logging.h" -#include "byte-order.h" -#include "quota-common-utils.h" -#include "common-utils.h" -#include "libglusterfs-messages.h" +#include "glusterfs/dict.h" +#include "glusterfs/logging.h" +#include "glusterfs/byte-order.h" +#include "glusterfs/quota-common-utils.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/libglusterfs-messages.h" gf_boolean_t quota_meta_is_null(const quota_meta_t *meta) @@ -25,13 +25,13 @@ quota_meta_is_null(const quota_meta_t *meta) } int32_t -quota_data_to_meta(data_t *data, char *key, quota_meta_t *meta) +quota_data_to_meta(data_t *data, quota_meta_t *meta) { int32_t ret = -1; quota_meta_t *value = NULL; int64_t *size = NULL; - if (!data || !key || !meta) + if (!data || !meta) goto out; if (data->len > sizeof(int64_t)) { @@ -66,7 +66,8 @@ out: } int32_t -quota_dict_get_inode_meta(dict_t *dict, char *key, quota_meta_t *meta) +quota_dict_get_inode_meta(dict_t *dict, char *key, const int keylen, + quota_meta_t *meta) { int32_t ret = -1; data_t *data = NULL; @@ -74,11 +75,11 @@ quota_dict_get_inode_meta(dict_t *dict, char *key, quota_meta_t *meta) if (!dict || !key || !meta) goto out; - data = dict_get(dict, key); + data = dict_getn(dict, key, keylen); if (!data || !data->data) goto out; - ret = quota_data_to_meta(data, key, meta); + ret = quota_data_to_meta(data, meta); out: @@ -86,11 +87,12 @@ out: } int32_t -quota_dict_get_meta(dict_t *dict, char *key, quota_meta_t *meta) +quota_dict_get_meta(dict_t *dict, char *key, const int keylen, + quota_meta_t *meta) { int32_t ret = -1; - ret = quota_dict_get_inode_meta(dict, key, meta); + ret = quota_dict_get_inode_meta(dict, key, keylen, meta); if (ret == -2) ret = 0; diff --git a/libglusterfs/src/rbthash.c b/libglusterfs/src/rbthash.c index 0068b7b881e..c90b5a21f44 100644 --- a/libglusterfs/src/rbthash.c +++ b/libglusterfs/src/rbthash.c @@ -8,12 +8,12 @@ cases as published by the Free Software Foundation. 
*/ -#include "rbthash.h" +#include "glusterfs/rbthash.h" #include "rb.h" -#include "locking.h" -#include "mem-pool.h" -#include "logging.h" -#include "libglusterfs-messages.h" +#include "glusterfs/locking.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/logging.h" +#include "glusterfs/libglusterfs-messages.h" #include <pthread.h> #include <string.h> @@ -56,9 +56,8 @@ __rbthash_init_buckets(rbthash_table_t *tbl, int buckets) tbl->buckets[i].bucket = rb_create( (rb_comparison_func *)rbthash_comparator, tbl, NULL); if (!tbl->buckets[i].bucket) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RB_TABLE_CREATE_FAILED, - "Failed to " - "create rb table bucket"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RB_TABLE_CREATE_FAILED, + NULL); ret = -1; goto err; } @@ -88,20 +87,17 @@ rbthash_table_init(glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc, int ret = -1; if (!hfunc) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_HASH_FUNC_ERROR, - "Hash function not given"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_HASH_FUNC_ERROR, NULL); return NULL; } if (!entrypool && !expected_entries) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, - "Both mem-pool and expected entries not provided"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_ENTRIES_NOT_PROVIDED, NULL); return NULL; } if (entrypool && expected_entries) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, - "Both mem-pool and expected entries are provided"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_ENTRIES_PROVIDED, NULL); return NULL; } @@ -132,8 +128,8 @@ rbthash_table_init(glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc, ret = __rbthash_init_buckets(newtab, buckets); if (ret == -1) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_BUCKET_FAILED, - "Failed to init buckets"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_BUCKET_FAILED, + NULL); if (newtab->pool_alloced) mem_pool_destroy(newtab->entrypool); } else { @@ -170,8 +166,8 @@ 
rbthash_init_entry(rbthash_table_t *tbl, void *data, void *key, int keylen) entry = mem_get(tbl->entrypool); if (!entry) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_ENTRY_FAILED, - "Failed to get entry from mem-pool"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_ENTRY_FAILED, + NULL); goto ret; } @@ -243,8 +239,8 @@ rbthash_insert_entry(rbthash_table_t *tbl, rbthash_entry_t *entry) bucket = rbthash_entry_bucket(tbl, entry); if (!bucket) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED, - "Failed to get bucket"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED, + NULL); goto err; } @@ -252,10 +248,11 @@ rbthash_insert_entry(rbthash_table_t *tbl, rbthash_entry_t *entry) LOCK(&bucket->bucketlock); { if (!rb_probe(bucket->bucket, (void *)entry)) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED, - "Failed to insert" - " entry"); + UNLOCK(&bucket->bucketlock); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED, + NULL); ret = -1; + goto err; } } UNLOCK(&bucket->bucketlock); @@ -275,17 +272,18 @@ rbthash_insert(rbthash_table_t *tbl, void *data, void *key, int keylen) entry = rbthash_init_entry(tbl, data, key, keylen); if (!entry) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_ENTRY_FAILED, - "Failed to init entry"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_ENTRY_FAILED, + NULL); goto err; } ret = rbthash_insert_entry(tbl, entry); if (ret == -1) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED, - "Failed to insert entry"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED, + NULL); rbthash_deinit_entry(tbl, entry); + goto err; } LOCK(&tbl->tablelock); @@ -329,8 +327,8 @@ rbthash_get(rbthash_table_t *tbl, void *key, int keylen) bucket = rbthash_key_bucket(tbl, key, keylen); if (!bucket) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_GET_BUCKET_FAILED, - "Failed to get bucket"); + 
gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED, + NULL); return NULL; } @@ -363,8 +361,8 @@ rbthash_remove(rbthash_table_t *tbl, void *key, int keylen) bucket = rbthash_key_bucket(tbl, key, keylen); if (!bucket) { - gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED, - "Failed to get bucket"); + gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED, + NULL); return NULL; } diff --git a/libglusterfs/src/refcount.c b/libglusterfs/src/refcount.c index a522d86a677..d5a5a82fa0f 100644 --- a/libglusterfs/src/refcount.c +++ b/libglusterfs/src/refcount.c @@ -8,8 +8,8 @@ cases as published by the Free Software Foundation. */ -#include "common-utils.h" -#include "refcount.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/refcount.h" #ifndef REFCOUNT_NEEDS_LOCK diff --git a/libglusterfs/src/rot-buffs.c b/libglusterfs/src/rot-buffs.c index c5147321c60..260bf16ecea 100644 --- a/libglusterfs/src/rot-buffs.c +++ b/libglusterfs/src/rot-buffs.c @@ -10,10 +10,10 @@ #include <math.h> -#include "mem-types.h" -#include "mem-pool.h" +#include "glusterfs/mem-types.h" +#include "glusterfs/mem-pool.h" -#include "rot-buffs.h" +#include "glusterfs/rot-buffs.h" /** * Producer-Consumer based on top of rotational buffers. 
@@ -96,8 +96,11 @@ rvec_in_watermark_range(rbuf_list_t *rlist) static void rbuf_reset_rvec(rbuf_iovec_t *rvec) { + GF_VALIDATE_OR_GOTO("libglusterfs", rvec, err); /* iov_base is _never_ modified */ rvec->iov.iov_len = 0; +err: + return; } /* TODO: alloc multiple rbuf_iovec_t */ diff --git a/libglusterfs/src/run.c b/libglusterfs/src/run.c index 41275066b20..58f95a7e610 100644 --- a/libglusterfs/src/run.c +++ b/libglusterfs/src/run.c @@ -23,7 +23,7 @@ #include <assert.h> #include <signal.h> #include <sys/wait.h> -#include "syscall.h" +#include "glusterfs/syscall.h" /* * Following defines are available for helping development: @@ -92,12 +92,12 @@ close_fds_except(int *fdv, size_t count) #define GF_LINUX_HOST_OS #endif #else /* ! RUN_STANDALONE || RUN_DO_DEMO */ -#include "glusterfs.h" -#include "common-utils.h" -#include "libglusterfs-messages.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/libglusterfs-messages.h" #endif -#include "run.h" +#include "glusterfs/run.h" void runinit(runner_t *runner) { @@ -132,7 +132,7 @@ runner_insert_arg(runner_t *runner, char *arg) GF_ASSERT(arg); - if (runner->runerr) + if (runner->runerr || !runner->argv) return; for (i = 0; i < runner->argvlen; i++) { @@ -263,8 +263,8 @@ runner_start(runner_t *runner) int i = 0; sigset_t set; - if (runner->runerr) { - errno = runner->runerr; + if (runner->runerr || !runner->argv) { + errno = (runner->runerr) ? runner->runerr : EINVAL; return -1; } diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c index fc8af2ec85c..1531f0da43f 100644 --- a/libglusterfs/src/stack.c +++ b/libglusterfs/src/stack.c @@ -8,15 +8,16 @@ cases as published by the Free Software Foundation. 
*/ -#include "statedump.h" -#include "stack.h" -#include "libglusterfs-messages.h" +#include "glusterfs/statedump.h" +#include "glusterfs/stack.h" +#include "glusterfs/libglusterfs-messages.h" call_frame_t * create_frame(xlator_t *xl, call_pool_t *pool) { call_stack_t *stack = NULL; call_frame_t *frame = NULL; + static uint64_t unique = 0; if (!xl || !pool) { return NULL; @@ -52,6 +53,7 @@ create_frame(xlator_t *xl, call_pool_t *pool) { list_add(&stack->all_frames, &pool->all_frames); pool->cnt++; + stack->unique = unique++; } UNLOCK(&pool->lock); GF_ATOMIC_INC(pool->total_count); @@ -85,9 +87,12 @@ gf_proc_dump_call_frame(call_frame_t *call_frame, const char *key_buf, ...) { char prefix[GF_DUMP_MAX_BUF_LEN]; va_list ap; - call_frame_t my_frame; + call_frame_t my_frame = { + 0, + }; + int ret = -1; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; int len; @@ -157,7 +162,7 @@ gf_proc_dump_call_stack(call_stack_t *call_stack, const char *key_buf, ...) va_list ap; call_frame_t *trav; int32_t i = 1, cnt = 0; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; int len; @@ -324,6 +329,8 @@ gf_proc_dump_call_frame_to_dict(call_frame_t *call_frame, char *prefix, if (tmp_frame.unwind_to) { snprintf(key, sizeof(key), "%s.unwind_to", prefix); ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.unwind_to)); + if (ret) + return; } return; diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c index 5c8f0fc627f..65f0eb5c7f3 100644 --- a/libglusterfs/src/statedump.c +++ b/libglusterfs/src/statedump.c @@ -9,13 +9,11 @@ */ #include <stdarg.h> -#include "glusterfs.h" -#include "logging.h" -#include "iobuf.h" -#include "statedump.h" -#include "stack.h" -#include "common-utils.h" -#include "syscall.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/logging.h" +#include "glusterfs/statedump.h" +#include "glusterfs/stack.h" +#include "glusterfs/syscall.h" #ifdef HAVE_MALLOC_H #include <malloc.h> @@ -89,19 +87,19 @@ 
gf_proc_dump_set_path(char *dump_options_file) if (!fp) goto out; - ret = fscanf(fp, "%s", buf); + ret = fscanf(fp, "%255s", buf); while (ret != EOF) { key = strtok_r(buf, "=", &saveptr); if (!key) { - ret = fscanf(fp, "%s", buf); + ret = fscanf(fp, "%255s", buf); continue; } value = strtok_r(NULL, "=", &saveptr); if (!value) { - ret = fscanf(fp, "%s", buf); + ret = fscanf(fp, "%255s", buf); continue; } if (!strcmp(key, "path")) { @@ -201,6 +199,40 @@ gf_proc_dump_write(char *key, char *value, ...) return ret; } +void +gf_latency_statedump_and_reset(char *key, gf_latency_t *lat) +{ + /* Doesn't make sense to continue if there are no fops + came in the given interval */ + if (!lat || !lat->count) + return; + gf_proc_dump_write(key, + "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64 + " MAX:%" PRIu64, + (((double)lat->total) / lat->count), lat->count, + lat->total, lat->min, lat->max); + gf_latency_reset(lat); +} + +void +gf_proc_dump_xl_latency_info(xlator_t *xl) +{ + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char key[GF_DUMP_MAX_BUF_LEN]; + int i; + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); + gf_proc_dump_add_section("%s", key_prefix); + + for (i = 0; i < GF_FOP_MAXVALUE; i++) { + gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]); + + gf_latency_t *lat = &xl->stats.interval.latencies[i]; + + gf_latency_statedump_and_reset(key, lat); + } +} + static void gf_proc_dump_xlator_mem_info(xlator_t *xl) { @@ -221,13 +253,13 @@ gf_proc_dump_xlator_mem_info(xlator_t *xl) gf_proc_dump_add_section("%s.%s - usage-type %s memusage", xl->type, xl->name, xl->mem_acct->rec[i].typestr); - gf_proc_dump_write("size", "%" GF_PRI_SIZET, xl->mem_acct->rec[i].size); + gf_proc_dump_write("size", "%" PRIu64, xl->mem_acct->rec[i].size); gf_proc_dump_write("num_allocs", "%u", xl->mem_acct->rec[i].num_allocs); - gf_proc_dump_write("max_size", "%" GF_PRI_SIZET, + gf_proc_dump_write("max_size", "%" PRIu64, xl->mem_acct->rec[i].max_size); 
gf_proc_dump_write("max_num_allocs", "%u", xl->mem_acct->rec[i].max_num_allocs); - gf_proc_dump_write("total_allocs", "%u", + gf_proc_dump_write("total_allocs", "%" PRIu64, xl->mem_acct->rec[i].total_allocs); } @@ -255,13 +287,13 @@ gf_proc_dump_xlator_mem_info_only_in_use(xlator_t *xl) gf_proc_dump_add_section("%s.%s - usage-type %d", xl->type, xl->name, i); - gf_proc_dump_write("size", "%" GF_PRI_SIZET, xl->mem_acct->rec[i].size); - gf_proc_dump_write("max_size", "%" GF_PRI_SIZET, + gf_proc_dump_write("size", "%" PRIu64, xl->mem_acct->rec[i].size); + gf_proc_dump_write("max_size", "%" PRIu64, xl->mem_acct->rec[i].max_size); gf_proc_dump_write("num_allocs", "%u", xl->mem_acct->rec[i].num_allocs); gf_proc_dump_write("max_num_allocs", "%u", xl->mem_acct->rec[i].max_num_allocs); - gf_proc_dump_write("total_allocs", "%u", + gf_proc_dump_write("total_allocs", "%" PRIu64, xl->mem_acct->rec[i].total_allocs); } @@ -272,7 +304,7 @@ gf_proc_dump_xlator_mem_info_only_in_use(xlator_t *xl) void gf_proc_dump_mem_info() { -#ifdef HAVE_MALLOC_STATS +#ifdef HAVE_MALLINFO struct mallinfo info; memset(&info, 0, sizeof(struct mallinfo)); @@ -298,7 +330,7 @@ gf_proc_dump_mem_info_to_dict(dict_t *dict) { if (!dict) return; -#ifdef HAVE_MALLOC_STATS +#ifdef HAVE_MALLINFO struct mallinfo info; int ret = -1; @@ -351,26 +383,13 @@ gf_proc_dump_mem_info_to_dict(dict_t *dict) void gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx) { +#ifdef GF_DISABLE_MEMPOOL + gf_proc_dump_write("built with --disable-mempool", " so no memory pools"); +#else struct mem_pool *pool = NULL; gf_proc_dump_add_section("mempool"); -#if defined(OLD_MEM_POOLS) - list_for_each_entry(pool, &ctx->mempool_list, global_list) - { - gf_proc_dump_write("-----", "-----"); - gf_proc_dump_write("pool-name", "%s", pool->name); - gf_proc_dump_write("hot-count", "%d", pool->hot_count); - gf_proc_dump_write("cold-count", "%d", pool->cold_count); - gf_proc_dump_write("padded_sizeof", "%lu", pool->padded_sizeof_type); - 
gf_proc_dump_write("alloc-count", "%" PRIu64, pool->alloc_count); - gf_proc_dump_write("max-alloc", "%d", pool->max_alloc); - - gf_proc_dump_write("pool-misses", "%" PRIu64, pool->pool_misses); - gf_proc_dump_write("cur-stdalloc", "%d", pool->curr_stdalloc); - gf_proc_dump_write("max-stdalloc", "%d", pool->max_stdalloc); - } -#else LOCK(&ctx->lock); { list_for_each_entry(pool, &ctx->mempool_list, owner) @@ -379,25 +398,24 @@ gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx) gf_proc_dump_write("-----", "-----"); gf_proc_dump_write("pool-name", "%s", pool->name); + gf_proc_dump_write("xlator-name", "%s", pool->xl_name); gf_proc_dump_write("active-count", "%" GF_PRI_ATOMIC, active); gf_proc_dump_write("sizeof-type", "%lu", pool->sizeof_type); gf_proc_dump_write("padded-sizeof", "%d", 1 << pool->pool->power_of_two); - gf_proc_dump_write("size", "%lu", + gf_proc_dump_write("size", "%" PRId64, (1 << pool->pool->power_of_two) * active); gf_proc_dump_write("shared-pool", "%p", pool->pool); } } UNLOCK(&ctx->lock); - - /* TODO: details of (struct mem_pool_shared) pool->pool */ -#endif +#endif /* GF_DISABLE_MEMPOOL */ } void gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict) { -#if defined(OLD_MEM_POOLS) +#ifndef GF_DISABLE_MEMPOOL struct mem_pool *pool = NULL; char key[GF_DUMP_MAX_BUF_LEN] = { 0, @@ -408,51 +426,47 @@ gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict) if (!ctx || !dict) return; - list_for_each_entry(pool, &ctx->mempool_list, global_list) + LOCK(&ctx->lock); { - snprintf(key, sizeof(key), "pool%d.name", count); - ret = dict_set_str(dict, key, pool->name); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.hotcount", count); - ret = dict_set_int32(dict, key, pool->hot_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.coldcount", count); - ret = dict_set_int32(dict, key, pool->cold_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.paddedsizeof", count); - ret = 
dict_set_uint64(dict, key, pool->padded_sizeof_type); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.alloccount", count); - ret = dict_set_uint64(dict, key, pool->alloc_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.max_alloc", count); - ret = dict_set_int32(dict, key, pool->max_alloc); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.max-stdalloc", count); - ret = dict_set_int32(dict, key, pool->max_stdalloc); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.pool-misses", count); - ret = dict_set_uint64(dict, key, pool->pool_misses); - if (ret) - return; - count++; + list_for_each_entry(pool, &ctx->mempool_list, owner) + { + int64_t active = GF_ATOMIC_GET(pool->active); + + snprintf(key, sizeof(key), "pool%d.name", count); + ret = dict_set_str(dict, key, pool->name); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.active-count", count); + ret = dict_set_uint64(dict, key, active); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.sizeof-type", count); + ret = dict_set_uint64(dict, key, pool->sizeof_type); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.padded-sizeof", count); + ret = dict_set_uint64(dict, key, 1 << pool->pool->power_of_two); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.size", count); + ret = dict_set_uint64(dict, key, + (1 << pool->pool->power_of_two) * active); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.shared-pool", count); + ret = dict_set_static_ptr(dict, key, pool->pool); + if (ret) + goto out; + } } - ret = dict_set_int32(dict, "mempool-count", count); -#endif +out: + UNLOCK(&ctx->lock); +#endif /* !GF_DISABLE_MEMPOOL */ } void @@ -461,17 +475,17 @@ gf_proc_dump_latency_info(xlator_t *xl); void gf_proc_dump_dict_info(glusterfs_ctx_t *ctx) { - uint64_t total_dicts = 0; - uint64_t total_pairs = 0; + int64_t total_dicts = 0; + int64_t total_pairs = 0; total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used); 
total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used); gf_proc_dump_write("max-pairs-per-dict", "%" GF_PRI_ATOMIC, GF_ATOMIC_GET(ctx->stats.max_dict_pairs)); - gf_proc_dump_write("total-pairs-used", "%lu", total_pairs); - gf_proc_dump_write("total-dicts-used", "%lu", total_dicts); - gf_proc_dump_write("average-pairs-per-dict", "%lu", + gf_proc_dump_write("total-pairs-used", "%" PRId64, total_pairs); + gf_proc_dump_write("total-dicts-used", "%" PRId64, total_dicts); + gf_proc_dump_write("average-pairs-per-dict", "%" PRId64, (total_pairs / total_dicts)); } @@ -487,7 +501,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav) return; if (ctx->measure_latency) - gf_proc_dump_latency_info(trav); + gf_proc_dump_xl_latency_info(trav); gf_proc_dump_xlator_mem_info(trav); @@ -745,21 +759,21 @@ gf_proc_dump_options_init() (void)gf_proc_dump_disable_all_options(); // swallow the errors if setting statedump file path is failed. - ret = gf_proc_dump_set_path(dump_option_file); + (void)gf_proc_dump_set_path(dump_option_file); - ret = fscanf(fp, "%s", buf); + ret = fscanf(fp, "%255s", buf); while (ret != EOF) { key = strtok_r(buf, "=", &saveptr); if (!key) { - ret = fscanf(fp, "%s", buf); + ret = fscanf(fp, "%255s", buf); continue; } value = strtok_r(NULL, "=", &saveptr); if (!value) { - ret = fscanf(fp, "%s", buf); + ret = fscanf(fp, "%255s", buf); continue; } @@ -784,7 +798,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) char brick_name[PATH_MAX] = { 0, }; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char sign_string[512] = { @@ -805,12 +819,21 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) int brick_count = 0; int len = 0; - gf_proc_dump_lock(); + gf_msg_trace("dump", 0, "received statedump request (sig:USR1)"); if (!ctx) goto out; - if (ctx) { + /* + * Multiplexed daemons can change the active graph when attach/detach + * is called. So this has to be protected with the cleanup lock. 
+ */ + if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) + pthread_mutex_lock(&ctx->cleanup_lock); + gf_proc_dump_lock(); + + if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) && + (ctx && ctx->active)) { top = ctx->active->first; for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { brick_count++; @@ -835,7 +858,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) ? dump_options.dump_path : ((ctx->statedump_path != NULL) ? ctx->statedump_path : DEFAULT_VAR_RUN_DIRECTORY)), - brick_name, getpid(), (uint64_t)time(NULL)); + brick_name, getpid(), (uint64_t)gf_time()); if ((ret < 0) || (ret >= sizeof(path))) { goto out; } @@ -854,17 +877,14 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) // continue even though gettimeofday() has failed ret = gettimeofday(&tv, NULL); if (0 == ret) { - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - len = strlen(timestr); - snprintf(timestr + len, sizeof timestr - len, ".%" GF_PRI_SUSECONDS, - tv.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); } len = snprintf(sign_string, sizeof(sign_string), "DUMP-START-TIME: %s\n", timestr); // swallow the errors of write for start and end marker - ret = sys_write(gf_dump_fd, sign_string, len); + (void)sys_write(gf_dump_fd, sign_string, len); memset(timestr, 0, sizeof(timestr)); @@ -906,15 +926,12 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) ret = gettimeofday(&tv, NULL); if (0 == ret) { - gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT); - len = strlen(timestr); - snprintf(timestr + len, sizeof timestr - len, ".%" GF_PRI_SUSECONDS, - tv.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT); } len = snprintf(sign_string, sizeof(sign_string), "\nDUMP-END-TIME: %s", timestr); - ret = sys_write(gf_dump_fd, sign_string, len); + (void)sys_write(gf_dump_fd, sign_string, len); if (gf_dump_fd != -1) gf_proc_dump_close(); @@ -922,7 +939,11 @@ gf_proc_dump_info(int signum, 
glusterfs_ctx_t *ctx) out: GF_FREE(dump_options.dump_path); dump_options.dump_path = NULL; - gf_proc_dump_unlock(); + if (ctx) { + gf_proc_dump_unlock(); + if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) + pthread_mutex_unlock(&ctx->cleanup_lock); + } return; } @@ -1024,7 +1045,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd) { gf_dump_strfd = strfd; - gf_proc_dump_latency_info(this); + gf_proc_dump_xl_latency_info(this); gf_dump_strfd = NULL; } diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c index 39ca3ec1c74..5c316b9291a 100644 --- a/libglusterfs/src/store.c +++ b/libglusterfs/src/store.c @@ -11,19 +11,18 @@ #include <inttypes.h> #include <libgen.h> -#include "glusterfs.h" -#include "store.h" -#include "dict.h" -#include "xlator.h" -#include "syscall.h" -#include "libglusterfs-messages.h" +#include "glusterfs/glusterfs.h" +#include "glusterfs/store.h" +#include "glusterfs/xlator.h" +#include "glusterfs/syscall.h" +#include "glusterfs/libglusterfs-messages.h" int32_t gf_store_mkdir(char *path) { int32_t ret = -1; - ret = mkdir_p(path, 0777, _gf_true); + ret = mkdir_p(path, 0755, _gf_true); if ((-1 == ret) && (EEXIST != errno)) { gf_msg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, @@ -184,8 +183,8 @@ out: } int -gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key, - char **iter_val, gf_store_op_errno_t *store_errno) +gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val, + gf_store_op_errno_t *store_errno) { int32_t ret = -1; char *savetok = NULL; @@ -193,15 +192,15 @@ gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key, char *value = NULL; char *temp = NULL; size_t str_len = 0; + char str[8192]; GF_ASSERT(file); - GF_ASSERT(str); GF_ASSERT(iter_key); GF_ASSERT(iter_val); GF_ASSERT(store_errno); retry: - temp = fgets(str, size, file); + temp = fgets(str, 8192, file); if (temp == NULL || feof(file)) { ret = -1; *store_errno = GD_STORE_EOF; @@ -241,13 
+240,8 @@ int32_t gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value) { int32_t ret = -1; - char *scan_str = NULL; char *iter_key = NULL; char *iter_val = NULL; - char *free_str = NULL; - struct stat st = { - 0, - }; gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; GF_ASSERT(handle); @@ -279,32 +273,9 @@ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value) } else { fseek(handle->read, 0, SEEK_SET); } - ret = sys_fstat(handle->fd, &st); - if (ret < 0) { - gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, - "stat on file %s failed", handle->path); - ret = -1; - store_errno = GD_STORE_STAT_FAILED; - goto out; - } - - /* "st.st_size + 1" is used as we are fetching each - * line of a file using fgets, fgets will append "\0" - * to the end of the string - */ - scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char); - - if (scan_str == NULL) { - ret = -1; - store_errno = GD_STORE_ENOMEM; - goto out; - } - - free_str = scan_str; - do { - ret = gf_store_read_and_tokenize(handle->read, scan_str, st.st_size + 1, - &iter_key, &iter_val, &store_errno); + ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val, + &store_errno); if (ret < 0) { gf_msg_trace("", 0, "error while reading key '%s': " @@ -334,8 +305,6 @@ out: sys_close(handle->fd); } - GF_FREE(free_str); - return ret; } @@ -388,6 +357,53 @@ out: } int32_t +gf_store_save_items(int fd, char *items) +{ + int32_t ret = -1; + int dup_fd = -1; + FILE *fp = NULL; + + GF_ASSERT(fd > 0); + GF_ASSERT(items); + + dup_fd = dup(fd); + if (dup_fd == -1) + goto out; + + fp = fdopen(dup_fd, "a+"); + if (fp == NULL) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, + "fdopen failed."); + ret = -1; + goto out; + } + + ret = fputs(items, fp); + if (ret < 0) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, + "Unable to store items: %s", items); + ret = -1; + goto out; + } + + ret = fflush(fp); + if (ret) { + gf_msg(THIS->name, 
GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, + "fflush failed."); + ret = -1; + goto out; + } + + ret = 0; +out: + if (fp) + fclose(fp); + + gf_msg_debug(THIS->name, 0, "returning: %d", ret); + return ret; +} + +int32_t gf_store_handle_new(const char *path, gf_store_handle_t **handle) { int32_t ret = -1; @@ -424,7 +440,7 @@ out: if (fd >= 0) sys_close(fd); - if (ret < 0) { + if (ret) { GF_FREE(spath); GF_FREE(shandle); } @@ -561,40 +577,16 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value, gf_store_op_errno_t *op_errno) { int32_t ret = -1; - char *scan_str = NULL; char *iter_key = NULL; char *iter_val = NULL; - struct stat st = { - 0, - }; gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; GF_ASSERT(iter); GF_ASSERT(key); GF_ASSERT(value); - ret = sys_stat(iter->filepath, &st); - if (ret < 0) { - gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, - "stat on file failed"); - ret = -1; - store_errno = GD_STORE_STAT_FAILED; - goto out; - } - - /* "st.st_size + 1" is used as we are fetching each - * line of a file using fgets, fgets will append "\0" - * to the end of the string - */ - scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char); - if (!scan_str) { - ret = -1; - store_errno = GD_STORE_ENOMEM; - goto out; - } - - ret = gf_store_read_and_tokenize(iter->file, scan_str, st.st_size + 1, - &iter_key, &iter_val, &store_errno); + ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val, + &store_errno); if (ret < 0) { goto out; } @@ -619,7 +611,6 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value, ret = 0; out: - GF_FREE(scan_str); if (ret) { GF_FREE(*key); GF_FREE(*value); @@ -658,23 +649,25 @@ out: } int32_t -gf_store_iter_destroy(gf_store_iter_t *iter) +gf_store_iter_destroy(gf_store_iter_t **iter) { int32_t ret = -1; - if (!iter) + if (!(*iter)) return 0; /* gf_store_iter_new will not return a valid iter object with iter->file * being NULL*/ - ret = fclose(iter->file); + ret = 
fclose((*iter)->file); if (ret) gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, "Unable" " to close file: %s, ret: %d", - iter->filepath, ret); + (*iter)->filepath, ret); + + GF_FREE(*iter); + *iter = NULL; - GF_FREE(iter); return ret; } diff --git a/libglusterfs/src/strfd.c b/libglusterfs/src/strfd.c index ad66b622361..8a2580edc85 100644 --- a/libglusterfs/src/strfd.c +++ b/libglusterfs/src/strfd.c @@ -10,10 +10,10 @@ #include <stdarg.h> -#include "mem-types.h" -#include "mem-pool.h" -#include "strfd.h" -#include "common-utils.h" +#include "glusterfs/mem-types.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/strfd.h" +#include "glusterfs/common-utils.h" strfd_t * strfd_open() diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c index d2a71698745..d9f1723856d 100644 --- a/libglusterfs/src/syncop-utils.c +++ b/libglusterfs/src/syncop-utils.c @@ -8,10 +8,10 @@ cases as published by the Free Software Foundation. */ -#include "syncop.h" -#include "syncop-utils.h" -#include "common-utils.h" -#include "libglusterfs-messages.h" +#include "glusterfs/syncop.h" +#include "glusterfs/syncop-utils.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/libglusterfs-messages.h" struct syncop_dir_scan_data { xlator_t *subvol; @@ -345,11 +345,16 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, gf_dirent_t *tmp = NULL; uint32_t jobs_running = 0; uint32_t qlen = 0; - pthread_cond_t cond; - pthread_mutex_t mut; - gf_boolean_t cond_init = _gf_false; - gf_boolean_t mut_init = _gf_false; + pthread_cond_t cond = PTHREAD_COND_INITIALIZER; + pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; gf_dirent_t entries; + xlator_t *this = NULL; + + if (frame) { + this = frame->this; + } else { + this = THIS; + } /*For this functionality to be implemented in general, we need * synccond_t infra which doesn't block the executing thread. 
Until then @@ -371,15 +376,6 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, INIT_LIST_HEAD(&entries.list); INIT_LIST_HEAD(&q.list); - ret = pthread_mutex_init(&mut, NULL); - if (ret) - goto out; - mut_init = _gf_true; - - ret = pthread_cond_init(&cond, NULL); - if (ret) - goto out; - cond_init = _gf_true; while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, xdata, NULL))) { @@ -397,13 +393,16 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, list_for_each_entry_safe(entry, tmp, &entries.list, list) { + if (this && this->cleanup_starting) + goto out; + list_del_init(&entry->list); if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) { gf_dirent_entry_free(entry); continue; } - if (entry->d_type == IA_IFDIR) { + if (entry->d_stat.ia_type == IA_IFDIR) { ret = fn(subvol, entry, loc, data); gf_dirent_entry_free(entry); if (ret) @@ -442,21 +441,17 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, out: if (fd) fd_unref(fd); - if (mut_init && cond_init) { - pthread_mutex_lock(&mut); - { - while (jobs_running) - pthread_cond_wait(&cond, &mut); - } - pthread_mutex_unlock(&mut); - gf_dirent_free(&q); - gf_dirent_free(&entries); + + pthread_mutex_lock(&mut); + { + while (jobs_running) + pthread_cond_wait(&cond, &mut); } + pthread_mutex_unlock(&mut); + + gf_dirent_free(&q); + gf_dirent_free(&entries); - if (mut_init) - pthread_mutex_destroy(&mut); - if (cond_init) - pthread_cond_destroy(&cond); return ret | retval; } diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 2b8a0a68b00..df20cec559f 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -8,8 +8,12 @@ cases as published by the Free Software Foundation. 
*/ -#include "syncop.h" -#include "libglusterfs-messages.h" +#include "glusterfs/syncop.h" +#include "glusterfs/libglusterfs-messages.h" + +#ifdef HAVE_TSAN_API +#include <sanitizer/tsan_interface.h> +#endif int syncopctx_setfsuid(void *uid) @@ -26,28 +30,10 @@ syncopctx_setfsuid(void *uid) opctx = syncopctx_getctx(); - /* alloc for this thread the first time */ - if (!opctx) { - opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); - if (!opctx) { - ret = -1; - goto out; - } - - ret = syncopctx_setctx(opctx); - if (ret != 0) { - GF_FREE(opctx); - opctx = NULL; - goto out; - } - } + opctx->uid = *(uid_t *)uid; + opctx->valid |= SYNCOPCTX_UID; out: - if (opctx && uid) { - opctx->uid = *(uid_t *)uid; - opctx->valid |= SYNCOPCTX_UID; - } - return ret; } @@ -66,28 +52,10 @@ syncopctx_setfsgid(void *gid) opctx = syncopctx_getctx(); - /* alloc for this thread the first time */ - if (!opctx) { - opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); - if (!opctx) { - ret = -1; - goto out; - } - - ret = syncopctx_setctx(opctx); - if (ret != 0) { - GF_FREE(opctx); - opctx = NULL; - goto out; - } - } + opctx->gid = *(gid_t *)gid; + opctx->valid |= SYNCOPCTX_GID; out: - if (opctx && gid) { - opctx->gid = *(gid_t *)gid; - opctx->valid |= SYNCOPCTX_GID; - } - return ret; } @@ -107,43 +75,20 @@ syncopctx_setfsgroups(int count, const void *groups) opctx = syncopctx_getctx(); - /* alloc for this thread the first time */ - if (!opctx) { - opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); - if (!opctx) { - ret = -1; - goto out; - } - - ret = syncopctx_setctx(opctx); - if (ret != 0) { - GF_FREE(opctx); - opctx = NULL; - goto out; - } - } - /* resize internal groups as required */ if (count && opctx->grpsize < count) { if (opctx->groups) { - tmpgroups = GF_REALLOC(opctx->groups, (sizeof(gid_t) * count)); - /* NOTE: Not really required to zero the reallocation, - * as ngrps controls the validity of data, - * making a note irrespective */ - if (tmpgroups == 
NULL) { - opctx->grpsize = 0; - GF_FREE(opctx->groups); - opctx->groups = NULL; - ret = -1; - goto out; - } - } else { - tmpgroups = GF_CALLOC(count, sizeof(gid_t), gf_common_mt_syncopctx); - if (tmpgroups == NULL) { - opctx->grpsize = 0; - ret = -1; - goto out; - } + /* Group list will be updated later, so no need to keep current + * data and waste time copying it. It's better to free the current + * allocation and then allocate a fresh new memory block. */ + GF_FREE(opctx->groups); + opctx->groups = NULL; + opctx->grpsize = 0; + } + tmpgroups = GF_MALLOC(count * sizeof(gid_t), gf_common_mt_syncopctx); + if (tmpgroups == NULL) { + ret = -1; + goto out; } opctx->groups = tmpgroups; @@ -156,6 +101,13 @@ syncopctx_setfsgroups(int count, const void *groups) /* set/reset the ngrps, this is where reset of groups is handled */ opctx->ngrps = count; + + if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) { + /* This is the first time we are storing groups into the TLS structure + * so we mark the current thread so that it will be properly cleaned + * up when the thread terminates. 
*/ + gf_thread_needs_cleanup(); + } opctx->valid |= SYNCOPCTX_GROUPS; out: @@ -177,28 +129,10 @@ syncopctx_setfspid(void *pid) opctx = syncopctx_getctx(); - /* alloc for this thread the first time */ - if (!opctx) { - opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); - if (!opctx) { - ret = -1; - goto out; - } - - ret = syncopctx_setctx(opctx); - if (ret != 0) { - GF_FREE(opctx); - opctx = NULL; - goto out; - } - } + opctx->pid = *(pid_t *)pid; + opctx->valid |= SYNCOPCTX_PID; out: - if (opctx && pid) { - opctx->pid = *(pid_t *)pid; - opctx->valid |= SYNCOPCTX_PID; - } - return ret; } @@ -217,35 +151,21 @@ syncopctx_setfslkowner(gf_lkowner_t *lk_owner) opctx = syncopctx_getctx(); - /* alloc for this thread the first time */ - if (!opctx) { - opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); - if (!opctx) { - ret = -1; - goto out; - } - - ret = syncopctx_setctx(opctx); - if (ret != 0) { - GF_FREE(opctx); - opctx = NULL; - goto out; - } - } + opctx->lk_owner = *lk_owner; + opctx->valid |= SYNCOPCTX_LKOWNER; out: - if (opctx && lk_owner) { - opctx->lk_owner = *lk_owner; - opctx->valid |= SYNCOPCTX_LKOWNER; - } - return ret; } +void * +syncenv_processor(void *thdata); + static void __run(struct synctask *task) { struct syncenv *env = NULL; + int32_t total, ret, i; env = task->env; @@ -261,7 +181,6 @@ __run(struct synctask *task) env->runcount--; break; case SYNCTASK_WAIT: - env->waitcount--; break; case SYNCTASK_DONE: gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, @@ -275,8 +194,27 @@ __run(struct synctask *task) } list_add_tail(&task->all_tasks, &env->runq); - env->runcount++; task->state = SYNCTASK_RUN; + + env->runcount++; + + total = env->procs + env->runcount - env->procs_idle; + if (total > env->procmax) { + total = env->procmax; + } + if (total > env->procs) { + for (i = 0; i < env->procmax; i++) { + if (env->proc[i].env == NULL) { + env->proc[i].env = env; + ret = gf_thread_create(&env->proc[i].processor, NULL, + 
syncenv_processor, &env->proc[i], + "sproc%d", i); + if ((ret < 0) || (++env->procs >= total)) { + break; + } + } + } + } } static void @@ -298,7 +236,6 @@ __wait(struct synctask *task) gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK, "re-waiting already waiting " "task"); - env->waitcount--; break; case SYNCTASK_DONE: gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, @@ -311,12 +248,11 @@ __wait(struct synctask *task) } list_add_tail(&task->all_tasks, &env->waitq); - env->waitcount++; task->state = SYNCTASK_WAIT; } void -synctask_yield(struct synctask *task) +synctask_yield(struct synctask *task, struct timespec *delta) { xlator_t *oldTHIS = THIS; @@ -325,10 +261,16 @@ synctask_yield(struct synctask *task) task->proc->sched.uc_flags &= ~_UC_TLSBASE; #endif + task->delta = delta; + if (task->state != SYNCTASK_DONE) { task->state = SYNCTASK_SUSPEND; - (void)gf_backtrace_save(task->btbuf); } + +#ifdef HAVE_TSAN_API + __tsan_switch_to_fiber(task->proc->tsan.fiber, 0); +#endif + if (swapcontext(&task->ctx, &task->proc->sched) < 0) { gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED, "swapcontext failed"); @@ -338,6 +280,35 @@ synctask_yield(struct synctask *task) } void +synctask_sleep(int32_t secs) +{ + struct timespec delta; + struct synctask *task; + + task = synctask_get(); + + if (task == NULL) { + sleep(secs); + } else { + delta.tv_sec = secs; + delta.tv_nsec = 0; + + synctask_yield(task, &delta); + } +} + +static void +__synctask_wake(struct synctask *task) +{ + task->woken = 1; + + if (task->slept) + __run(task); + + pthread_cond_broadcast(&task->env->cond); +} + +void synctask_wake(struct synctask *task) { struct syncenv *env = NULL; @@ -346,13 +317,18 @@ synctask_wake(struct synctask *task) pthread_mutex_lock(&env->mutex); { - task->woken = 1; + if (task->timer != NULL) { + if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) { + goto unlock; + } - if (task->slept) - __run(task); + task->timer = NULL; + 
task->synccond = NULL; + } - pthread_cond_broadcast(&env->cond); + __synctask_wake(task); } +unlock: pthread_mutex_unlock(&env->mutex); } @@ -371,7 +347,7 @@ synctask_wrap(void) task->state = SYNCTASK_DONE; - synctask_yield(task); + synctask_yield(task, NULL); } void @@ -390,6 +366,10 @@ synctask_destroy(struct synctask *task) pthread_cond_destroy(&task->cond); } +#ifdef HAVE_TSAN_API + __tsan_destroy_fiber(task->tsan.fiber); +#endif + GF_FREE(task); } @@ -500,6 +480,13 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, makecontext(&newtask->ctx, (void (*)(void))synctask_wrap, 0); +#ifdef HAVE_TSAN_API + newtask->tsan.fiber = __tsan_create_fiber(0); + snprintf(newtask->tsan.name, TSAN_THREAD_NAMELEN, "<synctask of %s>", + this->name); + __tsan_set_fiber_name(newtask->tsan.fiber, newtask->tsan.name); +#endif + newtask->state = SYNCTASK_INIT; newtask->slept = 1; @@ -511,11 +498,6 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, } synctask_wake(newtask); - /* - * Make sure someone's there to execute anything we just put on the - * run queue. - */ - syncenv_scale(env); return newtask; err: @@ -587,10 +569,6 @@ syncenv_task(struct syncproc *proc) pthread_mutex_lock(&env->mutex); { while (list_empty(&env->runq)) { - sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME; - ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till); - if (!list_empty(&env->runq)) - break; /* If either of the conditions are met then exit * the current thread: * 1. 
syncenv has to scale down(procs > procmin) @@ -612,6 +590,13 @@ syncenv_task(struct syncproc *proc) pthread_cond_broadcast(&env->cond); goto unlock; } + + env->procs_idle++; + + sleep_till.tv_sec = gf_time() + SYNCPROC_IDLE_TIME; + ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till); + + env->procs_idle--; } task = list_entry(env->runq.next, struct synctask, all_tasks); @@ -630,6 +615,34 @@ unlock: return task; } +static void +synctask_timer(void *data) +{ + struct synctask *task = data; + struct synccond *cond; + + cond = task->synccond; + if (cond != NULL) { + pthread_mutex_lock(&cond->pmutex); + + list_del_init(&task->waitq); + task->synccond = NULL; + + pthread_mutex_unlock(&cond->pmutex); + + task->ret = -ETIMEDOUT; + } + + pthread_mutex_lock(&task->env->mutex); + + gf_timer_call_cancel(task->xl->ctx, task->timer); + task->timer = NULL; + + __synctask_wake(task); + + pthread_mutex_unlock(&task->env->mutex); +} + void synctask_switchto(struct synctask *task) { @@ -645,6 +658,10 @@ synctask_switchto(struct synctask *task) task->ctx.uc_flags &= ~_UC_TLSBASE; #endif +#ifdef HAVE_TSAN_API + __tsan_switch_to_fiber(task->tsan.fiber, 0); +#endif + if (swapcontext(&task->proc->sched, &task->ctx) < 0) { gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED, "swapcontext failed"); @@ -662,7 +679,14 @@ synctask_switchto(struct synctask *task) } else { task->slept = 1; __wait(task); + + if (task->delta != NULL) { + task->timer = gf_timer_call_after(task->xl->ctx, *task->delta, + synctask_timer, task); + } } + + task->delta = NULL; } pthread_mutex_unlock(&env->mutex); } @@ -670,68 +694,27 @@ synctask_switchto(struct synctask *task) void * syncenv_processor(void *thdata) { - struct syncenv *env = NULL; struct syncproc *proc = NULL; struct synctask *task = NULL; proc = thdata; - env = proc->env; - for (;;) { - task = syncenv_task(proc); - if (!task) - break; +#ifdef HAVE_TSAN_API + proc->tsan.fiber = __tsan_create_fiber(0); + snprintf(proc->tsan.name, 
TSAN_THREAD_NAMELEN, "<sched of syncenv@%p>", + proc); + __tsan_set_fiber_name(proc->tsan.fiber, proc->tsan.name); +#endif + while ((task = syncenv_task(proc)) != NULL) { synctask_switchto(task); - - syncenv_scale(env); } - return NULL; -} - -void -syncenv_scale(struct syncenv *env) -{ - int diff = 0; - int scale = 0; - int i = 0; - int ret = 0; - char thread_name[GF_THREAD_NAMEMAX] = { - 0, - }; - - pthread_mutex_lock(&env->mutex); - { - if (env->procs > env->runcount) - goto unlock; - - scale = env->runcount; - if (scale > env->procmax) - scale = env->procmax; - if (scale > env->procs) - diff = scale - env->procs; - while (diff) { - diff--; - for (; (i < env->procmax); i++) { - if (env->proc[i].processor == 0) - break; - } +#ifdef HAVE_TSAN_API + __tsan_destroy_fiber(proc->tsan.fiber); +#endif - env->proc[i].env = env; - snprintf(thread_name, sizeof(thread_name), "sproc%03hx", - (env->procs & 0x3ff)); - ret = gf_thread_create(&env->proc[i].processor, NULL, - syncenv_processor, &env->proc[i], - thread_name); - if (ret) - break; - env->procs++; - i++; - } - } -unlock: - pthread_mutex_unlock(&env->mutex); + return NULL; } /* The syncenv threads are cleaned up in this routine. 
@@ -785,9 +768,6 @@ syncenv_new(size_t stacksize, int procmin, int procmax) struct syncenv *newenv = NULL; int ret = 0; int i = 0; - char thread_name[GF_THREAD_NAMEMAX] = { - 0, - }; if (!procmin || procmin < 0) procmin = SYNCENV_PROC_MIN; @@ -813,14 +793,13 @@ syncenv_new(size_t stacksize, int procmin, int procmax) newenv->stacksize = stacksize; newenv->procmin = procmin; newenv->procmax = procmax; + newenv->procs_idle = 0; for (i = 0; i < newenv->procmin; i++) { newenv->proc[i].env = newenv; - snprintf(thread_name, sizeof(thread_name), "%s%d", "sproc", - (newenv->procs)); ret = gf_thread_create(&newenv->proc[i].processor, NULL, - syncenv_processor, &newenv->proc[i], - thread_name); + syncenv_processor, &newenv->proc[i], "sproc%d", + i); if (ret) break; newenv->procs++; @@ -910,7 +889,7 @@ __synclock_lock(struct synclock *lock) task->woken = 0; list_add_tail(&task->waitq, &lock->waitq); pthread_mutex_unlock(&lock->guard); - synctask_yield(task); + synctask_yield(task, NULL); /* task is removed from waitq in unlock, * under lock->guard.*/ pthread_mutex_lock(&lock->guard); @@ -1063,6 +1042,136 @@ synclock_unlock(synclock_t *lock) return ret; } +/* Condition variables */ + +int32_t +synccond_init(synccond_t *cond) +{ + int32_t ret; + + INIT_LIST_HEAD(&cond->waitq); + + ret = pthread_mutex_init(&cond->pmutex, NULL); + if (ret != 0) { + return -ret; + } + + ret = pthread_cond_init(&cond->pcond, NULL); + if (ret != 0) { + pthread_mutex_destroy(&cond->pmutex); + } + + return -ret; +} + +void +synccond_destroy(synccond_t *cond) +{ + pthread_cond_destroy(&cond->pcond); + pthread_mutex_destroy(&cond->pmutex); +} + +int +synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta) +{ + struct timespec now; + struct synctask *task = NULL; + int ret; + + task = synctask_get(); + + if (task == NULL) { + if (delta != NULL) { + timespec_now_realtime(&now); + timespec_adjust_delta(&now, *delta); + } + + pthread_mutex_lock(&cond->pmutex); + + if (delta == NULL) { 
+ ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex); + } else { + ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now); + } + } else { + pthread_mutex_lock(&cond->pmutex); + + list_add_tail(&task->waitq, &cond->waitq); + task->synccond = cond; + + ret = synclock_unlock(lock); + if (ret == 0) { + pthread_mutex_unlock(&cond->pmutex); + + synctask_yield(task, delta); + + ret = synclock_lock(lock); + if (ret == 0) { + ret = task->ret; + } + task->ret = 0; + + return ret; + } + + list_del_init(&task->waitq); + } + + pthread_mutex_unlock(&cond->pmutex); + + return ret; +} + +int +synccond_wait(synccond_t *cond, synclock_t *lock) +{ + return synccond_timedwait(cond, lock, NULL); +} + +void +synccond_signal(synccond_t *cond) +{ + struct synctask *task; + + pthread_mutex_lock(&cond->pmutex); + + if (!list_empty(&cond->waitq)) { + task = list_first_entry(&cond->waitq, struct synctask, waitq); + list_del_init(&task->waitq); + + pthread_mutex_unlock(&cond->pmutex); + + synctask_wake(task); + } else { + pthread_cond_signal(&cond->pcond); + + pthread_mutex_unlock(&cond->pmutex); + } +} + +void +synccond_broadcast(synccond_t *cond) +{ + struct list_head list; + struct synctask *task; + + INIT_LIST_HEAD(&list); + + pthread_mutex_lock(&cond->pmutex); + + list_splice_init(&cond->waitq, &list); + pthread_cond_broadcast(&cond->pcond); + + pthread_mutex_unlock(&cond->pmutex); + + while (!list_empty(&list)) { + task = list_first_entry(&list, struct synctask, waitq); + list_del_init(&task->waitq); + + synctask_wake(task); + } +} + /* Barriers */ int @@ -1132,7 +1241,7 @@ __syncbarrier_wait(struct syncbarrier *barrier, int waitfor) /* called within a synctask */ list_add_tail(&task->waitq, &barrier->waitq); pthread_mutex_unlock(&barrier->guard); - synctask_yield(task); + synctask_yield(task, NULL); pthread_mutex_lock(&barrier->guard); } else { /* called by a non-synctask */ @@ -1909,6 +2018,7 @@ syncop_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, 
args->iobref = iobref_ref(iobref); args->vector = iov_dup(vector, count); args->count = count; + args->iatt1 = *stbuf; } __wake(args); @@ -1919,7 +2029,7 @@ syncop_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int syncop_readv(xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags, struct iovec **vector, int *count, struct iobref **iobref, - dict_t *xdata_in, dict_t **xdata_out) + struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out) { struct syncargs args = { 0, @@ -1933,6 +2043,9 @@ syncop_readv(xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags, else if (args.xdata) dict_unref(args.xdata); + if (iatt) + *iatt = args.iatt1; + if (args.op_ret < 0) goto out; @@ -1970,6 +2083,11 @@ syncop_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, if (xdata) args->xdata = dict_ref(xdata); + if (op_ret >= 0) { + args->iatt1 = *prebuf; + args->iatt2 = *postbuf; + } + __wake(args); return 0; @@ -1978,7 +2096,8 @@ syncop_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int syncop_writev(xlator_t *subvol, fd_t *fd, const struct iovec *vector, int32_t count, off_t offset, struct iobref *iobref, - uint32_t flags, dict_t *xdata_in, dict_t **xdata_out) + uint32_t flags, struct iatt *preiatt, struct iatt *postiatt, + dict_t *xdata_in, dict_t **xdata_out) { struct syncargs args = { 0, @@ -1987,6 +2106,11 @@ syncop_writev(xlator_t *subvol, fd_t *fd, const struct iovec *vector, SYNCOP(subvol, (&args), syncop_writev_cbk, subvol->fops->writev, fd, (struct iovec *)vector, count, offset, flags, iobref, xdata_in); + if (preiatt) + *preiatt = args.iatt1; + if (postiatt) + *postiatt = args.iatt2; + if (xdata_out) *xdata_out = args.xdata; else if (args.xdata) @@ -2315,14 +2439,19 @@ syncop_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (xdata) args->xdata = dict_ref(xdata); + if (op_ret >= 0) { + args->iatt1 = *prebuf; + args->iatt2 = *postbuf; + } + __wake(args); return 0; } int 
-syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, dict_t *xdata_in, - dict_t **xdata_out) +syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, struct iatt *preiatt, + struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out) { struct syncargs args = { 0, @@ -2331,6 +2460,11 @@ syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, dict_t *xdata_in, SYNCOP(subvol, (&args), syncop_ftruncate_cbk, subvol->fops->ftruncate, fd, offset, xdata_in); + if (preiatt) + *preiatt = args.iatt1; + if (postiatt) + *postiatt = args.iatt2; + if (xdata_out) *xdata_out = args.xdata; else if (args.xdata) @@ -2376,14 +2510,19 @@ syncop_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (xdata) args->xdata = dict_ref(xdata); + if (op_ret >= 0) { + args->iatt1 = *prebuf; + args->iatt2 = *postbuf; + } + __wake(args); return 0; } int -syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, dict_t *xdata_in, - dict_t **xdata_out) +syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, struct iatt *preiatt, + struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out) { struct syncargs args = { 0, @@ -2392,6 +2531,11 @@ syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, dict_t *xdata_in, SYNCOP(subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync, fd, dataonly, xdata_in); + if (preiatt) + *preiatt = args.iatt1; + if (postiatt) + *postiatt = args.iatt2; + if (xdata_out) *xdata_out = args.xdata; else if (args.xdata) @@ -2946,12 +3090,13 @@ syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what, SYNCOP(subvol, (&args), syncop_seek_cbk, subvol->fops->seek, fd, offset, what, xdata_in); - if (*off) - *off = args.offset; - - if (args.op_ret == -1) + if (args.op_ret < 0) { return -args.op_errno; - return args.op_ret; + } else { + if (off) + *off = args.offset; + return args.op_ret; + } } int @@ -3366,39 +3511,62 @@ syncop_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } int -syncop_icreate(xlator_t *subvol, loc_t *loc, 
mode_t mode, dict_t *xdata) +syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, struct iatt *stbuf, + struct iatt *preiatt_dst, struct iatt *postiatt_dst, + dict_t *xdata_in, dict_t **xdata_out) { struct syncargs args = { 0, }; - SYNCOP(subvol, (&args), syncop_icreate_cbk, subvol->fops->icreate, loc, - mode, xdata); + SYNCOP(subvol, (&args), syncop_copy_file_range_cbk, + subvol->fops->copy_file_range, fd_in, off_in, fd_out, off_out, len, + flags, xdata_in); - if (xdata) - xdata = args.xdata; - else if (args.xdata) + if (stbuf) { + *stbuf = args.iatt1; + } + if (preiatt_dst) { + *preiatt_dst = args.iatt2; + } + if (postiatt_dst) { + *postiatt_dst = args.iatt3; + } + + if (xdata_out) { + *xdata_out = args.xdata; + } else if (args.xdata) { dict_unref(args.xdata); + } errno = args.op_errno; return args.op_ret; } int -syncop_namelink(xlator_t *subvol, loc_t *loc, dict_t *xdata) +syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata) { - struct syncargs args = { - 0, - }; + struct syncargs *args = NULL; - SYNCOP(subvol, (&args), syncop_namelink_cbk, subvol->fops->namelink, loc, - xdata); + args = cookie; + args->op_ret = op_ret; + args->op_errno = op_errno; if (xdata) - xdata = args.xdata; - else if (args.xdata) - dict_unref(args.xdata); + args->xdata = dict_ref(xdata); - errno = args.op_errno; - return args.op_ret; + if (op_ret >= 0) { + args->iatt1 = *stbuf; + args->iatt2 = *prebuf_dst; + args->iatt3 = *postbuf_dst; + } + + __wake(args); + + return 0; } diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index a3bc9189257..04400f98b6c 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -8,17 +8,24 @@ cases as published by the Free Software Foundation. 
*/ -#include "syscall.h" -#include "compat.h" -#include "mem-pool.h" -#include "libglusterfs-messages.h" +#include "glusterfs/compat.h" +#include "glusterfs/syscall.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/libglusterfs-messages.h" +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#include <signal.h> +#endif #include <sys/types.h> #include <utime.h> #include <sys/time.h> #include <fcntl.h> #include <unistd.h> #include <stdarg.h> +#ifdef HAVE_COPY_FILE_RANGE_SYS +#include <sys/syscall.h> +#endif #define FS_ERROR_LOG(result) \ do { \ @@ -211,6 +218,15 @@ sys_unlink(const char *pathname) } int +sys_unlinkat(int dfd, const char *pathname) +{ +#ifdef GF_SOLARIS_HOST_OS + return FS_RET_CHECK0(solaris_unlinkat(dfd, pathname, 0), errno); +#endif + return FS_RET_CHECK0(unlinkat(dfd, pathname, 0), errno); +} + +int sys_rmdir(const char *pathname) { return FS_RET_CHECK0(rmdir(pathname), errno); @@ -223,6 +239,12 @@ sys_symlink(const char *oldpath, const char *newpath) } int +sys_symlinkat(const char *oldpath, int dirfd, const char *newpath) +{ + return FS_RET_CHECK0(symlinkat(oldpath, dirfd, newpath), errno); +} + +int sys_rename(const char *oldpath, const char *newpath) { #ifdef GF_SOLARIS_HOST_OS @@ -250,6 +272,12 @@ sys_link(const char *oldpath, const char *newpath) } int +sys_linkat(int oldfd, const char *oldpath, int newfd, const char *newpath) +{ + return FS_RET_CHECK0(linkat(oldfd, oldpath, newfd, newpath, 0), errno); +} + +int sys_chmod(const char *path, mode_t mode) { return FS_RET_CHECK0(chmod(path, mode), errno); @@ -482,7 +510,7 @@ sys_lsetxattr(const char *path, const char *name, const void *value, #endif #ifdef GF_BSD_HOST_OS - return FS_RET_CHECK0( + return FS_RET_CHECK( extattr_set_link(path, EXTATTR_NAMESPACE_USER, name, value, size), errno); #endif @@ -600,7 +628,7 @@ sys_fsetxattr(int filedes, const char *name, const void *value, size_t size, #endif #ifdef GF_BSD_HOST_OS - return FS_RET_CHECK0( + return FS_RET_CHECK( extattr_set_fd(filedes, 
EXTATTR_NAMESPACE_USER, name, value, size), errno); #endif @@ -727,3 +755,122 @@ sys_fallocate(int fd, int mode, off_t offset, off_t len) errno = ENOSYS; return -1; } + +int +sys_socket(int domain, int type, int protocol) +{ +#ifdef SOCK_CLOEXEC + return socket(domain, type | SOCK_CLOEXEC, protocol); +#else + int fd = -1; + + fd = socket(domain, type, protocol); + if (fd >= 0) + fcntl(fd, F_SETFD, FD_CLOEXEC); + return fd; +#endif +} + +#if (defined(HAVE_ACCEPT4) || defined(HAVE_PACCEPT)) +static inline int +prep_accept_flags(int flags) +{ + if (flags & O_NONBLOCK) { + flags &= ~O_NONBLOCK; + flags |= SOCK_NONBLOCK; + } + + flags |= SOCK_CLOEXEC; + + return flags; +} +#endif + +int +sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags) +{ + int newsock = -1; + +#ifdef HAVE_ACCEPT4 + + flags = prep_accept_flags(flags); + newsock = accept4(sock, sockaddr, socklen, flags); + +#elif HAVE_PACCEPT + flags = prep_accept_flags(flags); + newsock = paccept(sock, sockaddr, socklen, NULL, flags); + +#else + int op_errno = 0; + int curflag = 0; + int ret = 0; + + newsock = accept(sock, sockaddr, socklen); + if (newsock != -1) { + curflag = fcntl(newsock, F_GETFL); + if (fcntl(newsock, F_SETFL, curflag | flags) == -1) { + op_errno = errno; + goto err; + } + + curflag = fcntl(newsock, F_GETFD); + if (fcntl(newsock, F_SETFD, curflag | FD_CLOEXEC) == -1) { + op_errno = errno; + goto err; + } + } + +err: + if (op_errno) { + close(newsock); + errno = op_errno; + return -1; + } + +#endif + return newsock; +} + +ssize_t +sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, + size_t len, unsigned int flags) +{ + /* + * TODO: Add check for other platofrms like freebsd etc if this syscall is + * not generic. + * This is what the function does. + * 1) Check whether copy_file_range API is present. If so call it. + * 2) If copy_file_range API is not present, then check whether + * the system call is there. 
If so, then use syscall to invoke + * SYS_copy_file_range system call. + * 3) If neither of the above is present, then return ENOSYS. + */ +#ifdef HAVE_COPY_FILE_RANGE + return FS_RET_CHECK( + copy_file_range(fd_in, off_in, fd_out, off_out, len, flags), errno); +#else +#ifdef HAVE_COPY_FILE_RANGE_SYS + return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, + flags); +#else + errno = ENOSYS; + return -1; +#endif /* HAVE_COPY_FILE_RANGE_SYS */ +#endif /* HAVE_COPY_FILE_RANGE */ +} + +#ifdef __FreeBSD__ +int +sys_kill(pid_t pid, int sig) +{ + return FS_RET_CHECK0(kill(pid, sig), errno); +} + +int +sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp, + const void *newp, size_t newlen) +{ + return FS_RET_CHECK0(sysctl(name, namelen, oldp, oldlenp, newp, newlen), + errno); +} +#endif diff --git a/libglusterfs/src/throttle-tbf.c b/libglusterfs/src/throttle-tbf.c index 9519defa37f..e11ca4f9d35 100644 --- a/libglusterfs/src/throttle-tbf.c +++ b/libglusterfs/src/throttle-tbf.c @@ -23,8 +23,8 @@ * */ -#include "mem-pool.h" -#include "throttle-tbf.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/throttle-tbf.h" typedef struct tbf_throttle { char done; @@ -99,7 +99,7 @@ tbf_tokengenerator(void *arg) token_gen_interval = bucket->token_gen_interval; while (1) { - usleep(token_gen_interval); + gf_nanosleep(token_gen_interval * GF_US_IN_NS); LOCK(&bucket->lock); { diff --git a/libglusterfs/src/tier-ctr-interface.h b/libglusterfs/src/tier-ctr-interface.h deleted file mode 100644 index 8a627542c0c..00000000000 --- a/libglusterfs/src/tier-ctr-interface.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _TIER_CTR_INTERFACE_H_ -#define _TIER_CTR_INTERFACE_H_ - -#include "common-utils.h" -#include "gfdb_data_store_types.h" - -#define GFDB_IPC_CTR_KEY "gfdb.ipc-ctr-op" - -/* - * CTR IPC OPERATIONS - * - * - */ -#define GFDB_IPC_CTR_QUERY_OPS "gfdb.ipc-ctr-query-op" -#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op" -#define 
GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm" -#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version" -#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma" -/* - * CTR IPC INPUT/OUTPUT - * - * - */ -#define GFDB_IPC_CTR_GET_QFILE_PATH "gfdb.ipc-ctr-get-qfile-path" -#define GFDB_IPC_CTR_GET_QUERY_PARAMS "gfdb.ipc-ctr-get-query-parms" -#define GFDB_IPC_CTR_RET_QUERY_COUNT "gfdb.ipc-ctr-ret-rec-count" -#define GFDB_IPC_CTR_GET_DB_KEY "gfdb.ipc-ctr-get-params-key" -#define GFDB_IPC_CTR_RET_DB_VERSION "gfdb.ipc-ctr-ret-db-version" - -/* - * gfdb ipc ctr params for query - * - * - */ -typedef struct gfdb_ipc_ctr_params { - gf_boolean_t is_promote; - int write_freq_threshold; - int read_freq_threshold; - gfdb_time_t time_stamp; - int query_limit; - gf_boolean_t emergency_demote; -} gfdb_ipc_ctr_params_t; - -#endif diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c index 88a28a9bd16..66c861b04cd 100644 --- a/libglusterfs/src/timer.c +++ b/libglusterfs/src/timer.c @@ -8,12 +8,12 @@ cases as published by the Free Software Foundation. 
*/ -#include "timer.h" -#include "logging.h" -#include "common-utils.h" -#include "globals.h" -#include "timespec.h" -#include "libglusterfs-messages.h" +#include "glusterfs/timer.h" +#include "glusterfs/logging.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/globals.h" +#include "glusterfs/timespec.h" +#include "glusterfs/libglusterfs-messages.h" /* fwd decl */ static gf_timer_registry_t * @@ -53,7 +53,7 @@ gf_timer_call_after(glusterfs_ctx_t *ctx, struct timespec delta, event->callbk = callbk; event->data = data; event->xl = THIS; - LOCK(&reg->lock); + pthread_mutex_lock(&reg->lock); { list_for_each_entry_reverse(trav, &reg->active, list) { @@ -61,8 +61,11 @@ gf_timer_call_after(glusterfs_ctx_t *ctx, struct timespec delta, break; } list_add(&event->list, &trav->list); + if (&trav->list == &reg->active) { + pthread_cond_signal(&reg->cond); + } } - UNLOCK(&reg->lock); + pthread_mutex_unlock(&reg->lock); return event; } @@ -75,13 +78,13 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event) if (ctx == NULL || event == NULL) { gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "invalid argument"); - return 0; + return -1; } if (ctx->cleanup_started) { gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED, "ctx cleanup started"); - return 0; + return -1; } LOCK(&ctx->lock); @@ -93,13 +96,12 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event) if (!reg) { /* This can happen when cleanup may have just started and * gf_timer_registry_destroy() sets ctx->timer to NULL. - * Just bail out as success as gf_timer_proc() takes - * care of cleaning up the events. + * gf_timer_proc() takes care of cleaning up the events.
*/ - return 0; + return -1; } - LOCK(&reg->lock); + pthread_mutex_lock(&reg->lock); { fired = event->fired; if (fired) @@ -107,7 +109,7 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event) list_del(&event->list); } unlock: - UNLOCK(&reg->lock); + pthread_mutex_unlock(&reg->lock); if (!fired) { GF_FREE(event); @@ -120,64 +122,29 @@ static void * gf_timer_proc(void *data) { gf_timer_registry_t *reg = data; - struct timespec sleepts; gf_timer_t *event = NULL; gf_timer_t *tmp = NULL; xlator_t *old_THIS = NULL; + pthread_mutex_lock(&reg->lock); + while (!reg->fin) { - uint64_t now; - struct timespec now_ts; - - timespec_now(&now_ts); - now = TS(now_ts); - while (1) { - uint64_t at; - char need_cbk = 0; - - /* - * This will be overridden with a shorter interval if - * there's an event scheduled sooner. That makes the - * system more responsive in most cases, but doesn't - * include the case where a timer is added while we're - * asleep. It's tempting to use pthread_cond_timedwait, - * with the caveat that we'd be relying on system time - * instead of monotonic time. That's a mess when the - * system time is adjusted. Another alternative might - * be to use pthread_kill, but that will remain TBD for - * now. - */ - sleepts.tv_sec = 1; - sleepts.tv_nsec = 0; - - LOCK(&reg->lock); - { - /* - * Using list_for_each and then always breaking - * after the first iteration might seem strange, - * but (unlike alternatives) is independent of - * the underlying list implementation.
- */ - list_for_each_entry_safe(event, tmp, &reg->active, list) - { - at = TS(event->at); - if (now >= at) { - need_cbk = 1; - event->fired = _gf_true; - list_del(&event->list); - } else { - uint64_t diff = now - at; - - if (diff < 1000000000) { - sleepts.tv_sec = 0; - sleepts.tv_nsec = diff; - } - } - break; - } - } - UNLOCK(&reg->lock); - if (need_cbk) { + if (list_empty(&reg->active)) { + pthread_cond_wait(&reg->cond, &reg->lock); + } else { + struct timespec now; + + timespec_now(&now); + event = list_first_entry(&reg->active, gf_timer_t, list); + if (TS(now) < TS(event->at)) { + now = event->at; + pthread_cond_timedwait(&reg->cond, &reg->lock, &now); + } else { + event->fired = _gf_true; + list_del_init(&event->list); + + pthread_mutex_unlock(&reg->lock); + old_THIS = NULL; if (event->xl) { old_THIS = THIS; @@ -188,26 +155,29 @@ gf_timer_proc(void *data) if (old_THIS) { THIS = old_THIS; } - } else { - break; + + pthread_mutex_lock(&reg->lock); } } - nanosleep(&sleepts, NULL); } - LOCK(&reg->lock); + /* Do not call gf_timer_call_cancel(), + * it will lead to deadlock + */ + list_for_each_entry_safe(event, tmp, &reg->active, list) { - /* Do not call gf_timer_call_cancel(), - * it will lead to deadlock + list_del(&event->list); + /* TODO Possible resource leak + * Before freeing the event, we need to call the respective + * event functions and free any resources. + * For example, In case of rpc_clnt_reconnect, we need to + * unref rpc object which was taken when added to timer + * wheel.
*/ - list_for_each_entry_safe(event, tmp, &reg->active, list) - { - list_del(&event->list); - GF_FREE(event); - } + GF_FREE(event); } - UNLOCK(&reg->lock); - LOCK_DESTROY(&reg->lock); + + pthread_mutex_unlock(&reg->lock); return NULL; } @@ -217,6 +187,7 @@ gf_timer_registry_init(glusterfs_ctx_t *ctx) { gf_timer_registry_t *reg = NULL; int ret = -1; + pthread_condattr_t attr; LOCK(&ctx->lock); { @@ -231,7 +202,10 @@ gf_timer_registry_init(glusterfs_ctx_t *ctx) goto out; } ctx->timer = reg; - LOCK_INIT(&reg->lock); + pthread_mutex_init(&reg->lock, NULL); + pthread_condattr_init(&attr); + pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); + pthread_cond_init(&reg->cond, &attr); INIT_LIST_HEAD(&reg->active); } UNLOCK(&ctx->lock); @@ -265,7 +239,18 @@ gf_timer_registry_destroy(glusterfs_ctx_t *ctx) return; thr_id = reg->th; + + pthread_mutex_lock(&reg->lock); + reg->fin = 1; + pthread_cond_signal(&reg->cond); + + pthread_mutex_unlock(&reg->lock); + pthread_join(thr_id, NULL); + + pthread_cond_destroy(&reg->cond); + pthread_mutex_destroy(&reg->lock); + GF_FREE(reg); } diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c index d17506662ac..96cef5c6f07 100644 --- a/libglusterfs/src/timespec.c +++ b/libglusterfs/src/timespec.c @@ -19,10 +19,9 @@ static mach_timebase_info_data_t gf_timebase; #endif -#include "logging.h" -#include "timespec.h" -#include "libglusterfs-messages.h" -#include "common-utils.h" +#include "glusterfs/timespec.h" +#include "glusterfs/libglusterfs-messages.h" +#include "glusterfs/common-utils.h" void timespec_now(struct timespec *ts) @@ -71,6 +70,28 @@ timespec_now(struct timespec *ts) } void +timespec_now_realtime(struct timespec *ts) +{ +#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \ + defined GF_BSD_HOST_OS + if (0 == clock_gettime(CLOCK_REALTIME, ts)) { + return; + } +#endif + + /* Fall back to gettimeofday()*/ + struct timeval tv = { + 0, + }; + if (0 == gettimeofday(&tv, NULL)) { + TIMEVAL_TO_TIMESPEC(&tv, ts); + return; + } + + return; +} + +void 
timespec_adjust_delta(struct timespec *ts, struct timespec delta) { ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000); @@ -90,3 +111,19 @@ timespec_sub(const struct timespec *begin, const struct timespec *end, res->tv_nsec = end->tv_nsec - begin->tv_nsec; } } + +int +timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts) +{ + if (lhs_ts->tv_sec < rhs_ts->tv_sec) { + return -1; + } else if (lhs_ts->tv_sec > rhs_ts->tv_sec) { + return 1; + } else if (lhs_ts->tv_nsec < rhs_ts->tv_nsec) { + return -1; + } else if (lhs_ts->tv_nsec > rhs_ts->tv_nsec) { + return 1; + } + + return 0; +} diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c index 4f01bcfe0da..809550b864c 100644 --- a/libglusterfs/src/trie.c +++ b/libglusterfs/src/trie.c @@ -10,11 +10,9 @@ #include <stdio.h> #include <string.h> -#include <stdlib.h> -#include <ctype.h> -#include "common-utils.h" -#include "trie.h" +#include "glusterfs/common-utils.h" +#include "glusterfs/trie.h" #define DISTANCE_EDIT 1 #define DISTANCE_INS 1 diff --git a/libglusterfs/src/unittest/global_mock.c b/libglusterfs/src/unittest/global_mock.c index 52156847d81..2fcf96dbad8 100644 --- a/libglusterfs/src/unittest/global_mock.c +++ b/libglusterfs/src/unittest/global_mock.c @@ -8,8 +8,8 @@ cases as published by the Free Software Foundation. */ -#include "logging.h" -#include "xlator.h" +#include "glusterfs/logging.h" +#include "glusterfs/xlator.h" #include <stdarg.h> #include <stddef.h> diff --git a/libglusterfs/src/unittest/log_mock.c b/libglusterfs/src/unittest/log_mock.c index d342de31067..60f6530726b 100644 --- a/libglusterfs/src/unittest/log_mock.c +++ b/libglusterfs/src/unittest/log_mock.c @@ -8,8 +8,8 @@ cases as published by the Free Software Foundation. 
*/ -#include "logging.h" -#include "xlator.h" +#include "glusterfs/logging.h" +#include "glusterfs/xlator.h" #include <stdarg.h> #include <stddef.h> diff --git a/libglusterfs/src/unittest/mem_pool_unittest.c b/libglusterfs/src/unittest/mem_pool_unittest.c index 32137b95bb0..9ca324329ba 100644 --- a/libglusterfs/src/unittest/mem_pool_unittest.c +++ b/libglusterfs/src/unittest/mem_pool_unittest.c @@ -8,9 +8,9 @@ cases as published by the Free Software Foundation. */ -#include "mem-pool.h" -#include "logging.h" -#include "xlator.h" +#include "glusterfs/mem-pool.h" +#include "glusterfs/logging.h" +#include "glusterfs/xlator.h" #include <stdarg.h> #include <stddef.h> diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 0f21ea46ed1..9a2582d45d5 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -8,12 +8,12 @@ cases as published by the Free Software Foundation. */ -#include "xlator.h" +#include "glusterfs/xlator.h" #include <dlfcn.h> #include <netdb.h> #include <fnmatch.h> -#include "defaults.h" -#include "libglusterfs-messages.h" +#include "glusterfs/defaults.h" +#include "glusterfs/libglusterfs-messages.h" #define SET_DEFAULT_FOP(fn) \ do { \ @@ -45,9 +45,38 @@ xlator_init_unlock(void) static struct xlator_cbks default_cbks = {}; struct volume_options default_options[] = { - {.key = {NULL}}, + { + .key = {"log-level"}, + .type = GF_OPTION_TYPE_STR, + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + .tags = {"generic"}, + .value = {"DEBUG", "WARNING", "ERROR", "INFO", "CRITICAL", "NONE", + "TRACE"}, + .description = "Option to set log-level of given translator", + }, + { + .key = {NULL}, + }, }; +/* Handle the common options in each translator */ +void +handle_default_options(xlator_t *xl, dict_t *options) +{ + int ret; + char *value; + + /* log-level */ + ret = dict_get_str(options, "log-level", &value); + if (!ret) { + int log_level = glusterd_check_log_level(value); + if (log_level != -1) { + xl->loglevel = 
log_level; + } + } +} + static void fill_defaults(xlator_t *xl) { @@ -114,6 +143,7 @@ fill_defaults(xlator_t *xl) SET_DEFAULT_FOP(getspec); SET_DEFAULT_FOP(icreate); SET_DEFAULT_FOP(namelink); + SET_DEFAULT_FOP(copy_file_range); if (!xl->cbks) xl->cbks = &default_cbks; @@ -154,9 +184,11 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle, volume_opt_list_t *opt_list) { int ret = -1; + int flag = 0; char *name = NULL; void *handle = NULL; xlator_api_t *xlapi = NULL; + volume_option_t *opt = NULL; GF_VALIDATE_OR_GOTO("xlator", xlator_type, out); @@ -164,8 +196,10 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle, * need this check */ if (!strstr(xlator_type, "rpc-transport")) ret = gf_asprintf(&name, "%s/%s.so", XLATORDIR, xlator_type); - else + else { + flag = 1; ret = gf_asprintf(&name, "%s/%s.so", XLATORPARENTDIR, xlator_type); + } if (-1 == ret) { goto out; } @@ -176,34 +210,34 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle, handle = dlopen(name, RTLD_NOW); if (!handle) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "%s", - dlerror()); + gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "error=%s", + dlerror(), NULL); goto out; } - /* check new struct first, and then check this */ - xlapi = dlsym(handle, "xlator_api"); - if (!xlapi) { - gf_msg("xlator", GF_LOG_DEBUG, 0, LG_MSG_DLSYM_ERROR, - "dlsym(xlator_api) on %s. " - "Fall back to old symbols", - dlerror()); - /* This case is not an error for now, so allow it - to fall back to old methods. 
*/ - opt_list->given_opt = dlsym(handle, "options"); - if (!opt_list->given_opt) { - dlerror(); - gf_msg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED, - "Failed to load xlator opt table"); + if (flag == 0) { + /* check new struct first, and then check this */ + xlapi = dlsym(handle, "xlator_api"); + if (!xlapi) { + gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s", + dlerror(), NULL); goto out; } - } else { + opt_list->given_opt = xlapi->options; if (!opt_list->given_opt) { - gf_msg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED, - "Failed to load xlator options table"); + gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED, NULL); + goto out; + } + } else { + opt = dlsym(handle, "options"); + if (!opt) { + gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s", + dlerror(), NULL); goto out; } + + opt_list->given_opt = opt; } *dl_handle = handle; @@ -219,136 +253,29 @@ out: return ret; } -int -xlator_dynload_oldway(xlator_t *xl) -{ - int i = 0; - int ret = -1; - void *handle = NULL; - volume_opt_list_t *vol_opt = NULL; - class_methods_t *vtbl = NULL; - - handle = xl->dlhandle; - - xl->fops = dlsym(handle, "fops"); - if (!xl->fops) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR, - "dlsym(fops) on %s", dlerror()); - goto out; - } - - xl->cbks = dlsym(handle, "cbks"); - if (!xl->cbks) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR, - "dlsym(cbks) on %s", dlerror()); - goto out; - } - - /* - * If class_methods exists, its contents override any definitions of - * init or fini for that translator. Otherwise, we fall back to the - * older method of looking for init and fini directly. 
- */ - vtbl = dlsym(handle, "class_methods"); - if (vtbl) { - xl->init = vtbl->init; - xl->fini = vtbl->fini; - xl->reconfigure = vtbl->reconfigure; - xl->notify = vtbl->notify; - } else { - if (!(*VOID(&xl->init) = dlsym(handle, "init"))) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR, - "dlsym(init) on %s", dlerror()); - goto out; - } - - if (!(*VOID(&(xl->fini)) = dlsym(handle, "fini"))) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR, - "dlsym(fini) on %s", dlerror()); - goto out; - } - if (!(*VOID(&(xl->reconfigure)) = dlsym(handle, "reconfigure"))) { - gf_msg_trace("xlator", 0, - "dlsym(reconfigure) on %s " - "-- neglecting", - dlerror()); - } - if (!(*VOID(&(xl->notify)) = dlsym(handle, "notify"))) { - gf_msg_trace("xlator", 0, - "dlsym(notify) on %s -- " - "neglecting", - dlerror()); - } - } - - if (!(xl->dumpops = dlsym(handle, "dumpops"))) { - gf_msg_trace("xlator", 0, - "dlsym(dumpops) on %s -- " - "neglecting", - dlerror()); - } - - if (!(*VOID(&(xl->mem_acct_init)) = dlsym(handle, "mem_acct_init"))) { - gf_msg_trace(xl->name, 0, - "dlsym(mem_acct_init) on %s -- " - "neglecting", - dlerror()); - } - - vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t), - gf_common_mt_volume_opt_list_t); - - if (!vol_opt) { - goto out; - } - - if (!(vol_opt->given_opt = dlsym(handle, "options"))) { - gf_msg_trace(xl->name, 0, - "Strict option validation not " - "enforced -- neglecting (%s)", - dlerror()); - } - INIT_LIST_HEAD(&vol_opt->list); - list_add_tail(&vol_opt->list, &xl->volume_options); - - /* make sure 'min' is set to high value, so it would be - properly set later */ - for (i = 0; i < GF_FOP_MAXVALUE; i++) { - xl->stats.interval.latencies[i].min = 0xffffffff; - } - - ret = 0; - -out: - return ret; -} - -int -xlator_dynload_newway(xlator_t *xl) +static int +xlator_dynload_apis(xlator_t *xl) { int ret = -1; void *handle = NULL; volume_opt_list_t *vol_opt = NULL; xlator_api_t *xlapi = NULL; + int i = 0; handle = xl->dlhandle; xlapi = 
dlsym(handle, "xlator_api"); if (!xlapi) { - gf_msg("xlator", GF_LOG_DEBUG, 0, LG_MSG_DLSYM_ERROR, - "dlsym(xlator_api) on %s. " - "Fall back to old symbols", - dlerror()); - /* This case is not an error for now, so allow it - to fall back to old methods. */ - ret = 1; + gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "dlsym=%s", + dlerror(), NULL); + ret = -1; goto out; } xl->fops = xlapi->fops; if (!xl->fops) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR, - "%s: struct missing (fops)", xl->name); + gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_STRUCT_MISS, "name=%s", + xl->name, NULL); goto out; } @@ -359,8 +286,8 @@ xlator_dynload_newway(xlator_t *xl) xl->init = xlapi->init; if (!xl->init) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR, - "%s: method missing (init)", xl->name); + gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_METHOD_MISS, "name=%s", + xl->name, NULL); goto out; } @@ -406,17 +333,23 @@ xlator_dynload_newway(xlator_t *xl) if (!vol_opt) { goto out; } - - vol_opt->given_opt = xlapi->options; - if (!vol_opt->given_opt) { - gf_msg("xlator", GF_LOG_INFO, 0, LG_MSG_DLSYM_ERROR, - "%s: options not provided, using default", xl->name); - vol_opt->given_opt = default_options; - } - INIT_LIST_HEAD(&vol_opt->list); + + vol_opt->given_opt = default_options; list_add_tail(&vol_opt->list, &xl->volume_options); + if (xlapi->options) { + vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t), + gf_common_mt_volume_opt_list_t); + if (!vol_opt) { + goto out; + } + INIT_LIST_HEAD(&vol_opt->list); + + vol_opt->given_opt = xlapi->options; + list_add_tail(&vol_opt->list, &xl->volume_options); + } + xl->id = xlapi->xlator_id; xl->flags = xlapi->flags; xl->identifier = xlapi->identifier; @@ -425,6 +358,10 @@ xlator_dynload_newway(xlator_t *xl) memcpy(xl->op_version, xlapi->op_version, sizeof(uint32_t) * GF_MAX_RELEASES); + for (i = 0; i < GF_FOP_MAXVALUE; i++) { + gf_latency_reset(&xl->stats.interval.latencies[i]); + } + ret = 0; out: return ret; @@ 
-452,21 +389,15 @@ xlator_dynload(xlator_t *xl) handle = dlopen(name, RTLD_NOW); if (!handle) { - gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "%s", - dlerror()); + gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "error=%s", + dlerror(), NULL); goto out; } xl->dlhandle = handle; - ret = xlator_dynload_newway(xl); + ret = xlator_dynload_apis(xl); if (-1 == ret) goto out; - if (1 == ret) { - /* it means we don't find the new symbol in xlator code */ - ret = xlator_dynload_oldway(xl); - if (-1 == ret) - goto out; - } fill_defaults(xl); @@ -526,10 +457,8 @@ xlator_set_inode_lru_limit(xlator_t *this, void *data) if (this->itable) { if (!data) { - gf_msg(this->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY, - "input data is NULL. " - "Cannot update the lru limit of the inode" - " table. Continuing with older value"); + gf_smsg(this->name, GF_LOG_WARNING, 0, LG_MSG_INPUT_DATA_NULL, + NULL); goto out; } inode_lru_limit = *(int *)data; @@ -681,6 +610,7 @@ __xlator_init(xlator_t *xl) GF_ATOMIC_INIT(xl->stats.interval.count, 0); xlator_init_lock(); + handle_default_options(xl, xl->options); ret = xl->init(xl); xlator_init_unlock(); @@ -700,19 +630,17 @@ xlator_init(xlator_t *xl) xl->mem_acct_init(xl); xl->instance_name = NULL; + GF_ATOMIC_INIT(xl->xprtrefcnt, 0); if (!xl->init) { - gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED, - "No init() found"); + gf_smsg(xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED, NULL); goto out; } ret = __xlator_init(xl); if (ret) { - gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR, - "Initialization of volume '%s' failed," - " review your volfile again", - xl->name); + gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR, "name=%s", + xl->name, NULL); goto out; } @@ -745,6 +673,7 @@ xlator_fini_rec(xlator_t *xl) trav = trav->next; } + xl->cleanup_starting = 1; if (xl->init_succeeded) { if (xl->fini) { old_THIS = THIS; @@ -752,8 +681,14 @@ xlator_fini_rec(xlator_t *xl) xl->fini(xl); - if (xl->local_pool) + 
if (xl->local_pool) { mem_pool_destroy(xl->local_pool); + xl->local_pool = NULL; + } + if (xl->itable) { + inode_table_destroy(xl->itable); + xl->itable = NULL; + } THIS = old_THIS; } else { @@ -822,6 +757,19 @@ xlator_mem_acct_init(xlator_t *xl, int num_types) } void +xlator_mem_acct_unref(struct mem_acct *mem_acct) +{ + uint32_t i; + + if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { + for (i = 0; i < mem_acct->num_types; i++) { + LOCK_DESTROY(&(mem_acct->rec[i].lock)); + } + FREE(mem_acct); + } +} + +void xlator_tree_fini(xlator_t *xl) { xlator_t *top = NULL; @@ -852,7 +800,6 @@ xlator_list_destroy(xlator_list_t *list) int xlator_memrec_free(xlator_t *xl) { - uint32_t i = 0; struct mem_acct *mem_acct = NULL; if (!xl) { @@ -861,13 +808,8 @@ xlator_memrec_free(xlator_t *xl) mem_acct = xl->mem_acct; if (mem_acct) { - for (i = 0; i < mem_acct->num_types; i++) { - LOCK_DESTROY(&(mem_acct->rec[i].lock)); - } - if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { - FREE(mem_acct); - xl->mem_acct = NULL; - } + xlator_mem_acct_unref(mem_acct); + xl->mem_acct = NULL; } return 0; @@ -884,7 +826,7 @@ xlator_members_free(xlator_t *xl) GF_FREE(xl->name); GF_FREE(xl->type); - if (!(xl->ctx && xl->ctx->cmd_args.valgrind) && xl->dlhandle) + if (!(xl->ctx && xl->ctx->cmd_args.vgtool != _gf_none) && xl->dlhandle) dlclose(xl->dlhandle); if (xl->options) dict_unref(xl->options); @@ -934,8 +876,7 @@ xlator_tree_free_members(xlator_t *tree) xlator_t *prev = tree; if (!tree) { - gf_msg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, - "Translator tree not found"); + gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL); return -1; } @@ -955,8 +896,7 @@ xlator_tree_free_memacct(xlator_t *tree) xlator_t *prev = tree; if (!tree) { - gf_msg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, - "Translator tree not found"); + gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL); return -1; } @@ -980,7 +920,6 @@ xlator_mem_free(xlator_t *xl) return 0; if (xl->options) { - 
dict_ref(xl->options); dict_unref(xl->options); xl->options = NULL; } @@ -999,7 +938,7 @@ xlator_mem_free(xlator_t *xl) static void xlator_call_fini(xlator_t *this) { - if (!this || this->cleanup_starting) + if (!this || this->call_cleanup) return; this->cleanup_starting = 1; this->call_cleanup = 1; @@ -1018,6 +957,8 @@ xlator_mem_cleanup(xlator_t *this) xlator_list_t **trav_p = NULL; xlator_t *top = NULL; xlator_t *victim = NULL; + glusterfs_graph_t *graph = NULL; + gf_boolean_t graph_cleanup = _gf_false; if (this->call_cleanup || !this->ctx) return; @@ -1025,6 +966,12 @@ xlator_mem_cleanup(xlator_t *this) this->call_cleanup = 1; ctx = this->ctx; + inode_table = this->itable; + if (inode_table) { + inode_table_destroy(inode_table); + this->itable = NULL; + } + xlator_call_fini(trav); while (prev) { @@ -1033,12 +980,6 @@ xlator_mem_cleanup(xlator_t *this) prev = trav; } - inode_table = this->itable; - if (inode_table) { - inode_table_destroy(inode_table); - this->itable = NULL; - } - if (this->fini) { this->fini(this); } @@ -1048,29 +989,28 @@ xlator_mem_cleanup(xlator_t *this) if (ctx->active) { top = ctx->active->first; LOCK(&ctx->volfile_lock); - /* TODO here we have leak for xlator node in a graph */ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { victim = (*trav_p)->xlator; if (victim->call_cleanup && !strcmp(victim->name, this->name)) { + graph_cleanup = _gf_true; (*trav_p) = (*trav_p)->next; break; } } - /* TODO Sometime brick xlator is not moved from graph so followed below - approach to move brick xlator from a graph, will move specific brick - xlator from graph only while inode table and mem_acct are cleaned up - */ - trav_p = &top->children; - while (*trav_p) { - victim = (*trav_p)->xlator; - if (victim->call_cleanup && !victim->itable && !victim->mem_acct) { - (*trav_p) = (*trav_p)->next; - } else { - trav_p = &(*trav_p)->next; - } - } UNLOCK(&ctx->volfile_lock); } + + if (graph_cleanup) { + prev = this; + graph = ctx->active; + 
pthread_mutex_lock(&graph->mutex); + while (prev) { + trav = prev->next; + GF_FREE(prev); + prev = trav; + } + pthread_mutex_unlock(&graph->mutex); + } } void @@ -1370,8 +1310,21 @@ xlator_destroy(xlator_t *xl) return 0; } +static int32_t +gf_bin_to_string(char *dst, size_t size, void *src, size_t len) +{ + if (len >= size) { + return EINVAL; + } + + memcpy(dst, src, len); + dst[len] = 0; + + return 0; +} + int -is_gf_log_command(xlator_t *this, const char *name, char *value) +is_gf_log_command(xlator_t *this, const char *name, char *value, size_t size) { xlator_t *trav = NULL; char key[1024] = { @@ -1383,7 +1336,11 @@ is_gf_log_command(xlator_t *this, const char *name, char *value) glusterfs_ctx_t *ctx = NULL; if (!strcmp("trusted.glusterfs.syslog", name)) { - ret = gf_string2boolean(value, &syslog_flag); + ret = gf_bin_to_string(key, sizeof(key), value, size); + if (ret != 0) { + goto out; + } + ret = gf_string2boolean(key, &syslog_flag); if (ret) { ret = EOPNOTSUPP; goto out; @@ -1399,7 +1356,12 @@ is_gf_log_command(xlator_t *this, const char *name, char *value) if (fnmatch("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE)) goto out; - log_level = glusterd_check_log_level(value); + ret = gf_bin_to_string(key, sizeof(key), value, size); + if (ret != 0) { + goto out; + } + + log_level = glusterd_check_log_level(key); if (log_level == -1) { ret = EOPNOTSUPP; goto out; @@ -1407,9 +1369,9 @@ is_gf_log_command(xlator_t *this, const char *name, char *value) /* Some crude way to change the log-level of process */ if (!strcmp(name, "trusted.glusterfs.set-log-level")) { - gf_msg("glusterfs", gf_log_get_loglevel(), 0, LG_MSG_SET_LOG_LEVEL, - "setting log level to %d (old-value=%d)", log_level, - gf_log_get_loglevel()); + gf_smsg("glusterfs", gf_log_get_loglevel(), 0, LG_MSG_SET_LOG_LEVEL, + "new-value=%d", log_level, "old-value=%d", + gf_log_get_loglevel(), NULL); gf_log_set_loglevel(this->ctx, log_level); ret = 0; goto out; @@ -1417,9 +1379,9 @@ 
is_gf_log_command(xlator_t *this, const char *name, char *value) if (!strcmp(name, "trusted.glusterfs.fuse.set-log-level")) { /* */ - gf_msg(this->name, gf_log_get_xl_loglevel(this), 0, - LG_MSG_SET_LOG_LEVEL, "setting log level to %d (old-value=%d)", - log_level, gf_log_get_xl_loglevel(this)); + gf_smsg(this->name, gf_log_get_xl_loglevel(this), 0, + LG_MSG_SET_LOG_LEVEL, "new-value=%d", log_level, "old-value=%d", + gf_log_get_xl_loglevel(this), NULL); gf_log_set_xl_loglevel(this, log_level); ret = 0; goto out; @@ -1435,10 +1397,9 @@ is_gf_log_command(xlator_t *this, const char *name, char *value) while (trav) { snprintf(key, 1024, "trusted.glusterfs.%s.set-log-level", trav->name); if (fnmatch(name, key, FNM_NOESCAPE) == 0) { - gf_msg(trav->name, gf_log_get_xl_loglevel(trav), 0, - LG_MSG_SET_LOG_LEVEL, - "setting log level to %d (old-value=%d)", log_level, - gf_log_get_xl_loglevel(trav)); + gf_smsg(trav->name, gf_log_get_xl_loglevel(trav), 0, + LG_MSG_SET_LOG_LEVEL, "new-value%d", log_level, + "old-value=%d", gf_log_get_xl_loglevel(trav), NULL); gf_log_set_xl_loglevel(trav, log_level); ret = 0; } @@ -1470,9 +1431,7 @@ glusterd_check_log_level(const char *value) } if (log_level == -1) - gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_FAILED, - "Invalid log-level. possible values are " - "DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE"); + gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_INIT, NULL); return log_level; } @@ -1531,3 +1490,104 @@ glusterfs_delete_volfile_checksum(glusterfs_ctx_t *ctx, const char *volfile_id) return 0; } + +/* + The function is required to take dict ref for every xlator at graph. + At the time of compare graph topology create a graph and populate + key values in the dictionary, after finished graph comparison we do destroy + the new graph.At the time of construct graph we don't take any reference + so to avoid dict leak at the of destroying graph due to ref counter underflow + we need to call dict_ref here. 
+ +*/ + +void +gluster_graph_take_reference(xlator_t *tree) +{ + xlator_t *trav = tree; + xlator_t *prev = tree; + + if (!tree) { + gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL); + return; + } + + while (prev) { + trav = prev->next; + if (prev->options) + dict_ref(prev->options); + prev = trav; + } + return; +} + +gf_boolean_t +mgmt_is_multiplexed_daemon(char *name) +{ + const char *mux_daemons[] = {"glustershd", NULL}; + int i; + + if (!name) + return _gf_false; + + for (i = 0; mux_daemons[i]; i++) { + if (!strcmp(name, mux_daemons[i])) + return _gf_true; + } + return _gf_false; +} + +gf_boolean_t +xlator_is_cleanup_starting(xlator_t *this) +{ + gf_boolean_t cleanup = _gf_false; + glusterfs_graph_t *graph = NULL; + xlator_t *xl = NULL; + + if (!this) { + gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_OBJECT_NULL, "xlator", + NULL); + goto out; + } + + graph = this->graph; + if (!graph) { + gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_NOT_SET, + "name=%s", this->name, NULL); + goto out; + } + + xl = graph->first; + if (xl && xl->cleanup_starting) + cleanup = _gf_true; +out: + return cleanup; +} + +int +graph_total_client_xlator(glusterfs_graph_t *graph) +{ + xlator_t *xl = NULL; + int count = 0; + + if (!graph) { + gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_OBJECT_NULL, "graph", + NULL); + goto out; + } + + xl = graph->first; + if (!strcmp(xl->type, "protocol/server")) { + gf_msg_debug(xl->name, 0, "Return because it is a server graph"); + return 0; + } + + while (xl) { + if (strcmp(xl->type, "protocol/client") == 0) { + count++; + } + xl = xl->next; + } +out: + return count; +} |
