diff options
Diffstat (limited to 'libglusterfs/src/glusterfs/syncop.h')
| -rw-r--r-- | libglusterfs/src/glusterfs/syncop.h | 637 | 
1 files changed, 637 insertions, 0 deletions
| diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h new file mode 100644 index 00000000000..203abe92b57 --- /dev/null +++ b/libglusterfs/src/glusterfs/syncop.h @@ -0,0 +1,637 @@ +/* +  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. +*/ + +#ifndef _SYNCOP_H +#define _SYNCOP_H + +#include "glusterfs/xlator.h" +#include <sys/time.h> +#include <pthread.h> +#include <ucontext.h> + +#define SYNCENV_PROC_MAX 16 +#define SYNCENV_PROC_MIN 2 +#define SYNCPROC_IDLE_TIME 600 + +/* + * Flags for syncopctx valid elements + */ +#define SYNCOPCTX_UID 0x00000001 +#define SYNCOPCTX_GID 0x00000002 +#define SYNCOPCTX_GROUPS 0x00000004 +#define SYNCOPCTX_PID 0x00000008 +#define SYNCOPCTX_LKOWNER 0x00000010 + +struct synctask; +struct syncproc; +struct syncenv; + +typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque); + +typedef int (*synctask_fn_t)(void *opaque); + +typedef enum { +    SYNCTASK_INIT = 0, +    SYNCTASK_RUN, +    SYNCTASK_SUSPEND, +    SYNCTASK_WAIT, +    SYNCTASK_DONE, +    SYNCTASK_ZOMBIE, +} synctask_state_t; + +/* for one sequential execution of @syncfn */ +struct synctask { +    struct list_head all_tasks; +    struct syncenv *env; +    xlator_t *xl; +    call_frame_t *frame; +    call_frame_t *opframe; +    synctask_cbk_t synccbk; +    synctask_fn_t syncfn; +    synctask_state_t state; +    void *opaque; +    void *stack; +    int woken; +    int slept; +    int ret; + +    uid_t uid; +    gid_t gid; + +    ucontext_t ctx; +    struct syncproc *proc; + +    pthread_mutex_t mutex; /* for synchronous spawning of synctask */ +    pthread_cond_t cond; +    int done; + +    struct list_head waitq; /* can wait only "once" at a time */ +    char btbuf[GF_BACKTRACE_LEN]; +}; + +struct syncproc { +    pthread_t processor; +    ucontext_t sched; +    struct syncenv *env; +    struct synctask *current; +}; + +/* hosts the scheduler thread and framework for executing synctasks */ +struct syncenv { +    struct syncproc proc[SYNCENV_PROC_MAX]; +    int procs; + +    struct list_head runq; +    int runcount; +    struct list_head waitq; +    int waitcount; + +    int procmin; +    int procmax; + +    pthread_mutex_t mutex; +    pthread_cond_t cond; + +    size_t stacksize; + +    int destroy; /* FLAG to mark syncenv is in destroy mode +                    so that no more synctasks are accepted*/ +}; + +typedef enum { LOCK_NULL = 0, LOCK_TASK, LOCK_THREAD } lock_type_t; + +typedef enum { +    SYNC_LOCK_DEFAULT = 0, +    SYNC_LOCK_RECURSIVE, /*it allows recursive locking*/ +} lock_attr_t; + +struct synclock { +    pthread_mutex_t guard;  /* guard the remaining members, pair @cond */ +    pthread_cond_t cond;    /* waiting non-synctasks */ +    struct list_head waitq; /* waiting synctasks */ +    volatile int lock;      /* true(non zero) or false(zero), lock status */ +    lock_attr_t attr; +    struct synctask *owner; /* NULL if current owner is not a synctask */ +    pthread_t owner_tid; +    lock_type_t type; +}; +typedef struct synclock synclock_t; + +struct syncbarrier { +    gf_boolean_t initialized; /*Set on successful initialization*/ +    pthread_mutex_t guard;    /* guard the remaining members, pair @cond */ +    pthread_cond_t cond;      /* waiting non-synctasks */ +    struct list_head waitq;   /* waiting synctasks */ +    int count;                /* count the number of wakes */ +    int waitfor;              /* no. of wakes until which task can be in +                                 waitq before being woken up. */ +}; +typedef struct syncbarrier syncbarrier_t; + +struct syncargs { +    int op_ret; +    int op_errno; +    struct iatt iatt1; +    struct iatt iatt2; +    dict_t *xattr; +    struct statvfs statvfs_buf; +    struct iovec *vector; +    int count; +    struct iobref *iobref; +    char *buffer; +    dict_t *xdata; +    struct gf_flock flock; +    struct gf_lease lease; +    dict_t *dict_out; + +    /* some more _cbk needs */ +    uuid_t uuid; +    char *errstr; +    dict_t *dict; +    pthread_mutex_t lock_dict; + +    syncbarrier_t barrier; + +    /* do not touch */ +    struct synctask *task; +    pthread_mutex_t mutex; +    pthread_cond_t cond; +    int done; + +    gf_dirent_t entries; +    off_t offset; + +    lock_migration_info_t locklist; +}; + +struct syncopctx { +    unsigned int valid; /* valid flags for elements that are set */ +    uid_t uid; +    gid_t gid; +    int grpsize; +    int ngrps; +    gid_t *groups; +    pid_t pid; +    gf_lkowner_t lk_owner; +}; + +#define __yawn(args)                                                           \ +    do {                                                                       \ +        args->task = synctask_get();                                           \ +        if (args->task)                                                        \ +            break;                                                             \ +        pthread_mutex_init(&args->mutex, NULL);                                \ +        pthread_cond_init(&args->cond, NULL);                                  \ +        args->done = 0;                                                        \ +    } while (0) + +#define __wake(args)                                                           \ +    do {                                                                       \ +        if (args->task) {                                                      \ +            synctask_wake(args->task);                                         \ +        } else {                                                               \ +            pthread_mutex_lock(&args->mutex);                                  \ +            {                                                                  \ +                args->done = 1;                                                \ +                pthread_cond_signal(&args->cond);                              \ +            }                                                                  \ +            pthread_mutex_unlock(&args->mutex);                                \ +        }                                                                      \ +    } while (0) + +#define __yield(args)                                                          \ +    do {                                                                       \ +        if (args->task) {                                                      \ +            synctask_yield(args->task);                                        \ +        } else {                                                               \ +            pthread_mutex_lock(&args->mutex);                                  \ +            {                                                                  \ +                while (!args->done)                                            \ +                    pthread_cond_wait(&args->cond, &args->mutex);              \ +            }                                                                  \ +            pthread_mutex_unlock(&args->mutex);                                \ +            pthread_mutex_destroy(&args->mutex);                               \ +            pthread_cond_destroy(&args->cond);                                 \ +        }                                                                      \ +    } while (0) + +#define SYNCOP(subvol, stb, cbk, fn_op, params...)                             \ +    do {                                                                       \ +        struct synctask *task = NULL;                                          \ +        call_frame_t *frame = NULL;                                            \ +                                                                               \ +        task = synctask_get();                                                 \ +        stb->task = task;                                                      \ +        if (task)                                                              \ +            frame = task->opframe;                                             \ +        else                                                                   \ +            frame = syncop_create_frame(THIS);                                 \ +                                                                               \ +        if (task) {                                                            \ +            frame->root->uid = task->uid;                                      \ +            frame->root->gid = task->gid;                                      \ +        }                                                                      \ +                                                                               \ +        __yawn(stb);                                                           \ +                                                                               \ +        frame->op = get_fop_index_from_fn(subvol, fn_op);                      \ +        STACK_WIND_COOKIE(frame, cbk, (void *)stb, subvol, fn_op, params);     \ +                                                                               \ +        __yield(stb);                                                          \ +        if (task)                                                              \ +            STACK_RESET(frame->root);                                          \ +        else                                                                   \ +            STACK_DESTROY(frame->root);                                        \ +    } while (0) + +/* + * syncop_xxx() calls are executed in two ways, one is inside a synctask where + * the executing function will do 'swapcontext' and the other is without + * synctask where the executing thread is made to wait using pthread_cond_wait. + * Executing thread may change when syncop_xxx() is executed inside a synctask. + * This leads to errno_location change i.e. errno may give errno of + * non-executing thread. So errno is not touched inside a synctask execution. + * All gfapi calls are executed using the second way of executing syncop_xxx() + * where the executing thread waits using pthread_cond_wait so it is ok to set + * errno in these cases. The following macro makes syncop_xxx() behave just + * like a system call, where -1 is returned and errno is set when a failure + * occurs. + */ +#define DECODE_SYNCOP_ERR(ret)                                                 \ +    do {                                                                       \ +        if (ret < 0) {                                                         \ +            errno = -ret;                                                      \ +            ret = -1;                                                          \ +        } else {                                                               \ +            errno = 0;                                                         \ +        }                                                                      \ +    } while (0) + +#define SYNCENV_DEFAULT_STACKSIZE (2 * 1024 * 1024) + +struct syncenv * +syncenv_new(size_t stacksize, int procmin, int procmax); +void +syncenv_destroy(struct syncenv *); +void +syncenv_scale(struct syncenv *env); + +int +synctask_new1(struct syncenv *, size_t stacksize, synctask_fn_t, synctask_cbk_t, +              call_frame_t *frame, void *); +int +synctask_new(struct syncenv *, synctask_fn_t, synctask_cbk_t, +             call_frame_t *frame, void *); +struct synctask * +synctask_create(struct syncenv *, size_t stacksize, synctask_fn_t, +                synctask_cbk_t, call_frame_t *, void *); +int +synctask_join(struct synctask *task); +void +synctask_wake(struct synctask *task); +void +synctask_yield(struct synctask *task); +void +synctask_waitfor(struct synctask *task, int count); + +#define synctask_barrier_init(args) syncbarrier_init(&args->barrier) +#define synctask_barrier_wait(args, n) syncbarrier_wait(&args->barrier, n) +#define synctask_barrier_wake(args) syncbarrier_wake(&args->barrier) + +int +synctask_setid(struct synctask *task, uid_t uid, gid_t gid); +#define SYNCTASK_SETID(uid, gid) synctask_setid(synctask_get(), uid, gid); + +int +syncopctx_setfsuid(void *uid); +int +syncopctx_setfsgid(void *gid); +int +syncopctx_setfsgroups(int count, const void *groups); +int +syncopctx_setfspid(void *pid); +int +syncopctx_setfslkowner(gf_lkowner_t *lk_owner); + +static inline call_frame_t * +syncop_create_frame(xlator_t *this) +{ +    call_frame_t *frame = NULL; +    int ngrps = -1; +    struct syncopctx *opctx = NULL; + +    frame = create_frame(this, this->ctx->pool); +    if (!frame) +        return NULL; + +    frame->root->type = GF_OP_TYPE_FOP; +    opctx = syncopctx_getctx(); + +    if (opctx && (opctx->valid & SYNCOPCTX_PID)) +        frame->root->pid = opctx->pid; +    else +        frame->root->pid = getpid(); + +    if (opctx && (opctx->valid & SYNCOPCTX_UID)) +        frame->root->uid = opctx->uid; +    else +        frame->root->uid = geteuid(); + +    if (opctx && (opctx->valid & SYNCOPCTX_GID)) +        frame->root->gid = opctx->gid; +    else +        frame->root->gid = getegid(); + +    if (opctx && (opctx->valid & SYNCOPCTX_GROUPS)) { +        ngrps = opctx->ngrps; + +        if (ngrps != 0 && opctx->groups != NULL) { +            if (call_stack_alloc_groups(frame->root, ngrps) != 0) { +                STACK_DESTROY(frame->root); +                return NULL; +            } + +            memcpy(frame->root->groups, opctx->groups, (sizeof(gid_t) * ngrps)); +        } +    } else { +        ngrps = getgroups(0, 0); +        if (ngrps < 0) { +            STACK_DESTROY(frame->root); +            return NULL; +        } + +        if (call_stack_alloc_groups(frame->root, ngrps) != 0) { +            STACK_DESTROY(frame->root); +            return NULL; +        } + +        if (getgroups(ngrps, frame->root->groups) < 0) { +            STACK_DESTROY(frame->root); +            return NULL; +        } +    } + +    if (opctx && (opctx->valid & SYNCOPCTX_LKOWNER)) +        frame->root->lk_owner = opctx->lk_owner; + +    return frame; +} + +int +synclock_init(synclock_t *lock, lock_attr_t attr); +int +synclock_destroy(synclock_t *lock); +int +synclock_lock(synclock_t *lock); +int +synclock_trylock(synclock_t *lock); +int +synclock_unlock(synclock_t *lock); + +int +syncbarrier_init(syncbarrier_t *barrier); +int +syncbarrier_wait(syncbarrier_t *barrier, int waitfor); +int +syncbarrier_wake(syncbarrier_t *barrier); +int +syncbarrier_destroy(syncbarrier_t *barrier); + +int +syncop_lookup(xlator_t *subvol, loc_t *loc, +              /* out */ +              struct iatt *iatt, struct iatt *parent, +              /* xdata */ +              dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_readdirp(xlator_t *subvol, fd_t *fd, size_t size, off_t off, +                /* out */ +                gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_readdir(xlator_t *subvol, fd_t *fd, size_t size, off_t off, +               gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_opendir(xlator_t *subvol, loc_t *loc, fd_t *fd, dict_t *xdata_in, +               dict_t **xdata_out); + +int +syncop_setattr(xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, +               /* out */ +               struct iatt *preop, struct iatt *postop, dict_t *xdata_in, +               dict_t **xdata_out); + +int +syncop_fsetattr(xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid, +                /* out */ +                struct iatt *preop, struct iatt *postop, dict_t *xdata_in, +                dict_t **xdata_out); + +int +syncop_statfs(xlator_t *subvol, loc_t *loc, +              /* out */ +              struct statvfs *buf, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_setxattr(xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags, +                dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_fsetxattr(xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags, +                 dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_listxattr(xlator_t *subvol, loc_t *loc, dict_t **dict, dict_t *xdata_in, +                 dict_t **xdata_out); + +int +syncop_getxattr(xlator_t *xl, loc_t *loc, dict_t **dict, const char *key, +                dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_fgetxattr(xlator_t *xl, fd_t *fd, dict_t **dict, const char *key, +                 dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_removexattr(xlator_t *subvol, loc_t *loc, const char *name, +                   dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_fremovexattr(xlator_t *subvol, fd_t *fd, const char *name, +                    dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_create(xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode, +              fd_t *fd, struct iatt *iatt, dict_t *xdata_in, +              dict_t **xdata_out); + +int +syncop_open(xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd, +            dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_close(fd_t *fd); + +int +syncop_write(xlator_t *subvol, fd_t *fd, const char *buf, int size, +             off_t offset, struct iobref *iobref, uint32_t flags, +             dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_writev(xlator_t *subvol, fd_t *fd, const struct iovec *vector, +              int32_t count, off_t offset, struct iobref *iobref, +              uint32_t flags, struct iatt *preiatt, struct iatt *postiatt, +              dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_readv(xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags, +             /* out */ +             struct iovec **vector, int *count, struct iobref **iobref, +             struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, struct iatt *preiatt, +                 struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_truncate(xlator_t *subvol, loc_t *loc, off_t offset, dict_t *xdata_in, +                dict_t **xdata_out); + +int +syncop_unlink(xlator_t *subvol, loc_t *loc, dict_t *xdata_in, +              dict_t **xdata_out); + +int +syncop_rmdir(xlator_t *subvol, loc_t *loc, int flags, dict_t *xdata_in, +             dict_t **xdata_out); + +int +syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, struct iatt *preiatt, +             struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_flush(xlator_t *subvol, fd_t *fd, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_fstat(xlator_t *subvol, fd_t *fd, struct iatt *stbuf, dict_t *xdata_in, +             dict_t **xdata_out); + +int +syncop_stat(xlator_t *subvol, loc_t *loc, struct iatt *stbuf, dict_t *xdata_in, +            dict_t **xdata_out); + +int +syncop_symlink(xlator_t *subvol, loc_t *loc, const char *newpath, +               struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_readlink(xlator_t *subvol, loc_t *loc, char **buffer, size_t size, +                dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_mknod(xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev, +             struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_mkdir(xlator_t *subvol, loc_t *loc, mode_t mode, struct iatt *iatt, +             dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_link(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, struct iatt *iatt, +            dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_fsyncdir(xlator_t *subvol, fd_t *fd, int datasync, dict_t *xdata_in, +                dict_t **xdata_out); + +int +syncop_access(xlator_t *subvol, loc_t *loc, int32_t mask, dict_t *xdata_in, +              dict_t **xdata_out); + +int +syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset, +                 size_t len, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len, +               dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, off_t len, +                dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_rename(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *xdata_in, +              dict_t **xdata_out); + +int +syncop_lk(xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock, +          dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_inodelk(xlator_t *subvol, const char *volume, loc_t *loc, int32_t cmd, +               struct gf_flock *lock, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_lease(xlator_t *subvol, loc_t *loc, struct gf_lease *lease, +             dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_ipc(xlator_t *subvol, int op, dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_xattrop(xlator_t *subvol, loc_t *loc, gf_xattrop_flags_t flags, +               dict_t *dict, dict_t *xdata_in, dict_t **dict_out, +               dict_t **xdata_out); + +int +syncop_fxattrop(xlator_t *subvol, fd_t *fd, gf_xattrop_flags_t flags, +                dict_t *dict, dict_t *xdata_in, dict_t **dict_out, +                dict_t **xdata_out); + +int +syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what, +            dict_t *xdata_in, off_t *off); + +int +syncop_getactivelk(xlator_t *subvol, loc_t *loc, +                   lock_migration_info_t *locklist, dict_t *xdata_in, +                   dict_t **xdata_out); + +int +syncop_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                       int32_t op_ret, int32_t op_errno, dict_t *xdata); + +int +syncop_setactivelk(xlator_t *subvol, loc_t *loc, +                   lock_migration_info_t *locklist, dict_t *xdata_in, +                   dict_t **xdata_out); + +int +syncop_put(xlator_t *subvol, loc_t *loc, mode_t mode, mode_t umask, +           uint32_t flags, struct iovec *vector, int32_t count, off_t offset, +           struct iobref *iobref, dict_t *xattr, struct iatt *iatt, +           dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                       int32_t op_ret, int32_t op_errno, dict_t *xdata); + +int +syncop_icreate(xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *xdata_out); + +int +syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc, +               const char *basename, entrylk_cmd cmd, entrylk_type type, +               dict_t *xdata_in, dict_t **xdata_out); + +#endif /* _SYNCOP_H */ | 
