summaryrefslogtreecommitdiffstats
path: root/xlators/storage/bdb/src/bdb.h
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/bdb/src/bdb.h')
-rw-r--r--xlators/storage/bdb/src/bdb.h439
1 files changed, 439 insertions, 0 deletions
diff --git a/xlators/storage/bdb/src/bdb.h b/xlators/storage/bdb/src/bdb.h
new file mode 100644
index 000000000..f2d962680
--- /dev/null
+++ b/xlators/storage/bdb/src/bdb.h
@@ -0,0 +1,439 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _BDB_H
+#define _BDB_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <db.h>
+
+#ifdef linux
+#ifdef __GLIBC__
+#include <sys/fsuid.h>
+#else
+#include <unistd.h>
+#endif
+#endif
+
+#ifdef HAVE_SYS_XATTR_H
+#include <sys/xattr.h>
+#endif
+
+#ifdef HAVE_SYS_EXTATTR_H
+#include <sys/extattr.h>
+#endif
+
+#include <pthread.h>
+#include "xlator.h"
+#include "inode.h"
+#include "compat.h"
+#include "compat-errno.h"
+
+#define GLFS_BDB_STORAGE "/glusterfs_storage.db" /* db file name (with leading '/') that MAKE_REAL_PATH_TO_STORAGE_DB appends to a directory path */
+
+/* bare numbers are not reader-friendly, so let's have ON and OFF macros
+ * (the 'cache' and 'transaction' members further down are documented as
+ * "either ON or OFF") */
+#define ON 1
+#define OFF 0
+
+#define BDB_DEFAULT_LRU_LIMIT 100 /* NOTE(review): presumably the default for bctx_table.lru_limit -- confirm in the .c files */
+#define BDB_DEFAULT_HASH_SIZE 100 /* NOTE(review): presumably the default for bctx_table.hash_size -- confirm in the .c files */
+
+#define BDB_ENOSPC_THRESHOLD 25600 /* NOTE(review): unit and use are not visible in this header -- confirm */
+
+#define BDB_DEFAULT_CHECKPOINT_TIMEOUT 30 /* NOTE(review): presumably seconds, matching 'option checkpoint-timeout <time-in-seconds>' -- confirm */
+
+/* BCTX_ENV(bctx)
+ * DB_ENV handle of the table that @bctx belongs to.
+ *
+ * @bctx: a 'struct bdb_ctx *' expression. It is parenthesized in the
+ *        expansion so that any pointer expression (e.g. 'p + 1') works.
+ */
+#define BCTX_ENV(bctx) ((bctx)->table->dbenv)
+/* MAKE_REAL_PATH(var,this,path)
+ * build the real path on the underlying file-system by appending @path to
+ * the export directory of the bdb xlator.
+ *
+ * @var:  'char *' lvalue that receives the real path. The buffer comes from
+ *        alloca(), so it is released when the calling function returns.
+ * @this: pointer to xlator_t corresponding to bdb xlator
+ * @path: path, as seen from mount-point
+ *
+ * NOTE: @path is evaluated twice; pass a side-effect free expression.
+ */
+#define MAKE_REAL_PATH(var, this, path) do {                                \
+        struct bdb_private *mrp_priv =                                      \
+                (struct bdb_private *)(this)->private;                      \
+        int mrp_base_len = mrp_priv->export_path_length;                    \
+        /* +2 leaves room for the terminating NUL (plus one spare byte) */  \
+        (var) = alloca (strlen (path) + mrp_base_len + 2);                  \
+        strcpy ((var), mrp_priv->export_path);                              \
+        strcpy (&(var)[mrp_base_len], (path));                              \
+        } while (0)
+
+/* MAKE_REAL_PATH_TO_STORAGE_DB(var,this,path)
+ * build the real path to the storage-database file of a directory:
+ * <export_path><path>GLFS_BDB_STORAGE
+ *
+ * @var:  'char *' lvalue that receives the real path. The buffer comes from
+ *        alloca(), so it is released when the calling function returns.
+ * @this: pointer to xlator_t corresponding to bdb xlator
+ * @path: path of the directory, as seen from mount-point
+ *
+ * NOTE: @path is evaluated twice; pass a side-effect free expression.
+ */
+#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do {                  \
+        struct bdb_private *mrpdb_priv =                                    \
+                (struct bdb_private *)(this)->private;                      \
+        int mrpdb_base_len = mrpdb_priv->export_path_length;                \
+        /* +1: the terminating NUL written by strcat() needs a byte too */  \
+        (var) = alloca (strlen (path) + mrpdb_base_len +                    \
+                        strlen (GLFS_BDB_STORAGE) + 1);                     \
+        strcpy ((var), mrpdb_priv->export_path);                            \
+        strcpy (&(var)[mrpdb_base_len], (path));                            \
+        strcat ((var), GLFS_BDB_STORAGE);                                   \
+        } while (0)
+
+/* MAKE_KEY_FROM_PATH(key,path)
+ * make a 'key', which we use as key in the underlying database: the
+ * basename of @path.
+ *
+ * @key:  'char *' lvalue that receives the key. basename() is run on an
+ *        alloca()'d copy of @path, so @key must not be used after the
+ *        calling function returns.
+ * @path: path to file as seen from mount-point
+ *
+ * The macro expands to a single statement without a trailing ';' so that
+ * it can be used in if/else bodies; write 'MAKE_KEY_FROM_PATH (k, p);'.
+ */
+#define MAKE_KEY_FROM_PATH(key, path) do {                         \
+        /* +1: strcpy() also copies the terminating NUL */         \
+        char *mkfp_copy = alloca (strlen (path) + 1);              \
+        strcpy (mkfp_copy, (path));                                \
+        (key) = basename (mkfp_copy);                              \
+        } while (0)
+
+/* BDB_DO_LSTAT(path,stbuf,dirent)
+ * construct real-path to a dirent ("<path>/<d_name>") and do lstat on it.
+ * The result is stored in a variable named 'ret', which must exist in the
+ * caller's scope.
+ *
+ * @path:   path to the directory whose readdir is currently in progress
+ * @stbuf:  a 'struct stat *'
+ * @dirent: a 'struct dirent *'
+ *
+ * If the joined path does not fit in GF_PATH_MAX bytes, lstat() is not
+ * called; 'ret' is set to -1 and errno to ENAMETOOLONG instead of writing
+ * past the end of the buffer.
+ *
+ * The macro expands to a single statement without a trailing ';'.
+ */
+#define BDB_DO_LSTAT(path, stbuf, dirent) do {                              \
+        char tmp_real_path[GF_PATH_MAX];                                    \
+        int  tmp_real_len = snprintf (tmp_real_path,                        \
+                                      sizeof (tmp_real_path), "%s/%s",      \
+                                      (path), (dirent)->d_name);            \
+        if (tmp_real_len < 0 ||                                             \
+            (size_t)tmp_real_len >= sizeof (tmp_real_path)) {               \
+                errno = ENAMETOOLONG;                                       \
+                ret = -1;                                                   \
+        } else {                                                            \
+                ret = lstat (tmp_real_path, (stbuf));                       \
+        }                                                                   \
+        } while (0)
+
+/* IS_BDB_PRIVATE_FILE(name)
+ * non-zero when 'name' is one of the file names reserved for internal use.
+ *
+ * @name: basename of a file.
+ *
+ * Reserved names: anything starting with '__db.' or 'log.0000' (used by
+ * libdb), and exactly 'glusterfs_storage.db' or 'glusterfs_ns.db' (used by
+ * bdb xlator itself).
+ */
+#define IS_BDB_PRIVATE_FILE(name)                                  \
+        (strncmp ((name), "__db.", 5) == 0                         \
+         || strcmp ((name), "glusterfs_storage.db") == 0           \
+         || strcmp ((name), "glusterfs_ns.db") == 0                \
+         || strncmp ((name), "log.0000", 8) == 0)
+
+/* check if 'name' is exactly the '.' or '..' entry.
+ * A whole-string compare is required: a prefix compare would also match
+ * every hidden file such as '.profile'. */
+#define IS_DOT_DOTDOT(name) ((!strcmp((name), ".")) || (!strcmp((name), "..")))
+
+/* BDB_SET_BCTX(this,inode,bctx)
+ * stamp an inode as belonging to bdb: store the 'struct bdb_ctx *' in the
+ * inode's ctx via inode_ctx_put(). Per the original notes this is done for
+ * directory inodes, in lookup() or mkdir().
+ *
+ * @this:  pointer xlator_t of bdb xlator.
+ * @inode: inode where 'struct bdb_ctx *' has to be stored.
+ * @bctx:  a 'struct bdb_ctx *'
+ *
+ * The return value of inode_ctx_put() is ignored. The macro expands to a
+ * single statement without a trailing ';' so it is safe in if/else bodies.
+ */
+#define BDB_SET_BCTX(this,inode,bctx) do {                                 \
+        inode_ctx_put ((inode), (this), (uint64_t)(long)(bctx));           \
+        } while (0)
+
+/* MAKE_BCTX_FROM_INODE(this,bctx,inode)
+ * extract bdb xlator's 'struct bdb_ctx *' from an inode's ctx.
+ *
+ * @this:  pointer xlator_t of bdb xlator.
+ * @bctx:  lvalue that receives the 'struct bdb_ctx *'
+ * @inode: inode from where 'struct bdb_ctx *' has to be extracted.
+ *
+ * @bctx is assigned only when inode_ctx_get() itself reports success (0);
+ * otherwise it is left untouched, so callers should initialize it to NULL.
+ * The lookup result is kept in a macro-local variable: the macro neither
+ * reads nor modifies any 'ret' variable of the caller.
+ *
+ * The macro expands to a single statement without a trailing ';'.
+ */
+#define MAKE_BCTX_FROM_INODE(this,bctx,inode) do {                         \
+        uint64_t tmp_bctx     = 0;                                         \
+        int      tmp_bctx_ret = inode_ctx_get ((inode), (this),            \
+                                               &tmp_bctx);                 \
+        if (tmp_bctx_ret == 0)                                             \
+                (bctx) = (void *)(long)tmp_bctx;                           \
+        } while (0)
+
+/* BDB_SET_BFD(this,fd,bfd)
+ * store the pointer @bfd in @fd's ctx for xlator @this via fd_ctx_set().
+ * The return value of fd_ctx_set() is ignored.
+ *
+ * The macro expands to a single statement without a trailing ';' so it is
+ * safe in if/else bodies.
+ */
+#define BDB_SET_BFD(this,fd,bfd) do {                                      \
+        fd_ctx_set ((fd), (this), (uint64_t)(long)(bfd));                  \
+        } while (0)
+
+/* maximum number of open dbs that bdb xlator will ever have.
+ * NOTE(review): not referenced anywhere in this header -- confirm where the
+ * limit is enforced. */
+#define BDB_MAX_OPEN_DBS 100
+
+/* convert file size to block-count: (size rounded up to whole blocks) - 1.
+ * Both arguments are parenthesized so that expressions can be passed.
+ * NOTE(review): for size == 0 the result is -1 (or wraps around for
+ * unsigned operands) -- confirm callers never pass an empty file size. */
+#define BDB_COUNT_BLOCKS(size,blksize) ((((size) + (blksize) - 1)/(blksize)) - 1)
+
+/* file permissions, again macros are more readable */
+#define RWXRWXRWX 0777
+#define DEFAULT_FILE_MODE 0644
+#define DEFAULT_DIR_MODE 0755
+
+/* non-zero when @mode is a valid permission specification, i.e. it has no
+ * bits set outside RWXRWXRWX (0777) */
+#define IS_VALID_FILE_MODE(mode) (!((mode) & (~RWXRWXRWX)))
+#define IS_VALID_DIR_MODE(mode) (!((mode) & (~RWXRWXRWX)))
+
+/* maximum retries for a failed transactional operation.
+ * NOTE(review): the retry loop is not part of this header -- confirm which
+ * operations honour this limit. */
+#define BDB_MAX_RETRIES 10
+
+typedef struct bctx_table bctx_table_t; /* table (hash, active and lru lists) of 'struct bdb_ctx' */
+typedef struct bdb_ctx bctx_t; /* per-directory context: one open database per directory */
+typedef struct bdb_cache bdb_cache_t; /* one cached record (key plus file content) */
+typedef struct bdb_private bdb_private_t; /* private data reached through xlator_t->private */
+
+struct bctx_table { /* bookkeeping shared by all 'struct bdb_ctx' of one xlator */
+ uint64_t dbflags; /* flags to be used for opening each database */
+ uint64_t cache; /* cache: can be either ON or OFF */
+ gf_lock_t lock; /* used to lock the 'struct bctx_table *' */
+ gf_lock_t checkpoint_lock; /* lock for checkpointing */
+ struct list_head *b_hash; /* hash table of 'struct bdb_ctx' */
+ struct list_head active; /* list of active 'struct bdb_ctx' */
+ struct list_head b_lru; /* lru list of inactive 'struct bdb_ctx' */
+ struct list_head purge; /* NOTE(review): presumably ctxs queued for destruction -- confirm */
+ uint32_t lru_limit; /* NOTE(review): presumably max number of entries kept on b_lru -- confirm */
+ uint32_t lru_size; /* NOTE(review): presumably current number of entries on b_lru -- confirm */
+ uint32_t hash_size; /* NOTE(review): presumably number of buckets in b_hash -- confirm */
+ DBTYPE access_mode; /* access mode for accessing the databases,
+ * can be DB_HASH, DB_BTREE */
+ DB_ENV *dbenv; /* DB_ENV under which every db operation
+ * is carried out; this is what BCTX_ENV() returns */
+ int32_t transaction; /* NOTE(review): presumably ON or OFF, like bdb_private.transaction -- confirm */
+ xlator_t *this; /* NOTE(review): presumably the bdb xlator owning this table -- confirm */
+
+ uint64_t page_size; /* page-size of DB,
+ * DB->set_pagesize(), should be set before DB->open */
+};
+
+struct bdb_ctx { /* per-directory context; stored in directory inodes by BDB_SET_BCTX */
+ /* controller members */
+ struct list_head list; /* lru list of 'struct bdb_ctx's,
+ * a bdb_ctx can exist in one of b_hash or lru lists
+ * NOTE(review): bctx_table also has 'active' and 'purge'
+ * lists -- confirm which lists this node can be on */
+ struct list_head b_hash; /* directory 'name' hashed list of 'struct bdb_ctx's */
+
+ struct bctx_table *table; /* table this ctx belongs to; dereferenced by BCTX_ENV() */
+ int32_t ref; /* reference count */
+ gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */
+
+ char *directory; /* directory path */
+ DB *dbp; /* pointer to open database, that resides inside this directory */
+ uint32_t cache; /* cache ON or OFF */
+
+ /* per directory cache, bdb xlator's internal cache */
+ struct list_head c_list; /* linked list of cached records */
+ int32_t c_count; /* number of cached records */
+
+ int32_t key_hash; /* index to hash table list, to which this ctx belongs */
+ char *db_path; /* absolute path to db file */
+};
+
+struct bdb_fd { /* NOTE(review): presumably the per-open-file state stored with BDB_SET_BFD -- confirm */
+ struct bdb_ctx *ctx; /* pointer to bdb_ctx of the parent directory */
+ char *key; /* name of the file. NOTE: basename, not the complete path */
+ int32_t flags; /* open flags */
+};
+
+struct bdb_dir { /* NOTE(review): presumably the per-open-directory state -- confirm */
+ struct bdb_ctx *ctx; /* pointer to bdb_ctx of this directory */
+ DIR *dir; /* open directory pointer, as returned by opendir() */
+ char offset[NAME_MAX]; /* FIXME: readdir offset, too crude. must go
+ * NOTE(review): NAME_MAX excludes the terminating NUL, so a
+ * maximum-length name does not fit here as a C string --
+ * confirm how this buffer is filled */
+ char *path; /* path to this directory */
+};
+
+/* cache: one cached record of a directory's database */
+struct bdb_cache {
+ struct list_head c_list; /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */
+ char *key; /* name of the file this cache holds. NOTE: basename of file */
+ char *data; /* file content */
+ size_t size; /* size of the file content that this cache holds */
+};
+
+
+struct bdb_private { /* private data of the xlator; the MAKE_REAL_PATH* macros read it through this->private */
+ inode_table_t *itable; /* pointer to inode table that we use */
+ int32_t temp; /* NOTE(review): undocumented -- purpose not visible in this header */
+ char is_stateless; /* NOTE(review): undocumented -- presumably a boolean flag; confirm */
+ char *export_path; /* path to the export directory
+ * (option directory <export-path>) */
+ int32_t export_path_length; /* length of 'export_path' string; the MAKE_REAL_PATH*
+ * macros use it as the offset at which the
+ * mount-relative path is appended */
+
+ /* statistics */
+ struct xlator_stats stats; /* Statistics, provides activity of the server */
+
+ struct timeval prev_fetch_time; /* NOTE(review): presumably when 'stats' was last fetched -- confirm */
+ struct timeval init_time; /* NOTE(review): presumably when the xlator was initialized -- confirm */
+ int32_t max_read; /* derived from interval_read (see below) */
+ int32_t max_write; /* derived from interval_write (see below) */
+ int64_t interval_read; /* Used to calculate the max_read value */
+ int64_t interval_write; /* Used to calculate the max_write value */
+ int64_t read_value; /* Total read, from init */
+ int64_t write_value; /* Total write, from init */
+
+ /* bdb xlator specific private data */
+ uint64_t envflags; /* flags used for opening DB_ENV for this xlator */
+ uint64_t dbflags; /* flags to be used for opening each database */
+ uint64_t cache; /* cache: can be either ON or OFF */
+ uint32_t transaction; /* transaction: can be either ON or OFF */
+ uint32_t active; /* NOTE(review): undocumented -- presumably guarded by 'active_lock'; confirm */
+ gf_lock_t active_lock; /* NOTE(review): presumably protects 'active' -- confirm */
+ struct bctx_table *b_table; /* table of 'struct bdb_ctx' used by this xlator */
+ DBTYPE access_mode; /* access mode for accessing the databases,
+ * can be DB_HASH, DB_BTREE
+ * (option access-mode <mode>) */
+ mode_t file_mode; /* mode for each and every file stored on bdb
+ * (option file-mode <mode>) */
+ mode_t dir_mode; /* mode for each and every directory stored on bdb
+ * (option dir-mode <mode>) */
+ mode_t symlink_mode; /* mode for each and every symlink stored on bdb */
+ pthread_t checkpoint_thread; /* pthread_t object used for creating checkpoint
+ * thread */
+ int32_t checkpoint_timeout; /* time duration between two consecutive checkpoint
+ * operations.
+ * (option checkpoint-timeout <time-in-seconds>) */
+ ino_t next_ino; /* inode number allocation counter */
+ gf_lock_t ino_lock; /* lock to protect 'next_ino' */
+ char *logdir; /* environment log directory
+ * (option logdir <directory>) */
+ char *errfile; /* errfile path, used by environment to
+ * print detailed error log.
+ * (option errfile <errfile-path>) */
+ FILE *errfp; /* DB_ENV->set_errfile() expects us to fopen
+ * the errfile before doing DB_ENV->set_errfile() */
+ uint32_t txn_timeout; /* used by DB_ENV->set_timeout to set the timeout for
+ * a transactionally encapsulated DB->operation() to
+ * timeout before waiting for locks to be released.
+ * (option transaction-timeout <time-in-milliseconds>)
+ */
+ uint32_t lock_timeout; /* NOTE(review): presumably the lock counterpart of 'txn_timeout' -- confirm unit */
+ uint32_t log_auto_remove; /* DB_AUTO_LOG_REMOVE flag for DB_ENV */
+ uint32_t log_region_max; /* NOTE(review): presumably the DB_ENV log region size limit -- confirm */
+};
+
+
+/* bdb_txn_begin(dbenv,ptxnid)
+ * start a transaction in @dbenv through DB_ENV->txn_begin(), passing NULL
+ * as the parent transaction and 0 as flags.
+ *
+ * @dbenv:  environment to start the transaction in
+ * @ptxnid: passed through to txn_begin() as the place for the new handle
+ *
+ * returns whatever DB_ENV->txn_begin() returns.
+ */
+static inline int32_t
+bdb_txn_begin (DB_ENV *dbenv,
+               DB_TXN **ptxnid)
+{
+        int32_t op_ret = dbenv->txn_begin (dbenv, NULL, ptxnid, 0);
+
+        return op_ret;
+}
+
+/* bdb_txn_abort(txnid)
+ * abort the transaction @txnid through DB_TXN->abort().
+ *
+ * returns whatever DB_TXN->abort() returns.
+ */
+static inline int32_t
+bdb_txn_abort (DB_TXN *txnid)
+{
+        int32_t op_ret = txnid->abort (txnid);
+
+        return op_ret;
+}
+
+/* bdb_txn_commit(txnid)
+ * commit the transaction @txnid through DB_TXN->commit(), with flags 0.
+ *
+ * returns whatever DB_TXN->commit() returns.
+ */
+static inline int32_t
+bdb_txn_commit (DB_TXN *txnid)
+{
+        int32_t op_ret = txnid->commit (txnid, 0);
+
+        return op_ret;
+}
+
+/* bdb_extract_bfd(fd,this)
+ * defined outside this header.
+ * NOTE(review): presumably returns the pointer stored with BDB_SET_BFD for
+ * @fd and @this -- confirm against the definition.
+ *
+ * Declared without 'inline' on purpose: no body is provided here, and an
+ * 'inline' declaration without a definition in the same translation unit
+ * is not valid for an externally linked function under C99 inline rules
+ * (GCC warns "inline function declared but never defined").
+ */
+void *
+bdb_extract_bfd (fd_t *fd, xlator_t *this);
+
+/* bdb_db_stat(): defined outside this header.
+ * NOTE(review): returns an untyped pointer -- presumably statistics of
+ * @bctx's database; confirm the concrete type and who frees it. */
+void *
+bdb_db_stat (bctx_t *bctx,
+ DB_TXN *txnid,
+ uint32_t flags);
+/* bdb_db_get(): defined outside this header.
+ * NOTE(review): presumably reads @size bytes at @offset of the record named
+ * @key_string into *@buf -- confirm return value and ownership of *@buf. */
+int32_t
+bdb_db_get(struct bdb_ctx *bctx,
+ DB_TXN *txnid,
+ const char *key_string,
+ char **buf,
+ size_t size,
+ off_t offset);
+/* NOTE(review): presumably a value for the @flags argument of bdb_db_put().
+ * 0xcafebabe does not fit a signed 32-bit int, so its value changes when it
+ * is stored in 'int32_t flags' -- confirm comparisons account for that. */
+#define BDB_TRUNCATE_RECORD 0xcafebabe
+/* bdb_db_put(): defined outside this header.
+ * NOTE(review): presumably writes @size bytes of @buf at @offset of the
+ * record named @key_string -- confirm return value and @flags semantics. */
+int32_t
+bdb_db_put (struct bdb_ctx *bctx,
+ DB_TXN *txnid,
+ const char *key_string,
+ const char *buf,
+ size_t size,
+ off_t offset,
+ int32_t flags);
+/* bdb_db_del(): defined outside this header.
+ * NOTE(review): presumably deletes the record for @path from @bctx's
+ * database -- confirm whether @path is a full path or a key. */
+int32_t
+bdb_db_del (struct bdb_ctx *bctx,
+ DB_TXN *txnid,
+ const char *path);
+/* bdb_inode_transform(): defined outside this header.
+ * NOTE(review): presumably derives an inode number from @parent and @bctx --
+ * confirm. */
+ino_t
+bdb_inode_transform (ino_t parent,
+ struct bdb_ctx *bctx);
+
+/* bdb_cursor_open(): defined outside this header.
+ * NOTE(review): presumably opens a cursor on @bctx's database and stores it
+ * in *@cursorp -- confirm return value convention. */
+int32_t
+bdb_cursor_open (struct bdb_ctx *bctx,
+ DBC **cursorp);
+/* bdb_cursor_get(): defined outside this header.
+ * NOTE(review): presumably fetches a key/value pair through @cursorp, with
+ * @flags selecting the cursor operation -- confirm. */
+int32_t
+bdb_cursor_get (DBC *cursorp,
+ DBT *key,
+ DBT *value,
+ int32_t flags);
+
+/* bdb_cursor_close(): defined outside this header.
+ * NOTE(review): presumably closes @cursorp, which was opened on @ctx -- confirm. */
+int32_t
+bdb_cursor_close (struct bdb_ctx *ctx,
+ DBC *cursorp);
+
+/* bdb_dirent_size(): defined outside this header.
+ * NOTE(review): presumably the dirent size needed for the name in @key -- confirm. */
+int32_t
+bdb_dirent_size (DBT *key);
+/* dirent_size(): defined outside this header.
+ * NOTE(review): presumably the size needed for @entry -- confirm. */
+int32_t
+dirent_size (struct dirent *entry);
+/* bdb_db_init(): defined outside this header.
+ * NOTE(review): presumably sets up the DB environment of xlator @this from
+ * @options -- confirm return value convention. */
+int
+bdb_db_init (xlator_t *this,
+ dict_t *options);
+/* bdb_dbs_from_dict_close(): defined outside this header.
+ * NOTE(review): the (dict, key, value, data) signature looks like a
+ * per-entry dict callback -- confirm. */
+void
+bdb_dbs_from_dict_close (dict_t *this,
+ char *key,
+ data_t *value,
+ void *data);
+/* bctx_lookup(): defined outside this header.
+ * NOTE(review): presumably returns the ctx for directory @path from @table --
+ * confirm whether the returned ctx is referenced and whether NULL is possible. */
+bctx_t *
+bctx_lookup (struct bctx_table *table,
+ const char *path);
+/* bctx_parent(): defined outside this header.
+ * NOTE(review): presumably returns the ctx of the parent directory of @path --
+ * confirm. */
+bctx_t *
+bctx_parent
+(struct bctx_table *table,
+ const char *path);
+/* bctx_unref(): defined outside this header.
+ * NOTE(review): presumably drops one reference (bdb_ctx.ref) on @ctx -- confirm
+ * what is returned. */
+bctx_t *
+bctx_unref (bctx_t *ctx);
+/* bctx_ref(): defined outside this header.
+ * NOTE(review): presumably takes one reference (bdb_ctx.ref) on @ctx -- confirm
+ * what is returned. */
+bctx_t *
+bctx_ref (bctx_t *ctx);
+/* bctx_rename(): defined outside this header.
+ * NOTE(review): presumably re-targets @bctx to the db at @db_newpath -- confirm. */
+bctx_t *
+bctx_rename (bctx_t *bctx,
+ const char *db_newpath);
+/* bdb_db_rename(): defined outside this header.
+ * NOTE(review): presumably renames the db file @tmp_db_newpath to
+ * @real_db_newpath -- confirm return value convention. */
+int32_t
+bdb_db_rename (bctx_table_t *table,
+ const char *tmp_db_newpath,
+ const char *real_db_newpath);
+#endif /* _BDB_H */