From ea08bf886732d9680f2d6de19f3d68908a55143b Mon Sep 17 00:00:00 2001 From: Amar Tumballi Date: Wed, 11 Jul 2012 22:25:30 +0530 Subject: core: remove unused code BUG: 764890 Change-Id: Ia8bcaa7a4daeb706bcb0bba24b2e634e9ca20d49 Signed-off-by: Amar Tumballi Reviewed-on: http://review.gluster.com/3657 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/storage/bdb/Makefile.am | 3 - xlators/storage/bdb/src/Makefile.am | 18 - xlators/storage/bdb/src/bctx.c | 341 --- xlators/storage/bdb/src/bdb-ll.c | 1464 ------------- xlators/storage/bdb/src/bdb-mem-types.h | 42 - xlators/storage/bdb/src/bdb.c | 3603 ------------------------------- xlators/storage/bdb/src/bdb.h | 530 ----- 7 files changed, 6001 deletions(-) delete mode 100644 xlators/storage/bdb/Makefile.am delete mode 100644 xlators/storage/bdb/src/Makefile.am delete mode 100644 xlators/storage/bdb/src/bctx.c delete mode 100644 xlators/storage/bdb/src/bdb-ll.c delete mode 100644 xlators/storage/bdb/src/bdb-mem-types.h delete mode 100644 xlators/storage/bdb/src/bdb.c delete mode 100644 xlators/storage/bdb/src/bdb.h (limited to 'xlators/storage') diff --git a/xlators/storage/bdb/Makefile.am b/xlators/storage/bdb/Makefile.am deleted file mode 100644 index d471a3f92..000000000 --- a/xlators/storage/bdb/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/storage/bdb/src/Makefile.am b/xlators/storage/bdb/src/Makefile.am deleted file mode 100644 index 7e2376979..000000000 --- a/xlators/storage/bdb/src/Makefile.am +++ /dev/null @@ -1,18 +0,0 @@ - -xlator_LTLIBRARIES = bdb.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/storage - -bdb_la_LDFLAGS = -module -avoidversion - -bdb_la_SOURCES = bctx.c bdb-ll.c bdb.c -bdb_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = bdb.h - -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) - -AM_LDFLAGS = -ldb - -CLEANFILES = - diff --git a/xlators/storage/bdb/src/bctx.c b/xlators/storage/bdb/src/bctx.c deleted file mode 100644 index 61560edfa..000000000 --- a/xlators/storage/bdb/src/bctx.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - Copyright (c) 2008-2011 Gluster, Inc. - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - . -*/ - -#include -#include -#include /* for dirname */ - -static void -__destroy_bctx (bctx_t *bctx) -{ - if (bctx->directory) - GF_FREE (bctx->directory); - - if (bctx->db_path) - GF_FREE (bctx->db_path); - - GF_FREE (bctx); -} - -static void -__unhash_bctx (bctx_t *bctx) -{ - list_del_init (&bctx->b_hash); -} - -static int32_t -bctx_table_prune (bctx_table_t *table) -{ - int32_t ret = 0; - struct list_head purge = {0,}; - struct list_head *next = NULL; - bctx_t *entry = NULL; - bctx_t *del = NULL, *tmp = NULL; - - if (!table) - return 0; - - INIT_LIST_HEAD (&purge); - - LOCK (&table->lock); - { - if ((table->lru_limit) && - (table->lru_size > table->lru_limit)) { - while (table->lru_size > table->lru_limit) { - next = table->b_lru.next; - entry = list_entry (next, bctx_t, list); - - list_move_tail (next, &table->purge); - __unhash_bctx (entry); - - table->lru_size--; - ret++; - } - } - list_move_tail (&purge, &table->purge); - list_del_init (&table->purge); - } - UNLOCK (&table->lock); - - list_for_each_entry_safe (del, tmp, &purge, list) { - list_del_init (&del->list); - if (del->primary) { - ret = del->primary->close (del->primary, 0); - if (ret != 0) { - gf_log (table->this->name, GF_LOG_DEBUG, - "_BCTX_TABLE_PRUNE %s: %s " - "(failed to close primary database)", - del->directory, db_strerror (ret)); - } else { - gf_log (table->this->name, GF_LOG_DEBUG, - "_BCTX_TABLE_PRUNE %s (lru=%d)" - "(closed primary database)", - del->directory, table->lru_size); - } - } - if (del->secondary) { - ret = del->secondary->close (del->secondary, 0); - if (ret != 0) { - gf_log (table->this->name, GF_LOG_DEBUG, - "_BCTX_TABLE_PRUNE %s: %s " - "(failed to close secondary database)", - del->directory, db_strerror (ret)); - } else { - gf_log (table->this->name, GF_LOG_DEBUG, - "_BCTX_TABLE_PRUNE %s (lru=%d)" - "(closed secondary database)", - del->directory, table->lru_size); - } - } - __destroy_bctx (del); - } - - return ret; -} - - -/* struct bdb_ctx related */ -static inline uint32_t -bdb_key_hash (char *key, uint32_t hash_size) -{ - uint32_t hash = 0; - - hash = *key; - - if (hash) { - for (key += 1; *key != '\0'; key++) { - hash = (hash << 5) - hash + *key; - } - } - - return (hash + *key) % hash_size; -} - -static void -__hash_bctx (bctx_t *bctx) -{ - bctx_table_t *table = NULL; - char *key = NULL; - - table = bctx->table; - - MAKE_KEY_FROM_PATH (key, bctx->directory); - bctx->key_hash = bdb_key_hash (key, table->hash_size); - - list_del_init (&bctx->b_hash); - list_add (&bctx->b_hash, &table->b_hash[bctx->key_hash]); -} - -static inline bctx_t * -__bctx_passivate (bctx_t *bctx) -{ - if (bctx->primary) { - list_move_tail (&bctx->list, &(bctx->table->b_lru)); - bctx->table->lru_size++; - } else { - list_move_tail (&bctx->list, &bctx->table->purge); - __unhash_bctx (bctx); - } - return bctx; -} - -static inline bctx_t * -__bctx_activate (bctx_t *bctx) -{ - list_move (&bctx->list, &bctx->table->active); - bctx->table->lru_size--; - - return bctx; -} - -static bctx_t * -__bdb_ctx_unref (bctx_t *bctx) -{ - GF_ASSERT (bctx->ref); - - --bctx->ref; - - if (!bctx->ref) - bctx = __bctx_passivate (bctx); - - return bctx; -} - - -bctx_t * -bctx_unref (bctx_t *bctx) -{ - bctx_table_t *table = NULL; - - if (!bctx && !bctx->table) - return NULL; - - table = bctx->table; - - LOCK (&table->lock); - { - bctx = __bdb_ctx_unref (bctx); - } - UNLOCK (&table->lock); - - bctx_table_prune (table); - - return bctx; -} - -/* - * NOTE: __bdb_ctx_ref() is called only after holding table->lock and - * bctx->lock, in that order - */ -static inline bctx_t * -__bctx_ref (bctx_t *bctx) -{ - if (!bctx->ref) - __bctx_activate (bctx); - - bctx->ref++; - - return bctx; -} - -bctx_t * -bctx_ref (bctx_t *bctx) -{ - LOCK (&(bctx->table->lock)); - { - __bctx_ref (bctx); - } - UNLOCK (&(bctx->table->lock)); - - return bctx; -} - - -#define BDB_THIS(table) (table->this) - -static inline bctx_t * -__create_bctx (bctx_table_t *table, - const char *path) -{ - bctx_t *bctx = NULL; - char *db_path = NULL; - - bctx = GF_CALLOC (1, sizeof (*bctx), gf_bdb_mt_bctx_t); - GF_VALIDATE_OR_GOTO ("bctx", bctx, out); - - bctx->table = table; - bctx->directory = gf_strdup (path); - GF_VALIDATE_OR_GOTO ("bctx", bctx->directory, out); - - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, BDB_THIS (table), path); - - bctx->db_path = gf_strdup (db_path); - GF_VALIDATE_OR_GOTO ("bctx", bctx->directory, out); - - INIT_LIST_HEAD (&bctx->c_list); - INIT_LIST_HEAD (&bctx->list); - INIT_LIST_HEAD (&bctx->b_hash); - - LOCK_INIT (&bctx->lock); - - __hash_bctx (bctx); - - list_add (&bctx->list, &table->b_lru); - table->lru_size++; - -out: - return bctx; -} - -/* bctx_lookup - lookup bctx_t for the directory @directory. - * (see description of bctx_t in bdb.h) - * - * @table: bctx_table_t for this instance of bdb. - * @directory: directory for which bctx_t is being looked up. - */ -bctx_t * -bctx_lookup (bctx_table_t *table, - const char *directory) -{ - char *key = NULL; - uint32_t key_hash = 0; - bctx_t *trav = NULL, *bctx = NULL, *tmp = NULL; - int32_t need_break = 0; - - GF_VALIDATE_OR_GOTO ("bctx", table, out); - GF_VALIDATE_OR_GOTO ("bctx", directory, out); - - MAKE_KEY_FROM_PATH (key, directory); - key_hash = bdb_key_hash (key, table->hash_size); - - LOCK (&table->lock); - { - if (list_empty (&table->b_hash[key_hash])) { - goto creat_bctx; - } - - list_for_each_entry_safe (trav, tmp, &table->b_hash[key_hash], - b_hash) { - LOCK(&trav->lock); - { - if (!strcmp(trav->directory, directory)) { - bctx = __bctx_ref (trav); - need_break = 1; - } - } - UNLOCK(&trav->lock); - - if (need_break) - break; - } - - creat_bctx: - if (!bctx) { - bctx = __create_bctx (table, directory); - bctx = __bctx_ref (bctx); - } - } - UNLOCK (&table->lock); -out: - return bctx; -} - - -bctx_t * -bctx_parent (bctx_table_t *table, - const char *path) -{ - char *pathname = NULL, *directory = NULL; - bctx_t *bctx = NULL; - - GF_VALIDATE_OR_GOTO ("bctx", table, out); - GF_VALIDATE_OR_GOTO ("bctx", path, out); - - pathname = gf_strdup (path); - GF_VALIDATE_OR_GOTO ("bctx", pathname, out); - directory = dirname (pathname); - - bctx = bctx_lookup (table, directory); - GF_VALIDATE_OR_GOTO ("bctx", bctx, out); - -out: - if (pathname) - free (pathname); - return bctx; -} diff --git a/xlators/storage/bdb/src/bdb-ll.c b/xlators/storage/bdb/src/bdb-ll.c deleted file mode 100644 index f70ec47f4..000000000 --- a/xlators/storage/bdb/src/bdb-ll.c +++ /dev/null @@ -1,1464 +0,0 @@ -/* - Copyright (c) 2008-2011 Gluster, Inc. - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - . -*/ - -#include -#include "bdb.h" -#include -#include "hashfn.h" -/* - * implement the procedures to interact with bdb */ - -/**************************************************************** - * - * General wrappers and utility procedures for bdb xlator - * - ****************************************************************/ - -ino_t -bdb_inode_transform (ino_t parent, - const char *name, - size_t namelen) -{ - ino_t ino = -1; - uint64_t hash = 0; - - hash = gf_dm_hashfn (name, namelen); - - ino = (((parent << 32) | 0x00000000ffffffffULL) - & (hash | 0xffffffff00000000ULL)); - - return ino; -} - -static int -bdb_generate_secondary_hash (DB *secondary, - const DBT *pkey, - const DBT *data, - DBT *skey) -{ - char *primary = NULL; - uint32_t *hash = NULL; - - primary = pkey->data; - - hash = GF_CALLOC (1, sizeof (uint32_t), gf_bdb_mt_uint32_t); - - *hash = gf_dm_hashfn (primary, pkey->size); - - skey->data = hash; - skey->size = sizeof (hash); - skey->flags = DB_DBT_APPMALLOC; - - return 0; -} - -/*********************************************************** - * - * bdb storage database utilities - * - **********************************************************/ - -/* - * bdb_db_open - opens a storage db. - * - * @ctx: context specific to the directory for which we are supposed to open db - * - * see, if we have empty slots to open a db. - * if (no-empty-slots), then prune open dbs and close as many as possible - * if (empty-slot-available), tika muchkonDu db open maaDu - * - */ -static int -bdb_db_open (bctx_t *bctx) -{ - DB *primary = NULL; - DB *secondary = NULL; - int32_t ret = -1; - bctx_table_t *table = NULL; - - GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); - - table = bctx->table; - GF_VALIDATE_OR_GOTO ("bdb-ll", table, out); - - /* we have to do the following, we can't deny someone of db_open ;) */ - ret = db_create (&primary, table->dbenv, 0); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_OPEN %s: %s (failed to create database object" - " for primary database)", - bctx->directory, db_strerror (ret)); - ret = -ENOMEM; - goto out; - } - - if (table->page_size) { - ret = primary->set_pagesize (primary, - table->page_size); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_OPEN %s: %s (failed to set page-size " - "to %"PRIu64")", - bctx->directory, db_strerror (ret), - table->page_size); - } else { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_OPEN %s: page-size set to %"PRIu64, - bctx->directory, table->page_size); - } - } - - ret = primary->open (primary, NULL, bctx->db_path, "primary", - table->access_mode, table->dbflags, 0); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "_BDB_DB_OPEN %s: %s " - "(failed to open primary database)", - bctx->directory, db_strerror (ret)); - ret = -1; - goto cleanup; - } - - ret = db_create (&secondary, table->dbenv, 0); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_OPEN %s: %s (failed to create database object" - " for secondary database)", - bctx->directory, db_strerror (ret)); - ret = -ENOMEM; - goto cleanup; - } - - ret = secondary->open (secondary, NULL, bctx->db_path, "secondary", - table->access_mode, table->dbflags, 0); - if (ret != 0 ) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "_BDB_DB_OPEN %s: %s " - "(failed to open secondary database)", - bctx->directory, db_strerror (ret)); - ret = -1; - goto cleanup; - } - - ret = primary->associate (primary, NULL, secondary, - bdb_generate_secondary_hash, -#ifdef DB_IMMUTABLE_KEY - DB_IMMUTABLE_KEY); -#else - 0); -#endif - if (ret != 0 ) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "_BDB_DB_OPEN %s: %s " - "(failed to associate primary database with " - "secondary database)", - bctx->directory, db_strerror (ret)); - ret = -1; - goto cleanup; - } - -out: - bctx->primary = primary; - bctx->secondary = secondary; - - return ret; -cleanup: - if (primary) - primary->close (primary, 0); - if (secondary) - secondary->close (secondary, 0); - - return ret; -} - -int32_t -bdb_cursor_close (bctx_t *bctx, - DBC *cursorp) -{ - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); - GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out); - - LOCK (&bctx->lock); - { -#ifdef HAVE_BDB_CURSOR_GET - ret = cursorp->close (cursorp); -#else - ret = cursorp->c_close (cursorp); -#endif - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_CURSOR_CLOSE %s: %s " - "(failed to close database cursor)", - bctx->directory, db_strerror (ret)); - } - } - UNLOCK (&bctx->lock); - -out: - return ret; -} - - -int32_t -bdb_cursor_open (bctx_t *bctx, - DBC **cursorpp) -{ - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); - GF_VALIDATE_OR_GOTO ("bdb-ll", cursorpp, out); - - LOCK (&bctx->lock); - { - if (bctx->secondary) { - /* do nothing, just continue */ - ret = 0; - } else { - ret = bdb_db_open (bctx); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_CURSOR_OPEN %s: ENOMEM " - "(failed to open secondary database)", - bctx->directory); - ret = -ENOMEM; - } else { - ret = 0; - } - } - - if (ret == 0) { - /* all set, open cursor */ - ret = bctx->secondary->cursor (bctx->secondary, - NULL, cursorpp, 0); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_CURSOR_OPEN %s: %s " - "(failed to open a cursor to database)", - bctx->directory, db_strerror (ret)); - } - } - } - UNLOCK (&bctx->lock); - -out: - return ret; -} - - -/* cache related */ -static bdb_cache_t * -bdb_cache_lookup (bctx_t *bctx, - char *path) -{ - bdb_cache_t *bcache = NULL; - bdb_cache_t *trav = NULL; - char *key = NULL; - - GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); - GF_VALIDATE_OR_GOTO ("bdb-ll", path, out); - - MAKE_KEY_FROM_PATH (key, path); - - LOCK (&bctx->lock); - { - list_for_each_entry (trav, &bctx->c_list, c_list) { - if (!strcmp (trav->key, key)){ - bcache = trav; - break; - } - } - } - UNLOCK (&bctx->lock); - -out: - return bcache; -} - -static int32_t -bdb_cache_insert (bctx_t *bctx, - DBT *key, - DBT *data) -{ - bdb_cache_t *bcache = NULL; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); - GF_VALIDATE_OR_GOTO ("bdb-ll", key, out); - GF_VALIDATE_OR_GOTO ("bdb-ll", data, out); - - LOCK (&bctx->lock); - { - if (bctx->c_count > 5) { - /* most of the times, we enter here */ - /* FIXME: ugly, not supposed to disect any of the - * 'struct list_head' directly */ - if (!list_empty (&bctx->c_list)) { - bcache = list_entry (bctx->c_list.prev, - bdb_cache_t, c_list); - list_del_init (&bcache->c_list); - } - if (bcache->key) { - GF_FREE (bcache->key); - bcache->key = GF_CALLOC (key->size + 1, - sizeof (char), - gf_bdb_mt_char); - GF_VALIDATE_OR_GOTO ("bdb-ll", - bcache->key, unlock); - memcpy (bcache->key, (char *)key->data, - key->size); - } else { - /* should never come here */ - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_CACHE_INSERT %s (%s) " - "(found a cache entry with empty key)", - bctx->directory, (char *)key->data); - } /* if(bcache->key)...else */ - if (bcache->data) { - GF_FREE (bcache->data); - bcache->data = memdup (data->data, data->size); - GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, - unlock); - bcache->size = data->size; - } else { - /* should never come here */ - gf_log ("bdb-ll", GF_LOG_CRITICAL, - "_BDB_CACHE_INSERT %s (%s) " - "(found a cache entry with no data)", - bctx->directory, (char *)key->data); - } /* if(bcache->data)...else */ - list_add (&bcache->c_list, &bctx->c_list); - ret = 0; - } else { - /* we will be entering here very rarely */ - bcache = GF_CALLOC (1, sizeof (*bcache), - gf_bdb_mt_bdb_cache_t); - GF_VALIDATE_OR_GOTO ("bdb-ll", bcache, unlock); - - bcache->key = GF_CALLOC (key->size + 1, sizeof (char), - gf_bdb_mt_char); - GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->key, unlock); - memcpy (bcache->key, key->data, key->size); - - bcache->data = memdup (data->data, data->size); - GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, unlock); - - bcache->size = data->size; - list_add (&bcache->c_list, &bctx->c_list); - bctx->c_count++; - ret = 0; - } /* if(private->c_count < 5)...else */ - } -unlock: - UNLOCK (&bctx->lock); -out: - return ret; -} - -static int32_t -bdb_cache_delete (bctx_t *bctx, - const char *key) -{ - bdb_cache_t *bcache = NULL; - bdb_cache_t *trav = NULL; - - GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); - GF_VALIDATE_OR_GOTO ("bdb-ll", key, out); - - LOCK (&bctx->lock); - { - list_for_each_entry (trav, &bctx->c_list, c_list) { - if (!strcmp (trav->key, key)){ - bctx->c_count--; - bcache = trav; - break; - } - } - - if (bcache) { - list_del_init (&bcache->c_list); - GF_FREE (bcache->key); - GF_FREE (bcache->data); - GF_FREE (bcache); - } - } - UNLOCK (&bctx->lock); - -out: - return 0; -} - -void * -bdb_db_stat (bctx_t *bctx, - DB_TXN *txnid, - uint32_t flags) -{ - DB *storage = NULL; - void *stat = NULL; - int32_t ret = -1; - - LOCK (&bctx->lock); - { - if (bctx->primary == NULL) { - ret = bdb_db_open (bctx); - storage = bctx->primary; - } else { - /* we are just fine, lets continue */ - storage = bctx->primary; - } /* if(bctx->dbp==NULL)...else */ - } - UNLOCK (&bctx->lock); - - GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); - - ret = storage->stat (storage, txnid, &stat, flags); - - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_STAT %s: %s " - "(failed to do stat database)", - bctx->directory, db_strerror (ret)); - } -out: - return stat; - -} - -/* bdb_storage_get - retrieve a key/value pair corresponding to @path from the - * corresponding db file. - * - * @bctx: bctx_t * corresponding to the parent directory of @path. (should - * always be a valid bctx). bdb_storage_get should never be called if - * @bctx = NULL. - * @txnid: NULL if bdb_storage_get is not embedded in an explicit transaction - * or a valid DB_TXN *, when embedded in an explicit transaction. - * @path: path of the file to read from (translated to a database key using - * MAKE_KEY_FROM_PATH) - * @buf: char ** - pointer to a pointer to char. a read buffer is created in - * this procedure and pointer to the buffer is passed through @buf to the - * caller. - * @size: size of the file content to be read. - * @offset: offset from which the file content to be read. - * - * NOTE: bdb_storage_get tries to open DB, if @bctx->dbp == NULL - * (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by - * bdb_table_prune()). - * - * NOTE: if private->cache is set (bdb xlator's internal caching enabled), then - * bdb_storage_get first looks up the cache for key/value pair. if - * bdb_lookup_cache fails, then only DB->get() is called. also, inserts a - * newly read key/value pair to cache through bdb_insert_to_cache. - * - * return: 'number of bytes read' on success or -1 on error. - * - * also see: bdb_lookup_cache, bdb_insert_to_cache for details about bdb - * xlator's internal cache. - */ -static int32_t -bdb_db_get (bctx_t *bctx, - DB_TXN *txnid, - const char *path, - char *buf, - size_t size, - off_t offset) -{ - DB *storage = NULL; - DBT key = {0,}; - DBT value = {0,}; - int32_t ret = -1; - size_t copy_size = 0; - char *key_string = NULL; - bdb_cache_t *bcache = NULL; - int32_t db_flags = 0; - uint8_t need_break = 0; - int32_t retries = 1; - - GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); - GF_VALIDATE_OR_GOTO ("bdb-ll", path, out); - - MAKE_KEY_FROM_PATH (key_string, path); - - if (bctx->cache && - ((bcache = bdb_cache_lookup (bctx, key_string)) != NULL)) { - if (buf) { - copy_size = ((bcache->size - offset) < size)? - (bcache->size - offset) : size; - - memcpy (buf, (bcache->data + offset), copy_size); - ret = copy_size; - } else { - ret = bcache->size; - } - - goto out; - } - - LOCK (&bctx->lock); - { - if (bctx->primary == NULL) { - ret = bdb_db_open (bctx); - storage = bctx->primary; - } else { - /* we are just fine, lets continue */ - storage = bctx->primary; - } /* if(bctx->dbp==NULL)...else */ - } - UNLOCK (&bctx->lock); - - GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); - - key.data = (char *)key_string; - key.size = strlen (key_string); - key.flags = DB_DBT_USERMEM; - - if (bctx->cache){ - value.flags = DB_DBT_MALLOC; - } else { - if (size) { - value.data = buf; - value.ulen = size; - value.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; - } else { - value.flags = DB_DBT_MALLOC; - } - value.dlen = size; - value.doff = offset; - } - - do { - /* TODO: we prefer to give our own buffer to value.data - * and ask bdb to fill in it */ - ret = storage->get (storage, txnid, &key, &value, - db_flags); - - if (ret == DB_NOTFOUND) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_GET %s - %s: ENOENT" - "(specified key not found in database)", - bctx->directory, key_string); - ret = -1; - need_break = 1; - } else if (ret == DB_LOCK_DEADLOCK) { - retries++; - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_GET %s - %s" - "(deadlock detected, retrying for %d " - "time)", - bctx->directory, key_string, retries); - } else if (ret == 0) { - /* successfully read data, lets set everything - * in place and return */ - if (bctx->cache) { - if (buf) { - copy_size = ((value.size - offset) < size) ? - (value.size - offset) : size; - - memcpy (buf, (value.data + offset), - copy_size); - ret = copy_size; - } - - bdb_cache_insert (bctx, &key, &value); - } else { - ret = value.size; - } - - if (size == 0) - GF_FREE (value.data); - - need_break = 1; - } else { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_GET %s - %s: %s" - "(failed to retrieve specified key from" - " database)", - bctx->directory, key_string, - db_strerror (ret)); - ret = -1; - need_break = 1; - } - } while (!need_break); - -out: - return ret; -}/* bdb_db_get */ - -/* TODO: handle errors here and log. propogate only the errno to caller */ -int32_t -bdb_db_fread (struct bdb_fd *bfd, char *buf, size_t size, off_t offset) -{ - return bdb_db_get (bfd->ctx, NULL, bfd->key, buf, size, offset); -} - -int32_t -bdb_db_iread (struct bdb_ctx *bctx, const char *key, char **bufp) -{ - char *buf = NULL; - size_t size = 0; - int64_t ret = 0; - - ret = bdb_db_get (bctx, NULL, key, NULL, 0, 0); - size = ret; - - if (bufp) { - buf = GF_CALLOC (size, sizeof (char), gf_bdb_mt_char); - *bufp = buf; - ret = bdb_db_get (bctx, NULL, key, buf, size, 0); - } - - return ret; -} - -/* bdb_storage_put - insert a key/value specified to the corresponding DB. - * - * @bctx: bctx_t * corresponding to the parent directory of @path. - * (should always be a valid bctx). bdb_storage_put should never be - * called if @bctx = NULL. - * @txnid: NULL if bdb_storage_put is not embedded in an explicit transaction - * or a valid DB_TXN *, when embedded in an explicit transaction. - * @key_string: key of the database entry. - * @buf: pointer to the buffer data to be written as data for @key_string. - * @size: size of @buf. - * @offset: offset in the key's data to be modified with provided data. - * @flags: valid flags are BDB_TRUNCATE_RECORD (to reduce the data of - * @key_string to 0 size). - * - * NOTE: bdb_storage_put tries to open DB, if @bctx->dbp == NULL - * (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by - * bdb_table_prune()). - * - * NOTE: bdb_storage_put deletes the key/value from bdb xlator's internal cache. - * - * return: 0 on success or -1 on error. - * - * also see: bdb_cache_delete for details on how a cached key/value pair is - * removed. - */ -static int32_t -bdb_db_put (bctx_t *bctx, - DB_TXN *txnid, - const char *key_string, - const char *buf, - size_t size, - off_t offset, - int32_t flags) -{ - DB *storage = NULL; - DBT key = {0,}, value = {0,}; - int32_t ret = -1; - int32_t db_flags = DB_AUTO_COMMIT; - uint8_t need_break = 0; - int32_t retries = 1; - - LOCK (&bctx->lock); - { - if (bctx->primary == NULL) { - ret = bdb_db_open (bctx); - storage = bctx->primary; - } else { - /* we are just fine, lets continue */ - storage = bctx->primary; - } - } - UNLOCK (&bctx->lock); - - GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); - - if (bctx->cache) { - ret = bdb_cache_delete (bctx, (char *)key_string); - GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out); - } - - key.data = (void *)key_string; - key.size = strlen (key_string); - - /* NOTE: bdb lets us expand the file, suppose value.size > value.len, - * then value.len bytes from value.doff offset and value.size bytes - * will be written from value.doff and data from - * value.doff + value.dlen will be pushed value.doff + value.size - */ - value.data = (void *)buf; - - if (flags & BDB_TRUNCATE_RECORD) { - value.size = size; - value.doff = 0; - value.dlen = offset; - } else { - value.size = size; - value.dlen = size; - value.doff = offset; - } - value.flags = DB_DBT_PARTIAL; - if (buf == NULL && size == 0) - /* truncate called us */ - value.flags = 0; - - do { - ret = storage->put (storage, txnid, &key, &value, db_flags); - if (ret == DB_LOCK_DEADLOCK) { - retries++; - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_PUT %s - %s" - "(deadlock detected, retying for %d time)", - bctx->directory, key_string, retries); - } else if (ret) { - /* write failed */ - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_PUT %s - %s: %s" - "(failed to put specified entry into database)", - bctx->directory, key_string, db_strerror (ret)); - need_break = 1; - } else { - /* successfully wrote */ - ret = 0; - need_break = 1; - } - } while (!need_break); -out: - return ret; -}/* bdb_db_put */ - -int32_t -bdb_db_icreate (struct bdb_ctx *bctx, const char *key) -{ - return bdb_db_put (bctx, NULL, key, NULL, 0, 0, 0); -} - -/* TODO: handle errors here and log. propogate only the errno to caller */ -int32_t -bdb_db_fwrite (struct bdb_fd *bfd, char *buf, size_t size, off_t offset) -{ - return bdb_db_put (bfd->ctx, NULL, bfd->key, buf, size, offset, 0); -} - -/* TODO: handle errors here and log. propogate only the errno to caller */ -int32_t -bdb_db_iwrite (struct bdb_ctx *bctx, const char *key, char *buf, size_t size) -{ - return bdb_db_put (bctx, NULL, key, buf, size, 0, 0); -} - -int32_t -bdb_db_itruncate (struct bdb_ctx *bctx, const char *key) -{ - return bdb_db_put (bctx, NULL, key, NULL, 0, 1, 0); -} - -/* bdb_storage_del - delete a key/value pair corresponding to @path from - * corresponding db file. - * - * @bctx: bctx_t * corresponding to the parent directory of @path. - * (should always be a valid bctx). bdb_storage_del should never be called - * if @bctx = NULL. - * @txnid: NULL if bdb_storage_del is not embedded in an explicit transaction - * or a valid DB_TXN *, when embedded in an explicit transaction. - * @path: path to the file, whose key/value pair has to be deleted. - * - * NOTE: bdb_storage_del tries to open DB, if @bctx->dbp == NULL - * (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by - * bdb_table_prune()). - * - * return: 0 on success or -1 on error. - */ -static int32_t -bdb_db_del (bctx_t *bctx, - DB_TXN *txnid, - const char *key_string) -{ - DB *storage = NULL; - DBT key = {0,}; - int32_t ret = -1; - int32_t db_flags = 0; - uint8_t need_break = 0; - int32_t retries = 1; - - LOCK (&bctx->lock); - { - if (bctx->primary == NULL) { - ret = bdb_db_open (bctx); - storage = bctx->primary; - } else { - /* we are just fine, lets continue */ - storage = bctx->primary; - } - } - UNLOCK (&bctx->lock); - - GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); - - ret = bdb_cache_delete (bctx, key_string); - GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out); - - key.data = (char *)key_string; - key.size = strlen (key_string); - key.flags = DB_DBT_USERMEM; - - do { - ret = storage->del (storage, txnid, &key, db_flags); - - if (ret == DB_NOTFOUND) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_DEL %s - %s: ENOENT" - "(failed to delete entry, could not be " - "found in the database)", - bctx->directory, key_string); - need_break = 1; - } else if (ret == DB_LOCK_DEADLOCK) { - retries++; - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_DEL %s - %s" - "(deadlock detected, retying for %d time)", - bctx->directory, key_string, retries); - } else if (ret == 0) { - /* successfully deleted the entry */ - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_DEL %s - %s" - "(successfully deleted entry from database)", - bctx->directory, key_string); - ret = 0; - need_break = 1; - } else { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_DB_DEL %s - %s: %s" - "(failed to delete entry from database)", - bctx->directory, key_string, db_strerror (ret)); - ret = -1; - need_break = 1; - } - } while (!need_break); -out: - return ret; -} - -int32_t -bdb_db_iremove (bctx_t *bctx, - const char *key) -{ - return bdb_db_del (bctx, NULL, key); -} - -/* NOTE: bdb version compatibility wrapper */ -int32_t -bdb_cursor_get (DBC *cursorp, - DBT *sec, DBT *pri, - DBT *val, - int32_t flags) -{ - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out); - -#ifdef HAVE_BDB_CURSOR_GET - ret = cursorp->pget (cursorp, sec, pri, val, flags); -#else - ret = cursorp->c_pget (cursorp, sec, pri, val, flags); -#endif - if ((ret != 0) && (ret != DB_NOTFOUND)) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_CURSOR_GET: %s" - "(failed to retrieve entry from database cursor)", - db_strerror (ret)); - } - -out: - return ret; -}/* bdb_cursor_get */ - -int32_t -bdb_dirent_size (DBT *key) -{ - return GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + key->size); -} - - - -/* bdb_dbenv_init - initialize DB_ENV - * - * initialization includes: - * 1. opening DB_ENV (db_env_create(), DB_ENV->open()). - * NOTE: see private->envflags for flags used. - * 2. DB_ENV->set_lg_dir - set log directory to be used for storing log files - * (log files are the files in which transaction logs are written by db). - * 3. DB_ENV->set_flags (DB_LOG_AUTOREMOVE) - set DB_ENV to automatically - * clear the unwanted log files (flushed at each checkpoint). - * 4. DB_ENV->set_errfile - set errfile to be used by db to report detailed - * error logs. used only for debbuging purpose. - * - * return: returns a valid DB_ENV * on success or NULL on error. - * - */ -static DB_ENV * -bdb_dbenv_init (xlator_t *this, - char *directory) -{ - /* Create a DB environment */ - DB_ENV *dbenv = NULL; - int32_t ret = 0; - bdb_private_t *private = NULL; - int32_t fatal_flags = 0; - - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (directory, err); - - private = this->private; - VALIDATE_OR_GOTO (private, err); - - ret = db_env_create (&dbenv, 0); - VALIDATE_OR_GOTO ((ret == 0), err); - - /* NOTE: set_errpfx returns 'void' */ - dbenv->set_errpfx(dbenv, this->name); - - ret = dbenv->set_lk_detect (dbenv, DB_LOCK_DEFAULT); - VALIDATE_OR_GOTO ((ret == 0), err); - - ret = dbenv->open(dbenv, directory, - private->envflags, - S_IRUSR | S_IWUSR); - if ((ret != 0) && (ret != DB_RUNRECOVERY)) { - gf_log (this->name, GF_LOG_CRITICAL, - "failed to join Berkeley DB environment at %s: %s." - "please run manual recovery and retry running " - "glusterfs", - directory, db_strerror (ret)); - dbenv = NULL; - goto err; - } else if (ret == DB_RUNRECOVERY) { - fatal_flags = ((private->envflags & (~DB_RECOVER)) - | DB_RECOVER_FATAL); - ret = dbenv->open(dbenv, directory, fatal_flags, - S_IRUSR | S_IWUSR); - if (ret != 0) { - gf_log (this->name, GF_LOG_CRITICAL, - "failed to join Berkeley DB environment in " - "recovery mode at %s: %s. please run manual " - "recovery and retry running glusterfs", - directory, db_strerror (ret)); - dbenv = NULL; - goto err; - } - } - - ret = 0; -#if (DB_VERSION_MAJOR == 4 && \ - DB_VERSION_MINOR == 7) - if (private->log_auto_remove) { - ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 1); - } else { - ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 0); - } -#else - if (private->log_auto_remove) { - ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 1); - } else { - ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 0); - } -#endif - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "autoremoval of transactional log files could not be " - "configured (%s). you may have to do a manual " - "monitoring of transactional log files and remove " - "periodically.", - db_strerror (ret)); - goto err; - } - - if (private->transaction) { - ret = dbenv->set_flags(dbenv, DB_AUTO_COMMIT, 1); - - if (ret != 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "configuration of auto-commit failed for " - "database environment at %s. none of the " - "operations will be embedded in transaction " - "unless explicitly done so.", - db_strerror (ret)); - goto err; - } - - if (private->txn_timeout) { - ret = dbenv->set_timeout (dbenv, private->txn_timeout, - DB_SET_TXN_TIMEOUT); - if (ret != 0) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "could not configure Berkeley DB " - "transaction timeout to %d (%s). please" - " review 'option transaction-timeout %d" - "' option.", - private->txn_timeout, - db_strerror (ret), - private->txn_timeout); - goto err; - } - } - - if (private->lock_timeout) { - ret = dbenv->set_timeout(dbenv, - private->txn_timeout, - DB_SET_LOCK_TIMEOUT); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "could not configure Berkeley DB " - "lock timeout to %d (%s). please" - " review 'option lock-timeout %d" - "' option.", - private->lock_timeout, - db_strerror (ret), - private->lock_timeout); - goto err; - } - } - - ret = dbenv->set_lg_dir (dbenv, private->logdir); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "failed to configure libdb transaction log " - "directory at %s. please review the " - "'option logdir %s' option.", - db_strerror (ret), private->logdir); - goto err; - } - } - - if (private->errfile) { - private->errfp = fopen (private->errfile, "a+"); - if (private->errfp) { - dbenv->set_errfile (dbenv, private->errfp); - } else { - gf_log ("bdb-ll", GF_LOG_ERROR, - "failed to open error logging file for " - "libdb (Berkeley DB) internal logging (%s)." - "please review the 'option errfile %s' option.", - strerror (errno), private->errfile); - goto err; - } - } - - return dbenv; -err: - if (dbenv) { - dbenv->close (dbenv, 0); - } - - return NULL; -} - -#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv) - -/* bdb_checkpoint - during transactional usage, db does not directly write the - * data to db files, instead db writes a 'log' (similar to a journal entry) - * into a log file. db normally clears the log files during opening of an - * environment. since we expect a filesystem server to run for a pretty long - * duration and flushing 'log's during dbenv->open would prove very costly, if - * we accumulate the log entries for one complete run of glusterfs server. to - * flush the logs frequently, db provides a mechanism called 'checkpointing'. - * when we do a checkpoint, db flushes the logs to disk (writes changes to db - * files) and we can also clear the accumulated log files after checkpointing. - * NOTE: removing unwanted log files is not part of dbenv->txn_checkpoint() - * call. - * - * @data: xlator_t of the current instance of bdb xlator. - * - * bdb_checkpoint is called in a different thread from the main glusterfs - * thread. bdb xlator creates the checkpoint thread after successfully opening - * the db environment. - * NOTE: bdb_checkpoint thread shares the DB_ENV handle with the filesystem - * thread. - * - * db environment checkpointing frequency is controlled by - * 'option checkpoint-timeout ' in volfile. - * - * NOTE: checkpointing thread is started only if 'option transaction on' - * specified in volfile. checkpointing is not valid for non-transactional - * environments. - * - */ -static void * -bdb_checkpoint (void *data) -{ - xlator_t *this = NULL; - struct bdb_private *private = NULL; - DB_ENV *dbenv = NULL; - int32_t ret = 0; - uint32_t active = 0; - - this = (xlator_t *) data; - dbenv = BDB_ENV(this); - private = this->private; - - for (;;sleep (private->checkpoint_interval)) { - LOCK (&private->active_lock); - active = private->active; - UNLOCK (&private->active_lock); - - if (active) { - ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0); - if (ret) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_CHECKPOINT: %s" - "(failed to checkpoint environment)", - db_strerror (ret)); - } else { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_CHECKPOINT: successfully " - "checkpointed"); - } - } else { - ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0); - if (ret) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "_BDB_CHECKPOINT: %s" - "(final checkpointing failed. might " - "need to run recovery tool manually on " - "next usage of this database " - "environment)", - db_strerror (ret)); - } else { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "_BDB_CHECKPOINT: final successfully " - "checkpointed"); - } - break; - } - } - - return NULL; -} - - -/* bdb_db_init - initialize bdb xlator - * - * reads the options from @options dictionary and sets appropriate values in - * @this->private. also initializes DB_ENV. - * - * return: 0 on success or -1 on error - * (with logging the error through gf_log()). - */ -int -bdb_db_init (xlator_t *this, - dict_t *options) -{ - /* create a db entry for root */ - int32_t op_ret = 0; - bdb_private_t *private = NULL; - bctx_table_t *table = NULL; - - char *checkpoint_interval_str = NULL; - char *page_size_str = NULL; - char *lru_limit_str = NULL; - char *timeout_str = NULL; - char *access_mode = NULL; - char *endptr = NULL; - char *errfile = NULL; - char *directory = NULL; - char *logdir = NULL; - char *mode = NULL; - char *mode_str = NULL; - int ret = -1; - int idx = 0; - struct stat stbuf = {0,}; - - private = this->private; - - /* cache is always on */ - private->cache = ON; - - ret = dict_get_str (options, "access-mode", &access_mode); - if ((ret == 0) - && (!strcmp (access_mode, "btree"))) { - gf_log (this->name, GF_LOG_DEBUG, - "using BTREE access mode to access libdb " - "(Berkeley DB)"); - private->access_mode = DB_BTREE; - } else { - gf_log (this->name, GF_LOG_DEBUG, - "using HASH access mode to access libdb (Berkeley DB)"); - private->access_mode = DB_HASH; - } - - ret = dict_get_str (options, "mode", &mode); - if ((ret == 0) - && (!strcmp (mode, "cache"))) { - gf_log (this->name, GF_LOG_DEBUG, - "cache data mode selected for 'storage/bdb'. filesystem" - " operations are not transactionally protected and " - "system crash does not guarantee recoverability of " - "data"); - private->envflags = DB_CREATE | DB_INIT_LOG | - DB_INIT_MPOOL | DB_THREAD; - private->dbflags = DB_CREATE | DB_THREAD; - private->transaction = OFF; - } else { - gf_log (this->name, GF_LOG_DEBUG, - "persistent data mode selected for 'storage/bdb'. each" - "filesystem operation is guaranteed to be Berkeley DB " - "transaction protected."); - private->transaction = ON; - private->envflags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | - DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD; - private->dbflags = DB_CREATE | DB_THREAD; - - - ret = dict_get_str (options, "lock-timeout", &timeout_str); - - if (ret == 0) { - ret = gf_string2time (timeout_str, - &private->lock_timeout); - - if (private->lock_timeout > 4260000) { - /* db allows us to DB_SET_LOCK_TIMEOUT to be - * set to a maximum of 71 mins - * (4260000 milliseconds) */ - gf_log (this->name, GF_LOG_DEBUG, - "Berkeley DB lock-timeout parameter " - "(%d) is out of range. please specify" - " a valid timeout value for " - "lock-timeout and retry.", - private->lock_timeout); - goto err; - } - } - ret = dict_get_str (options, "transaction-timeout", - &timeout_str); - if (ret == 0) { - ret = gf_string2time (timeout_str, - &private->txn_timeout); - - if (private->txn_timeout > 4260000) { - /* db allows us to DB_SET_TXN_TIMEOUT to be set - * to a maximum of 71 mins - * (4260000 milliseconds) */ - gf_log (this->name, GF_LOG_DEBUG, - "Berkeley DB lock-timeout parameter " - "(%d) is out of range. please specify" - " a valid timeout value for " - "lock-timeout and retry.", - private->lock_timeout); - goto err; - } - } - - private->checkpoint_interval = BDB_DEFAULT_CHECKPOINT_INTERVAL; - ret = dict_get_str (options, "checkpoint-interval", - &checkpoint_interval_str); - if (ret == 0) { - ret = gf_string2time (checkpoint_interval_str, - &private->checkpoint_interval); - - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "'%"PRIu32"' is not a valid parameter " - "for checkpoint-interval option. " - "please specify a valid " - "checkpoint-interval and retry", - private->checkpoint_interval); - goto err; - } - } - } - - ret = dict_get_str (options, "file-mode", &mode_str); - if (ret == 0) { - private->file_mode = strtol (mode_str, &endptr, 8); - - if ((*endptr) || - (!IS_VALID_FILE_MODE(private->file_mode))) { - gf_log (this->name, GF_LOG_DEBUG, - "'%o' is not a valid parameter for file-mode " - "option. please specify a valid parameter for " - "file-mode and retry.", - private->file_mode); - goto err; - } - } else { - private->file_mode = DEFAULT_FILE_MODE; - } - private->symlink_mode = private->file_mode | S_IFLNK; - private->file_mode = private->file_mode | S_IFREG; - - ret = dict_get_str (options, "dir-mode", &mode_str); - if (ret == 0) { - private->dir_mode = strtol (mode_str, &endptr, 8); - if ((*endptr) || - (!IS_VALID_FILE_MODE(private->dir_mode))) { - gf_log (this->name, GF_LOG_DEBUG, - "'%o' is not a valid parameter for dir-mode " - "option. please specify a valid parameter for " - "dir-mode and retry.", - private->dir_mode); - goto err; - } - } else { - private->dir_mode = DEFAULT_DIR_MODE; - } - - private->dir_mode = private->dir_mode | S_IFDIR; - - table = GF_CALLOC (1, sizeof (*table), gf_bdb_mt_bctx_table_t); - if (table == NULL) { - gf_log ("bdb-ll", GF_LOG_CRITICAL, - "memory allocation for 'storage/bdb' internal " - "context table failed."); - goto err; - } - - INIT_LIST_HEAD(&(table->b_lru)); - INIT_LIST_HEAD(&(table->active)); - INIT_LIST_HEAD(&(table->purge)); - - LOCK_INIT (&table->lock); - LOCK_INIT (&table->checkpoint_lock); - - table->transaction = private->transaction; - table->access_mode = private->access_mode; - table->dbflags = private->dbflags; - table->this = this; - - ret = dict_get_str (options, "lru-limit", - &lru_limit_str); - - /* TODO: set max lockers and max txns to accomodate - * for more than lru_limit */ - if (ret == 0) { - ret = gf_string2uint32 (lru_limit_str, - &table->lru_limit); - gf_log ("bdb-ll", GF_LOG_DEBUG, - "setting lru limit of 'storage/bdb' internal context" - "table to %d. maximum of %d unused databases can be " - "open at any given point of time.", - table->lru_limit, table->lru_limit); - } else { - table->lru_limit = BDB_DEFAULT_LRU_LIMIT; - } - - ret = dict_get_str (options, "page-size", - &page_size_str); - - if (ret == 0) { - ret = gf_string2bytesize (page_size_str, - &table->page_size); - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "\"%s\" is an invalid parameter to " - "\"option page-size\". please specify a valid " - "size and retry.", - page_size_str); - goto err; - } - - if (!PAGE_SIZE_IN_RANGE(table->page_size)) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "\"%s\" is out of range for Berkeley DB " - "page-size. allowed page-size range is %d to " - "%d. please specify a page-size value in the " - "range and retry.", - page_size_str, BDB_LL_PAGE_SIZE_MIN, - BDB_LL_PAGE_SIZE_MAX); - goto err; - } - } else { - table->page_size = BDB_LL_PAGE_SIZE_DEFAULT; - } - - table->hash_size = BDB_DEFAULT_HASH_SIZE; - table->b_hash = GF_CALLOC (BDB_DEFAULT_HASH_SIZE, - sizeof (struct list_head), - gf_bdb_mt_list_head); - - for (idx = 0; idx < table->hash_size; idx++) - INIT_LIST_HEAD(&(table->b_hash[idx])); - - private->b_table = table; - - ret = dict_get_str (options, "errfile", &errfile); - if (ret == 0) { - private->errfile = gf_strdup (errfile); - gf_log (this->name, GF_LOG_DEBUG, - "using %s as error logging file for libdb (Berkeley DB " - "library) internal logging.", private->errfile); - } - - ret = dict_get_str (options, "directory", &directory); - - if (ret == 0) { - ret = dict_get_str (options, "logdir", &logdir); - - if (ret < 0) { - gf_log ("bdb-ll", GF_LOG_DEBUG, - "using the database environment home " - "directory (%s) itself as transaction log " - "directory", directory); - private->logdir = gf_strdup (directory); - - } else { - private->logdir = gf_strdup (logdir); - - op_ret = stat (private->logdir, &stbuf); - if ((op_ret != 0) - || (!S_ISDIR (stbuf.st_mode))) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "specified logdir %s does not exist. " - "please provide a valid existing " - "directory as parameter to 'option " - "logdir'", - private->logdir); - goto err; - } - } - - private->b_table->dbenv = bdb_dbenv_init (this, directory); - if (private->b_table->dbenv == NULL) { - gf_log ("bdb-ll", GF_LOG_ERROR, - "initialization of database environment " - "failed"); - goto err; - } else { - if (private->transaction) { - /* all well, start the checkpointing thread */ - LOCK_INIT (&private->active_lock); - - LOCK (&private->active_lock); - { - private->active = 1; - } - UNLOCK (&private->active_lock); - pthread_create (&private->checkpoint_thread, - NULL, bdb_checkpoint, this); - } - } - } - - return op_ret; -err: - if (table) { - GF_FREE (table->b_hash); - GF_FREE (table); - } - if (private) { - if (private->errfile) - GF_FREE (private->errfile); - - if (private->logdir) - GF_FREE (private->logdir); - } - - return -1; -} diff --git a/xlators/storage/bdb/src/bdb-mem-types.h b/xlators/storage/bdb/src/bdb-mem-types.h deleted file mode 100644 index e68b8c0ca..000000000 --- a/xlators/storage/bdb/src/bdb-mem-types.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - Copyright (c) 2008-2011 Gluster, Inc. - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - . -*/ - - -#ifndef __POSIX_MEM_TYPES_H__ -#define __POSIX_MEM_TYPES_H__ - -#include "mem-types.h" - -enum gf_bdb_mem_types_ { - gf_bdb_mt_bctx_t = gf_common_mt_end + 1, - gf_bdb_mt_bdb_fd, - gf_bdb_mt_dir_entry_t, - gf_bdb_mt_char, - gf_bdb_mt_dir_entry_t, - gf_bdb_mt_char, - gf_bdb_mt_bdb_private, - gf_bdb_mt_uint32_t, - gf_bdb_mt_char, - gf_bdb_mt_bdb_cache_t, - gf_bdb_mt_char, - gf_bdb_mt_bctx_table_t, - gf_bdb_mt_list_head, - gf_bdb_mt_end, -}; -#endif diff --git a/xlators/storage/bdb/src/bdb.c b/xlators/storage/bdb/src/bdb.c deleted file mode 100644 index 384094b57..000000000 --- a/xlators/storage/bdb/src/bdb.c +++ /dev/null @@ -1,3603 +0,0 @@ -/* - Copyright (c) 2008-2011 Gluster, Inc. - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - . -*/ - -/* bdb based storage translator - named as 'bdb' translator - * - * - * There can be only two modes for files existing on bdb translator: - * 1. DIRECTORY - directories are stored by bdb as regular directories on - * back-end file-system. directories also have an entry in the ns_db.db of - * their parent directory. - * 2. REGULAR FILE - regular files are stored as records in the storage_db.db - * present in the directory. regular files also have an entry in ns_db.db - * - * Internally bdb has a maximum of three different types of logical files - * associated with each directory: - * 1. storage_db.db - storage database, used to store the data corresponding to - * regular files in the form of key/value pair. file-name is the 'key' and - * data is 'value'. - * 2. directory (all subdirectories) - any subdirectory will have a regular - * directory entry. - */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#define __XOPEN_SOURCE 500 - -#include -#include -#include -#include -#include - -#include "glusterfs.h" -#include "dict.h" -#include "logging.h" -#include "bdb.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" - -/* to be used only by fops, nobody else */ -#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv) -#define B_TABLE(this) (((struct bdb_private *)this->private)->b_table) - - -int32_t -bdb_mknod (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - mode_t mode, - dev_t dev) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - char *key_string = NULL; /* after translating path to DB key */ - char *db_path = NULL; - bctx_t *bctx = NULL; - struct stat stbuf = {0,}; - - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - if (!S_ISREG(mode)) { - gf_log (this->name, GF_LOG_DEBUG, - "MKNOD %"PRId64"/%s (%s): EPERM" - "(mknod supported only for regular files. " - "file mode '%o' not supported)", - loc->parent->ino, loc->name, loc->path, mode); - op_ret = -1; - op_errno = EPERM; - goto out; - } /* if(!S_ISREG(mode)) */ - - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "MKNOD %"PRId64"/%s (%s): ENOMEM" - "(failed to lookup database handle)", - loc->parent->ino, loc->name, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); - - op_ret = lstat (db_path, &stbuf); - if (op_ret != 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_DEBUG, - "MKNOD %"PRId64"/%s (%s): EINVAL" - "(failed to lookup database handle)", - loc->parent->ino, loc->name, loc->path); - goto out; - } - - MAKE_KEY_FROM_PATH (key_string, loc->path); - op_ret = bdb_db_icreate (bctx, key_string); - if (op_ret > 0) { - /* create successful */ - stbuf.st_ino = bdb_inode_transform (loc->parent->ino, - key_string, - strlen (key_string)); - stbuf.st_mode = mode; - stbuf.st_size = 0; - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, \ - stbuf.st_blksize); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "MKNOD %"PRId64"/%s (%s): ENOMEM" - "(failed to create database entry)", - loc->parent->ino, loc->name, loc->path); - op_ret = -1; - op_errno = EINVAL; /* TODO: errno sari illa */ - goto out; - }/* if (!op_ret)...else */ - -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); - return 0; -} - -static inline int32_t -is_dir_empty (xlator_t *this, - loc_t *loc) -{ - int32_t ret = 1; - bctx_t *bctx = NULL; - DIR *dir = NULL; - char *real_path = NULL; - void *dbstat = NULL; - struct dirent *entry = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - bctx = bctx_lookup (B_TABLE(this), loc->path); - if (bctx == NULL) { - ret = -ENOMEM; - goto out; - } - - dbstat = bdb_db_stat (bctx, NULL, 0); - if (dbstat) { - switch (bctx->table->access_mode) - { - case DB_HASH: - ret = (((DB_HASH_STAT *)dbstat)->hash_nkeys == 0); - break; - case DB_BTREE: - case DB_RECNO: - ret = (((DB_BTREE_STAT *)dbstat)->bt_nkeys == 0); - break; - case DB_QUEUE: - ret = (((DB_QUEUE_STAT *)dbstat)->qs_nkeys == 0); - break; - case DB_UNKNOWN: - gf_log (this->name, GF_LOG_CRITICAL, - "unknown access-mode set for database"); - ret = 0; - } - } else { - ret = -EBUSY; - goto out; - } - - MAKE_REAL_PATH (real_path, this, loc->path); - dir = opendir (real_path); - if (dir == NULL) { - ret = -errno; - goto out; - } - - while ((entry = readdir (dir))) { - if ((!IS_BDB_PRIVATE_FILE(entry->d_name)) && - (!IS_DOT_DOTDOT(entry->d_name))) { - ret = 0; - break; - }/* if(!IS_BDB_PRIVATE_FILE()) */ - } /* while(true) */ - closedir (dir); -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - return ret; -} - -int32_t -bdb_rename (call_frame_t *frame, - xlator_t *this, - loc_t *oldloc, - loc_t *newloc) -{ - STACK_UNWIND (frame, -1, EXDEV, NULL); - return 0; -} - -int32_t -bdb_link (call_frame_t *frame, - xlator_t *this, - loc_t *oldloc, - loc_t *newloc) -{ - STACK_UNWIND (frame, -1, EXDEV, NULL, NULL); - return 0; -} - -int32_t -is_space_left (xlator_t *this, - size_t size) -{ - struct bdb_private *private = this->private; - struct statvfs stbuf = {0,}; - int32_t ret = -1; - fsblkcnt_t req_blocks = 0; - fsblkcnt_t usable_blocks = 0; - - ret = statvfs (private->export_path, &stbuf); - if (ret != 0) { - ret = 0; - } else { - req_blocks = (size / stbuf.f_frsize) + 1; - - usable_blocks = (stbuf.f_bfree - BDB_ENOSPC_THRESHOLD); - - if (req_blocks < usable_blocks) - ret = 1; - else - ret = 0; - } - - return ret; -} - -int32_t -bdb_create (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, - mode_t mode, - fd_t *fd) -{ - int32_t op_ret = -1; - int32_t op_errno = EPERM; - char *db_path = NULL; - struct stat stbuf = {0,}; - bctx_t *bctx = NULL; - struct bdb_private *private = NULL; - char *key_string = NULL; - struct bdb_fd *bfd = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - private = this->private; - - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "CREATE %"PRId64"/%s (%s): ENOMEM" - "(failed to lookup database handle)", - loc->parent->ino, loc->name, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); - op_ret = lstat (db_path, &stbuf); - if (op_ret != 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_DEBUG, - "CREATE %"PRId64"/%s (%s): EINVAL" - "(database file missing)", - loc->parent->ino, loc->name, loc->path); - goto out; - } - - MAKE_KEY_FROM_PATH (key_string, loc->path); - op_ret = bdb_db_icreate (bctx, key_string); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "CREATE %"PRId64"/%s (%s): ENOMEM" - "(failed to create database entry)", - loc->parent->ino, loc->name, loc->path); - op_errno = EINVAL; /* TODO: errno sari illa */ - goto out; - } - - /* create successful */ - bfd = GF_CALLOC (1, sizeof (*bfd), gf_bdb_mt_bdb_fd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "CREATE %"PRId64"/%s (%s): ENOMEM" - "(failed to allocate memory for internal fd context)", - loc->parent->ino, loc->name, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - /* NOTE: bdb_get_bctx_from () returns bctx with a ref */ - bfd->ctx = bctx; - bfd->key = gf_strdup (key_string); - if (bfd->key == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "CREATE %"PRId64" (%s): ENOMEM" - "(failed to allocate memory for internal fd->key)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - BDB_FCTX_SET (fd, this, bfd); - - stbuf.st_ino = bdb_inode_transform (loc->parent->ino, - key_string, - strlen (key_string)); - stbuf.st_mode = private->file_mode; - stbuf.st_size = 0; - stbuf.st_nlink = 1; - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); - op_ret = 0; - op_errno = 0; -out: - STACK_UNWIND (frame, op_ret, op_errno, fd, loc->inode, &stbuf); - - return 0; -} - - -/* bdb_open - * - * as input parameters bdb_open gets the file name, i.e key. bdb_open should - * effectively - * do: store key, open storage db, store storage-db pointer. - * - */ -int32_t -bdb_open (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, - fd_t *fd, - int32_t wbflags) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - bctx_t *bctx = NULL; - char *key_string = NULL; - struct bdb_fd *bfd = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "OPEN %"PRId64" (%s): ENOMEM" - "(failed to lookup database handle)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - bfd = GF_CALLOC (1, sizeof (*bfd), gf_bdb_mt_bdb_fd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "OPEN %"PRId64" (%s): ENOMEM" - "(failed to allocate memory for internal fd context)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - /* NOTE: bctx_parent () returns bctx with a ref */ - bfd->ctx = bctx; - - MAKE_KEY_FROM_PATH (key_string, loc->path); - bfd->key = gf_strdup (key_string); - if (bfd->key == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "OPEN %"PRId64" (%s): ENOMEM" - "(failed to allocate memory for internal fd->key)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - BDB_FCTX_SET (fd, this, bfd); - op_ret = 0; -out: - STACK_UNWIND (frame, op_ret, op_errno, fd); - - return 0; -} - -int32_t -bdb_readv (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - size_t size, - off_t offset) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - struct iovec vec = {0,}; - struct stat stbuf = {0,}; - struct bdb_fd *bfd = NULL; - char *db_path = NULL; - int32_t read_size = 0; - struct iobref *iobref = NULL; - struct iobuf *iobuf = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "READV %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EBADFD" - "(internal fd not found through fd)", - fd->inode->ino, size, offset); - op_errno = EBADFD; - op_ret = -1; - goto out; - } - - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory); - op_ret = lstat (db_path, &stbuf); - if (op_ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "READV %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EINVAL" - "(database file missing)", - fd->inode->ino, size, offset); - goto out; - } - - iobuf = iobuf_get (this->ctx->iobuf_pool); - if (!iobuf) { - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - /* we are ready to go */ - op_ret = bdb_db_fread (bfd, iobuf->ptr, size, offset); - read_size = op_ret; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "READV %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EBADFD" - "(failed to find entry in database)", - fd->inode->ino, size, offset); - op_ret = -1; - op_errno = ENOENT; - goto out; - } else if (op_ret == 0) { - goto out; - } - - iobref = iobref_new (); - if (iobref == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - if (size < read_size) { - op_ret = size; - read_size = size; - } - - iobref_add (iobref, iobuf); - - vec.iov_base = iobuf->ptr; - vec.iov_len = read_size; - - stbuf.st_ino = fd->inode->ino; - stbuf.st_size = bdb_db_fread (bfd, NULL, 0, 0); - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); - op_ret = size; -out: - STACK_UNWIND (frame, op_ret, op_errno, &vec, 1, &stbuf, iobuf); - - if (iobref) - iobref_unref (iobref); - - if (iobuf) - iobuf_unref (iobuf); - - return 0; -} - - -int32_t -bdb_writev (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iovec *vector, - int32_t count, - off_t offset, - struct iobref *iobref) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - struct stat stbuf = {0,}; - struct bdb_fd *bfd = NULL; - int32_t idx = 0; - off_t c_off = offset; - int32_t c_ret = -1; - char *db_path = NULL; - size_t total_size = 0; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, vector, out); - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "WRITEV %"PRId64" - %"PRId32",%"PRId64": EBADFD" - "(internal fd not found through fd)", - fd->inode->ino, count, offset); - op_ret = -1; - op_errno = EBADFD; - goto out; - } - - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory); - op_ret = lstat (db_path, &stbuf); - if (op_ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "WRITEV %"PRId64" - %"PRId32",%"PRId64": EINVAL" - "(database file missing)", - fd->inode->ino, count, offset); - goto out; - } - - for (idx = 0; idx < count; idx++) - total_size += vector[idx].iov_len; - - if (!is_space_left (this, total_size)) { - gf_log (this->name, GF_LOG_ERROR, - "WRITEV %"PRId64" - %"PRId32" (%"GF_PRI_SIZET"),%" - PRId64": ENOSPC " - "(not enough space after internal measurement)", - fd->inode->ino, count, total_size, offset); - op_ret = -1; - op_errno = ENOSPC; - goto out; - } - - /* we are ready to go */ - for (idx = 0; idx < count; idx++) { - c_ret = bdb_db_fwrite (bfd, vector[idx].iov_base, - vector[idx].iov_len, c_off); - if (c_ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "WRITEV %"PRId64" - %"PRId32",%"PRId64": EINVAL" - "(database write at %"PRId64" failed)", - fd->inode->ino, count, offset, c_off); - break; - } else { - c_off += vector[idx].iov_len; - } - op_ret += vector[idx].iov_len; - } /* for(idx=0;...)... */ - - if (c_ret) { - /* write failed after a point, not an error */ - stbuf.st_size = bdb_db_fread (bfd, NULL, 0, 0); - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, - stbuf.st_blksize); - goto out; - } - - /* NOTE: we want to increment stbuf->st_size, as stored in db */ - stbuf.st_size = op_ret; - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); - op_errno = 0; - -out: - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); - return 0; -} - -int32_t -bdb_flush (call_frame_t *frame, - xlator_t *this, - fd_t *fd) -{ - int32_t op_ret = -1; - int32_t op_errno = EPERM; - struct bdb_fd *bfd = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "FLUSH %"PRId64": EBADFD" - "(internal fd not found through fd)", - fd->inode->ino); - op_ret = -1; - op_errno = EBADFD; - goto out; - } - - /* do nothing */ - op_ret = 0; - op_errno = 0; - -out: - STACK_UNWIND (frame, op_ret, op_errno); - return 0; -} - -int32_t -bdb_release (xlator_t *this, - fd_t *fd) -{ - int32_t op_ret = -1; - int32_t op_errno = EBADFD; - struct bdb_fd *bfd = NULL; - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "RELEASE %"PRId64": EBADFD" - "(internal fd not found through fd)", - fd->inode->ino); - op_ret = -1; - op_errno = EBADFD; - goto out; - } - - bctx_unref (bfd->ctx); - bfd->ctx = NULL; - - if (bfd->key) - GF_FREE (bfd->key); /* we did strdup() in bdb_open() */ - GF_FREE (bfd); - op_ret = 0; - op_errno = 0; - -out: - return 0; -}/* bdb_release */ - - -int32_t -bdb_fsync (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - int32_t datasync) -{ - STACK_UNWIND (frame, 0, 0); - return 0; -}/* bdb_fsync */ - -static int gf_bdb_lk_log; - -int32_t -bdb_lk (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - int32_t cmd, - struct gf_flock *lock) -{ - struct gf_flock nullock = {0, }; - - if (BDB_TIMED_LOG (ENOTSUP, gf_bdb_lk_log)) { - gf_log (this->name, GF_LOG_DEBUG, - "LK %"PRId64": ENOTSUP " - "(load \"features/locks\" translator to enable " - "lock support)", - fd->inode->ino); - } - - STACK_UNWIND (frame, -1, ENOTSUP, &nullock); - return 0; -}/* bdb_lk */ - -/* bdb_lookup - * - * there are four possibilities for a file being looked up: - * 1. file exists and is a directory. - * 2. file exists and is a symlink. - * 3. file exists and is a regular file. - * 4. file does not exist. - * case 1 and 2 are handled by doing lstat() on the @loc. if the file is a - * directory or symlink, lstat() succeeds. lookup continues to check if the - * @loc belongs to case-3 only if lstat() fails. - * to check for case 3, bdb_lookup does a bdb_db_iread() for the given @loc. - * (see description of bdb_db_iread() for more details on how @loc is transformed - * into db handle and key). if check for case 1, 2 and 3 fail, we proceed to - * conclude that file doesn't exist (case 4). - * - * @frame: call frame. - * @this: xlator_t of this instance of bdb xlator. - * @loc: loc_t specifying the file to operate upon. - * @need_xattr: if need_xattr != 0, we are asked to return all the extended - * attributed of @loc, if any exist, in a dictionary. if @loc is a regular - * file and need_xattr is set, then we look for value of need_xattr. if - * need_xattr > sizo-of-the-file @loc, then the file content of @loc is - * returned in dictionary of xattr with 'glusterfs.content' as dictionary key. - * - * NOTE: bdb currently supports only directories, symlinks and regular files. - * - * NOTE: bdb_lookup returns the 'struct stat' of underlying file itself, in - * case of directory and symlink (st_ino is modified as bdb allocates its own - * set of inodes of all files). for regular files, bdb uses 'struct stat' of - * the database file in which the @loc is stored as templete and modifies - * st_ino (see bdb_inode_transform for more details), st_mode (can be set in - * volfile 'option file-mode '), st_size (exact size of the @loc - * contents), st_blocks (block count on the underlying filesystem to - * accomodate st_size, see BDB_COUNT_BLOCKS in bdb.h for more details). - */ -int32_t -bdb_lookup (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *xattr_req) -{ - struct stat stbuf = {0, }; - int32_t op_ret = -1; - int32_t op_errno = ENOENT; - dict_t *xattr = NULL; - char *pathname = NULL; - char *directory = NULL; - char *real_path = NULL; - bctx_t *bctx = NULL; - char *db_path = NULL; - struct bdb_private *private = NULL; - char *key_string = NULL; - int32_t entry_size = 0; - char *file_content = NULL; - uint64_t need_xattr = 0; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - private = this->private; - - MAKE_REAL_PATH (real_path, this, loc->path); - - pathname = gf_strdup (loc->path); - GF_VALIDATE_OR_GOTO (this->name, pathname, out); - - directory = dirname (pathname); - GF_VALIDATE_OR_GOTO (this->name, directory, out); - - if (!strcmp (directory, loc->path)) { - /* SPECIAL CASE: looking up root */ - op_ret = lstat (real_path, &stbuf); - if (op_ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "LOOKUP %"PRId64" (%s): %s", - loc->ino, loc->path, strerror (op_errno)); - goto out; - } - - /* bctx_lookup() returns NULL only when its time to wind up, - * we should shutdown functioning */ - bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "LOOKUP %"PRId64" (%s): ENOMEM" - "(failed to lookup database handle)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - stbuf.st_ino = 1; - stbuf.st_mode = private->dir_mode; - - op_ret = 0; - goto out; - } - - MAKE_KEY_FROM_PATH (key_string, loc->path); - op_ret = lstat (real_path, &stbuf); - if ((op_ret == 0) && (S_ISDIR (stbuf.st_mode))){ - bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "LOOKUP %"PRId64"/%s (%s): ENOMEM" - "(failed to lookup database handle)", - loc->parent->ino, loc->name, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - if (loc->ino) { - /* revalidating directory inode */ - stbuf.st_ino = loc->ino; - } else { - stbuf.st_ino = bdb_inode_transform (loc->parent->ino, - key_string, - strlen (key_string)); - } - stbuf.st_mode = private->dir_mode; - - op_ret = 0; - goto out; - - } else if (op_ret == 0) { - /* a symlink */ - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "LOOKUP %"PRId64"/%s (%s): ENOMEM" - "(failed to lookup database handle)", - loc->parent->ino, loc->name, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - if (loc->ino) { - stbuf.st_ino = loc->ino; - } else { - stbuf.st_ino = bdb_inode_transform (loc->parent->ino, - key_string, - strlen (key_string)); - } - - stbuf.st_mode = private->symlink_mode; - - op_ret = 0; - goto out; - - } - - /* for regular files */ - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "LOOKUP %"PRId64"/%s (%s): ENOMEM" - "(failed to lookup database handle for parent)", - loc->parent->ino, loc->name, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - if (GF_FILE_CONTENT_REQUESTED(xattr_req, &need_xattr)) { - entry_size = bdb_db_iread (bctx, key_string, &file_content); - } else { - entry_size = bdb_db_iread (bctx, key_string, NULL); - } - - op_ret = entry_size; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "LOOKUP %"PRId64"/%s (%s): ENOENT" - "(database entry not found)", - loc->parent->ino, loc->name, loc->path); - op_errno = ENOENT; - goto out; - } - - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); - op_ret = lstat (db_path, &stbuf); - if (op_ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "LOOKUP %"PRId64"/%s (%s): %s", - loc->parent->ino, loc->name, loc->path, - strerror (op_errno)); - goto out; - } - - if (entry_size - && (need_xattr >= entry_size) - && (file_content)) { - xattr = dict_new (); - op_ret = dict_set_dynptr (xattr, "glusterfs.content", - file_content, entry_size); - if (op_ret < 0) { - /* continue without giving file contents */ - GF_FREE (file_content); - } - } else { - if (file_content) - GF_FREE (file_content); - } - - if (loc->ino) { - /* revalidate */ - stbuf.st_ino = loc->ino; - stbuf.st_size = entry_size; - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, - stbuf.st_blksize); - } else { - /* fresh lookup, create an inode number */ - stbuf.st_ino = bdb_inode_transform (loc->parent->ino, - key_string, - strlen (key_string)); - stbuf.st_size = entry_size; - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, - stbuf.st_blksize); - }/* if(inode->ino)...else */ - stbuf.st_nlink = 1; - stbuf.st_mode = private->file_mode; - - op_ret = 0; -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - if (pathname) - GF_FREE (pathname); - - if (xattr) - dict_ref (xattr); - - STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf, xattr); - - if (xattr) - dict_unref (xattr); - - return 0; - -}/* bdb_lookup */ - -int32_t -bdb_stat (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - - struct stat stbuf = {0,}; - char *real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - struct bdb_private *private = NULL; - char *db_path = NULL; - bctx_t *bctx = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - private = this->private; - GF_VALIDATE_OR_GOTO (this->name, private, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = lstat (real_path, &stbuf); - op_errno = errno; - if (op_ret == 0) { - /* directory or symlink */ - stbuf.st_ino = loc->inode->ino; - if (S_ISDIR(stbuf.st_mode)) - stbuf.st_mode = private->dir_mode; - else - stbuf.st_mode = private->symlink_mode; - /* we are done, lets unwind the stack */ - goto out; - } - - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "STAT %"PRId64" (%s): ENOMEM" - "(no database handle for parent)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); - op_ret = lstat (db_path, &stbuf); - if (op_ret < 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "STAT %"PRId64" (%s): %s" - "(failed to stat on database file)", - loc->ino, loc->path, strerror (op_errno)); - goto out; - } - - stbuf.st_size = bdb_db_iread (bctx, loc->path, NULL); - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); - stbuf.st_ino = loc->inode->ino; - -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); - - return 0; -}/* bdb_stat */ - - - -/* bdb_opendir - in the world of bdb, open/opendir is all about opening - * correspondind databases. opendir in particular, opens the database for the - * directory which is to be opened. after opening the database, a cursor to - * the database is also created. cursor helps us get the dentries one after - * the other, and cursor maintains the state about current positions in - * directory. pack 'pointer to db', 'pointer to the cursor' into - * struct bdb_dir and store it in fd->ctx, we get from our parent xlator. - * - * @frame: call frame - * @this: our information, as we filled during init() - * @loc: location information - * @fd: file descriptor structure (glusterfs internal) - * - * return value - immaterial, async call. - * - */ -int32_t -bdb_opendir (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - fd_t *fd) -{ - char *real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - bctx_t *bctx = NULL; - struct bdb_dir *bfd = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - - bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "OPENDIR %"PRId64" (%s): ENOMEM" - "(no database handle for directory)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - bfd = GF_CALLOC (1, sizeof (*bfd), gf_bdb_mt_bdb_fd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "OPENDIR %"PRId64" (%s): ENOMEM" - "(failed to allocate memory for internal fd)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto err; - } - - bfd->dir = opendir (real_path); - if (bfd->dir == NULL) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "OPENDIR %"PRId64" (%s): %s", - loc->ino, loc->path, strerror (op_errno)); - goto err; - } - - /* NOTE: bctx_lookup() return bctx with ref */ - bfd->ctx = bctx; - - bfd->path = gf_strdup (real_path); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "OPENDIR %"PRId64" (%s): ENOMEM" - "(failed to allocate memory for internal fd->path)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto err; - } - - BDB_FCTX_SET (fd, this, bfd); - op_ret = 0; -out: - STACK_UNWIND (frame, op_ret, op_errno, fd); - return 0; -err: - if (bctx) - bctx_unref (bctx); - if (bfd) { - if (bfd->dir) - closedir (bfd->dir); - - GF_FREE (bfd); - } - - return 0; -}/* bdb_opendir */ - -int32_t -bdb_getdents (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - size_t size, - off_t off, - int32_t flag) -{ - struct bdb_dir *bfd = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - size_t filled = 0; - dir_entry_t entries = {0, }; - dir_entry_t *this_entry = NULL; - char *entry_path = NULL; - struct dirent *dirent = NULL; - off_t in_case = 0; - int32_t this_size = 0; - DBC *cursorp = NULL; - int32_t ret = -1; - int32_t real_path_len = 0; - int32_t entry_path_len = 0; - int32_t count = 0; - off_t offset = 0; - size_t tmp_name_len = 0; - struct stat db_stbuf = {0,}; - struct stat buf = {0,}; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64 - " %o: EBADFD " - "(failed to find internal context in fd)", - fd->inode->ino, size, off, flag); - op_errno = EBADFD; - op_ret = -1; - goto out; - } - - op_ret = bdb_cursor_open (bfd->ctx, &cursorp); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64 - ": EBADFD " - "(failed to open cursor to database handle)", - fd->inode->ino, size, off); - op_errno = EBADFD; - goto out; - } - - if (off) { - DBT sec = {0,}, pri = {0,}, val = {0,}; - sec.data = &(off); - sec.size = sizeof (off); - sec.flags = DB_DBT_USERMEM; - val.dlen = 0; - val.doff = 0; - val.flags = DB_DBT_PARTIAL; - - op_ret = bdb_cursor_get (cursorp, &sec, &pri, &val, DB_SET); - if (op_ret == DB_NOTFOUND) { - offset = off; - goto dir_read; - } - } - - while (filled <= size) { - DBT sec = {0,}, pri = {0,}, val = {0,}; - - this_entry = NULL; - - sec.flags = DB_DBT_MALLOC; - pri.flags = DB_DBT_MALLOC; - val.dlen = 0; - val.doff = 0; - val.flags = DB_DBT_PARTIAL; - op_ret = bdb_cursor_get (cursorp, &sec, &pri, &val, DB_NEXT); - - if (op_ret == DB_NOTFOUND) { - /* we reached end of the directory */ - op_ret = 0; - op_errno = 0; - break; - } else if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET - ",%"PRId64":" - "(failed to read the next entry from database)", - fd->inode->ino, size, off); - op_errno = ENOENT; - break; - } /* if (op_ret == DB_NOTFOUND)...else if...else */ - - if (pri.data == NULL) { - /* NOTE: currently ignore when we get key.data == NULL. - * FIXME: we should not get key.data = NULL */ - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET - ",%"PRId64":" - "(null key read for entry from database)", - fd->inode->ino, size, off); - continue; - }/* if(key.data)...else */ - - this_entry = GF_CALLOC (1, sizeof (*this_entry), - gf_bdb_mt_dir_entry_t); - if (this_entry == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64 - " - %s:" - "(failed to allocate memory for an entry)", - fd->inode->ino, size, off, strerror (errno)); - op_errno = ENOMEM; - op_ret = -1; - goto out; - } - - this_entry->name = GF_CALLOC (pri.size + 1, sizeof (char), - gf_bdb_mt_char); - if (this_entry->name == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64 - " - %s:" - "(failed to allocate memory for an " - "entry->name)", - fd->inode->ino, size, off, strerror (errno)); - op_errno = ENOMEM; - op_ret = -1; - goto out; - } - - memcpy (this_entry->name, pri.data, pri.size); - this_entry->buf = db_stbuf; - this_entry->buf.st_size = bdb_db_iread (bfd->ctx, - this_entry->name, NULL); - this_entry->buf.st_blocks = BDB_COUNT_BLOCKS ( - this_entry->buf.st_size, - this_entry->buf.st_blksize); - - this_entry->buf.st_ino = bdb_inode_transform (fd->inode->ino, - pri.data, - pri.size); - count++; - - this_entry->next = entries.next; - this_entry->link = ""; - entries.next = this_entry; - /* if size is 0, count can never be = size, - * so entire dir is read */ - if (sec.data) - GF_FREE (sec.data); - - if (pri.data) - GF_FREE (pri.data); - - if (count == size) - break; - }/* while */ - bdb_cursor_close (bfd->ctx, cursorp); - op_ret = count; - op_errno = 0; - if (count >= size) - goto out; -dir_read: - /* hungry kyaa? */ - if (!offset) { - rewinddir (bfd->dir); - } else { - seekdir (bfd->dir, offset); - } - - while (filled <= size) { - this_entry = NULL; - this_size = 0; - - in_case = telldir (bfd->dir); - dirent = readdir (bfd->dir); - if (!dirent) - break; - - if (IS_BDB_PRIVATE_FILE(dirent->d_name)) - continue; - - tmp_name_len = strlen (dirent->d_name); - if (entry_path_len < (real_path_len + 1 + (tmp_name_len) + 1)) { - entry_path_len = real_path_len + tmp_name_len + 1024; - entry_path = realloc (entry_path, entry_path_len); - if (entry_path == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET"," - "%"PRId64" - %s: (failed to allocate " - "memory for an entry_path)", - fd->inode->ino, size, off, - strerror (errno)); - op_errno = ENOMEM; - op_ret = -1; - goto out; - } - } - - memcpy (&entry_path[real_path_len+1], dirent->d_name, - tmp_name_len + 1); - op_ret = stat (entry_path, &buf); - if (op_ret < 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64 - " - %s:" - " (failed to stat on an entry '%s')", - fd->inode->ino, size, off, - strerror (errno), entry_path); - goto out; /* FIXME: shouldn't we continue here */ - } - - if ((flag == GF_GET_DIR_ONLY) && - ((ret != -1) && (!S_ISDIR(buf.st_mode)))) { - continue; - } - - this_entry = GF_CALLOC (1, sizeof (*this_entry), - gf_bdb_mt_dir_entry_t); - if (this_entry == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64 - " - %s:" - "(failed to allocate memory for an entry)", - fd->inode->ino, size, off, strerror (errno)); - op_errno = ENOMEM; - op_ret = -1; - goto out; - } - - this_entry->name = gf_strdup (dirent->d_name); - if (this_entry->name == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64 - " - %s:" - "(failed to allocate memory for an " - "entry->name)", - fd->inode->ino, size, off, strerror (errno)); - op_errno = ENOMEM; - op_ret = -1; - goto out; - } - - this_entry->buf = buf; - - this_entry->buf.st_ino = -1; - if (S_ISLNK(this_entry->buf.st_mode)) { - char linkpath[PATH_MAX] = {0,}; - ret = readlink (entry_path, linkpath, PATH_MAX); - if (ret != -1) { - linkpath[ret] = '\0'; - this_entry->link = gf_strdup (linkpath); - } - } else { - this_entry->link = ""; - } - - count++; - - this_entry->next = entries.next; - entries.next = this_entry; - - /* if size is 0, count can never be = size, - * so entire dir is read */ - if (count == size) - break; - } - op_ret = filled; - op_errno = 0; - -out: - gf_log (this->name, GF_LOG_DEBUG, - "GETDENTS %"PRId64" - %"GF_PRI_SIZET" (%"PRId32")" - "/%"GF_PRI_SIZET",%"PRId64":" - "(failed to read the next entry from database)", - fd->inode->ino, filled, count, size, off); - - STACK_UNWIND (frame, count, op_errno, &entries); - - while (entries.next) { - this_entry = entries.next; - entries.next = entries.next->next; - GF_FREE (this_entry->name); - GF_FREE (this_entry); - } - - return 0; -}/* bdb_getdents */ - - -int32_t -bdb_releasedir (xlator_t *this, - fd_t *fd) -{ - int32_t op_ret = 0; - int32_t op_errno = 0; - struct bdb_dir *bfd = NULL; - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "RELEASEDIR %"PRId64": EBADFD", - fd->inode->ino); - op_errno = EBADFD; - op_ret = -1; - goto out; - } - - if (bfd->path) { - GF_FREE (bfd->path); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "RELEASEDIR %"PRId64": (bfd->path is NULL)", - fd->inode->ino); - } - - if (bfd->dir) { - closedir (bfd->dir); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "RELEASEDIR %"PRId64": (bfd->dir is NULL)", - fd->inode->ino); - } - - if (bfd->ctx) { - bctx_unref (bfd->ctx); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "RELEASEDIR %"PRId64": (bfd->ctx is NULL)", - fd->inode->ino); - } - - GF_FREE (bfd); - -out: - return 0; -}/* bdb_releasedir */ - - -int32_t -bdb_readlink (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - size_t size) -{ - char *dest = NULL; - int32_t op_ret = -1; - int32_t op_errno = EPERM; - char *real_path = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - dest = alloca (size + 1); - GF_VALIDATE_OR_GOTO (this->name, dest, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = readlink (real_path, dest, size); - - if (op_ret > 0) - dest[op_ret] = 0; - - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "READLINK %"PRId64" (%s): %s", - loc->ino, loc->path, strerror (op_errno)); - } -out: - STACK_UNWIND (frame, op_ret, op_errno, dest); - - return 0; -}/* bdb_readlink */ - - -int32_t -bdb_mkdir (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - mode_t mode) -{ - int32_t op_ret = -1; - int32_t ret = -1; - int32_t op_errno = EINVAL; - char *real_path = NULL; - struct stat stbuf = {0, }; - bctx_t *bctx = NULL; - char *key_string = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - MAKE_KEY_FROM_PATH (key_string, loc->path); - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = mkdir (real_path, mode); - if (op_ret < 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "MKDIR %"PRId64" (%s): %s", - loc->ino, loc->path, strerror (op_errno)); - goto out; - } - - op_ret = chown (real_path, frame->root->uid, frame->root->gid); - if (op_ret < 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "MKDIR %"PRId64" (%s): %s " - "(failed to do chmod)", - loc->ino, loc->path, strerror (op_errno)); - goto err; - } - - op_ret = lstat (real_path, &stbuf); - if (op_ret < 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "MKDIR %"PRId64" (%s): %s " - "(failed to do lstat)", - loc->ino, loc->path, strerror (op_errno)); - goto err; - } - - bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "MKDIR %"PRId64" (%s): ENOMEM" - "(no database handle for parent)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto err; - } - - stbuf.st_ino = bdb_inode_transform (loc->parent->ino, key_string, - strlen (key_string)); - - goto out; - -err: - ret = rmdir (real_path); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "MKDIR %"PRId64" (%s): %s" - "(failed to do rmdir)", - loc->ino, loc->path, strerror (errno)); - } - -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); - - return 0; -}/* bdb_mkdir */ - - -int32_t -bdb_unlink (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - bctx_t *bctx = NULL; - char *real_path = NULL; - char *key_string = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "UNLINK %"PRId64" (%s): ENOMEM" - "(no database handle for parent)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - MAKE_KEY_FROM_PATH (key_string, loc->path); - op_ret = bdb_db_iremove (bctx, key_string); - if (op_ret == DB_NOTFOUND) { - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = unlink (real_path); - if (op_ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "UNLINK %"PRId64" (%s): %s" - "(symlink unlink failed)", - loc->ino, loc->path, strerror (op_errno)); - goto out; - } - } else if (op_ret == 0) { - op_errno = 0; - } -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno); - - return 0; -}/* bdb_unlink */ - - - -static int32_t -bdb_do_rmdir (xlator_t *this, - loc_t *loc) -{ - char *real_path = NULL; - int32_t ret = -1; - bctx_t *bctx = NULL; - DB_ENV *dbenv = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - dbenv = BDB_ENV(this); - GF_VALIDATE_OR_GOTO (this->name, dbenv, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - - bctx = bctx_lookup (B_TABLE(this), loc->path); - if (bctx == NULL) { - ret = -ENOMEM; - goto out; - } - - LOCK(&bctx->lock); - { - if ((bctx->primary == NULL) - || (bctx->secondary == NULL)) { - goto unlock; - } - - ret = bctx->primary->close (bctx->primary, 0); - if (ret < 0) { - ret = -EINVAL; - } - - ret = bctx->secondary->close (bctx->secondary, 0); - if (ret < 0) { - ret = -EINVAL; - } - - ret = dbenv->dbremove (dbenv, NULL, bctx->db_path, - "primary", 0); - if (ret < 0) { - ret = -EBUSY; - } - - ret = dbenv->dbremove (dbenv, NULL, bctx->db_path, - "secondary", 0); - if (ret != 0) { - ret = -EBUSY; - } - } -unlock: - UNLOCK(&bctx->lock); - - if (ret) { - goto out; - } - ret = rmdir (real_path); - -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - return ret; -} - -int32_t -bdb_rmdir (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - - op_ret = is_dir_empty (this, loc); - if (op_ret < 0) { - op_errno = -op_ret; - gf_log (this->name, GF_LOG_DEBUG, - "RMDIR %"PRId64" (%s): %s" - "(internal rmdir routine returned error)", - loc->ino, loc->path, strerror (op_errno)); - } else if (op_ret == 0) { - op_ret = -1; - op_errno = ENOTEMPTY; - gf_log (this->name, GF_LOG_DEBUG, - "RMDIR %"PRId64" (%s): ENOTEMPTY", - loc->ino, loc->path); - goto out; - } - - op_ret = bdb_do_rmdir (this, loc); - if (op_ret < 0) { - op_errno = -op_ret; - gf_log (this->name, GF_LOG_DEBUG, - "RMDIR %"PRId64" (%s): %s" - "(internal rmdir routine returned error)", - loc->ino, loc->path, strerror (op_errno)); - goto out; - } - -out: - STACK_UNWIND (frame, op_ret, op_errno); - - return 0; -} /* bdb_rmdir */ - -int32_t -bdb_symlink (call_frame_t *frame, - xlator_t *this, - const char *linkname, - loc_t *loc) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - char *real_path = NULL; - struct stat stbuf = {0,}; - struct bdb_private *private = NULL; - bctx_t *bctx = NULL; - char *key_string = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, linkname, out); - - private = this->private; - GF_VALIDATE_OR_GOTO (this->name, private, out); - - MAKE_KEY_FROM_PATH (key_string, loc->path); - - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = symlink (linkname, real_path); - op_errno = errno; - if (op_ret == 0) { - op_ret = lstat (real_path, &stbuf); - if (op_ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "SYMLINK %"PRId64" (%s): %s", - loc->ino, loc->path, strerror (op_errno)); - goto err; - } - - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "SYMLINK %"PRId64" (%s): ENOMEM" - "(no database handle for parent)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto err; - } - - stbuf.st_ino = bdb_inode_transform (loc->parent->ino, - key_string, - strlen (key_string)); - stbuf.st_mode = private->symlink_mode; - - goto out; - } -err: - op_ret = unlink (real_path); - op_errno = errno; - if (op_ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "SYMLINK %"PRId64" (%s): %s" - "(failed to unlink the created symlink)", - loc->ino, loc->path, strerror (op_errno)); - } - op_ret = -1; - op_errno = ENOENT; -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); - - return 0; -} /* bdb_symlink */ - -static int -bdb_do_chmod (xlator_t *this, - const char *path, - struct stat *stbuf) -{ - int32_t ret = -1; - - ret = lchmod (path, stbuf->st_mode); - if ((ret == -1) && (errno == ENOSYS)) { - ret = chmod (path, stbuf->st_mode); - } - - return ret; -} - -static int -bdb_do_chown (xlator_t *this, - const char *path, - struct stat *stbuf, - int32_t valid) -{ - int32_t ret = -1; - uid_t uid = -1; - gid_t gid = -1; - - if (valid & GF_SET_ATTR_UID) - uid = stbuf->st_uid; - - if (valid & GF_SET_ATTR_GID) - gid = stbuf->st_gid; - - ret = lchown (path, uid, gid); - - return ret; -} - -static int -bdb_do_utimes (xlator_t *this, - const char *path, - struct stat *stbuf) -{ - int32_t ret = -1; - struct timeval tv[2] = {{0,},{0,}}; - - tv[0].tv_sec = stbuf->st_atime; - tv[0].tv_usec = ST_ATIM_NSEC (stbuf) / 1000; - tv[1].tv_sec = stbuf->st_mtime; - tv[1].tv_usec = ST_ATIM_NSEC (stbuf) / 1000; - - ret = lutimes (path, tv); - - return ret; -} - -int32_t -bdb_setattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - struct stat *stbuf, - int32_t valid) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - char *real_path = NULL; - struct stat preop = {0,}; - struct stat postop = {0,}; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = lstat (real_path, &preop); - op_errno = errno; - if (op_ret != 0) { - if (op_errno == ENOENT) { - op_errno = EPERM; - } else { - gf_log (this->name, GF_LOG_DEBUG, - "CHMOD %"PRId64" (%s): %s" - "(pre-op lstat failed)", - loc->ino, loc->path, strerror (op_errno)); - } - goto out; - } - - /* directory or symlink */ - if (valid & GF_SET_ATTR_MODE) { - op_ret = bdb_do_chmod (this, real_path, stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "setattr (chmod) on %s failed: %s", loc->path, - strerror (op_errno)); - goto out; - } - } - - if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){ - op_ret = bdb_do_chown (this, real_path, stbuf, valid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "setattr (chown) on %s failed: %s", loc->path, - strerror (op_errno)); - goto out; - } - } - - if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { - op_ret = bdb_do_utimes (this, real_path, stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "setattr (utimes) on %s failed: %s", loc->path, - strerror (op_errno)); - goto out; - } - } - - op_ret = lstat (real_path, &postop); - op_errno = errno; - if (op_ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "CHMOD %"PRId64" (%s): %s" - "(post-op lstat failed)", - loc->ino, loc->path, strerror (op_errno)); - } - -out: - STACK_UNWIND (frame, op_ret, op_errno, &preop, &postop); - - return 0; -}/* bdb_setattr */ - -int32_t -bdb_fsetattr (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct stat *stbuf, - int32_t valid) -{ - int32_t op_ret = -1; - int32_t op_errno = EPERM; - struct stat preop = {0,}; - struct stat postop = {0,}; - - STACK_UNWIND (frame, op_ret, op_errno, &preop, &postop); - - return 0; -}/* bdb_fsetattr */ - - -int32_t -bdb_truncate (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - off_t offset) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - char *real_path = NULL; - struct stat stbuf = {0,}; - char *db_path = NULL; - bctx_t *bctx = NULL; - char *key_string = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - bctx = bctx_parent (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "TRUNCATE %"PRId64" (%s): ENOMEM" - "(no database handle for parent)", - loc->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - MAKE_REAL_PATH (real_path, this, loc->path); - MAKE_KEY_FROM_PATH (key_string, loc->path); - - /* now truncate */ - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); - op_ret = lstat (db_path, &stbuf); - if (op_ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "TRUNCATE %"PRId64" (%s): %s" - "(lstat on database file failed)", - loc->ino, loc->path, strerror (op_errno)); - goto out; - } - - if (loc->inode->ino) { - stbuf.st_ino = loc->inode->ino; - }else { - stbuf.st_ino = bdb_inode_transform (loc->parent->ino, - key_string, - strlen (key_string)); - } - - op_ret = bdb_db_itruncate (bctx, key_string); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "TRUNCATE %"PRId64" (%s): EINVAL" - "(truncating entry in database failed - %s)", - loc->ino, loc->path, db_strerror (op_ret)); - op_errno = EINVAL; /* TODO: better errno */ - } - -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); - - return 0; -}/* bdb_truncate */ - - -int32_t -bdb_statfs (call_frame_t *frame, - xlator_t *this, - loc_t *loc) - -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - char *real_path = NULL; - struct statvfs buf = {0, }; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = statvfs (real_path, &buf); - op_errno = errno; -out: - STACK_UNWIND (frame, op_ret, op_errno, &buf); - return 0; -}/* bdb_statfs */ - -static int gf_bdb_xattr_log; - -/* bdb_setxattr - set extended attributes. - * - * bdb allows setxattr operation only on directories. - * bdb reservers 'glusterfs.file.' to operate on the content - * of the files under the specified directory. - * 'glusterfs.file.' transforms to contents of file of name - * '' under specified directory. - * - * @frame: call frame. - * @this: xlator_t of this instance of bdb xlator. - * @loc: loc_t specifying the file to operate upon. - * @dict: list of extended attributes to set on @loc. - * @flags: can be XATTR_REPLACE (replace an existing extended attribute only if - * it exists) or XATTR_CREATE (create an extended attribute only if it - * doesn't already exist). - * - * - */ -int32_t -bdb_setxattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *dict, - int flags) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - data_pair_t *trav = dict->members_list; - bctx_t *bctx = NULL; - char *real_path = NULL; - char *key = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, dict, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - if (!S_ISDIR (loc->inode->st_mode)) { - op_ret = -1; - op_errno = ENOATTR; - goto out; - } - - while (trav) { - if (GF_FILE_CONTENT_REQUEST(trav->key) ) { - key = BDB_KEY_FROM_FREQUEST_KEY(trav->key); - - bctx = bctx_lookup (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "SETXATTR %"PRId64" (%s) - %s: ENOMEM" - "(no database handle for directory)", - loc->ino, loc->path, key); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - if (flags & XATTR_REPLACE) { - op_ret = bdb_db_itruncate (bctx, key); - if (op_ret == -1) { - /* key doesn't exist in database */ - gf_log (this->name, GF_LOG_DEBUG, - "SETXATTR %"PRId64" (%s) - %s:" - " (entry not present in " - "database)", - loc->ino, loc->path, key); - op_ret = -1; - op_errno = ENOATTR; - break; - } - op_ret = bdb_db_iwrite (bctx, key, - trav->value->data, - trav->value->len); - if (op_ret != 0) { - op_ret = -1; - op_errno = ENOATTR; - break; - } - } else { - /* fresh create */ - op_ret = bdb_db_iwrite (bctx, key, - trav->value->data, - trav->value->len); - if (op_ret != 0) { - op_ret = -1; - op_errno = EEXIST; - break; - } else { - op_ret = 0; - op_errno = 0; - } /* if(op_ret!=0)...else */ - } /* if(flags&XATTR_REPLACE)...else */ - if (bctx) { - /* NOTE: bctx_unref always returns success, see - * description of bctx_unref for more details */ - bctx_unref (bctx); - } - } else { - /* do plain setxattr */ - op_ret = lsetxattr (real_path, - trav->key, trav->value->data, - trav->value->len, - flags); - op_errno = errno; - - if ((op_errno == ENOATTR) || (op_errno == EEXIST)) { - /* don't log, normal behaviour */ - ; - } else if (BDB_TIMED_LOG (op_errno, gf_bdb_xattr_log)) { - gf_log (this->name, GF_LOG_DEBUG, - "SETXATTR %"PRId64" (%s) - %s: %s", - loc->ino, loc->path, trav->key, - strerror (op_errno)); - /* do not continue, break out */ - break; - } else { - gf_log (this->name, GF_LOG_DEBUG, - "SETXATTR %"PRId64" (%s) - %s: %s", - loc->ino, loc->path, trav->key, - strerror (op_errno)); - } - } /* if(ZR_FILE_CONTENT_REQUEST())...else */ - trav = trav->next; - }/* while(trav) */ -out: - STACK_UNWIND (frame, op_ret, op_errno); - return 0; -}/* bdb_setxattr */ - - -/* bdb_gettxattr - get extended attributes. - * - * bdb allows getxattr operation only on directories. - * bdb_getxattr retrieves the whole content of the file, when - * glusterfs.file. is specified. - * - * @frame: call frame. - * @this: xlator_t of this instance of bdb xlator. - * @loc: loc_t specifying the file to operate upon. - * @name: name of extended attributes to get for @loc. - * - * NOTE: see description of bdb_setxattr for details on how - * 'glusterfs.file.' is handles by bdb. - */ -int32_t -bdb_getxattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - const char *name) -{ - int32_t op_ret = 0; - int32_t op_errno = 0; - dict_t *dict = NULL; - bctx_t *bctx = NULL; - char *buf = NULL; - char *key_string = NULL; - int32_t list_offset = 0; - size_t size = 0; - size_t remaining_size = 0; - char *real_path = NULL; - char key[1024] = {0,}; - char *value = NULL; - char *list = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, name, out); - - dict = dict_new (); - GF_VALIDATE_OR_GOTO (this->name, dict, out); - - if (!S_ISDIR (loc->inode->st_mode)) { - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: ENOATTR " - "(not a directory)", - loc->ino, loc->path, name); - op_ret = -1; - op_errno = ENOATTR; - goto out; - } - - if (name && GF_FILE_CONTENT_REQUEST(name)) { - bctx = bctx_lookup (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: ENOMEM" - "(no database handle for directory)", - loc->ino, loc->path, name); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - key_string = BDB_KEY_FROM_FREQUEST_KEY(name); - - op_ret = bdb_db_iread (bctx, key_string, &buf); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: ENOATTR" - "(attribute not present in database)", - loc->ino, loc->path, name); - op_errno = ENOATTR; - goto out; - } - - op_ret = dict_set_dynptr (dict, (char *)name, buf, op_ret); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: ENOATTR" - "(attribute present in database, " - "dict set failed)", - loc->ino, loc->path, name); - op_errno = ENODATA; - } - - goto out; - } - - MAKE_REAL_PATH (real_path, this, loc->path); - size = sys_llistxattr (real_path, NULL, 0); - op_errno = errno; - if (size < 0) { - if (BDB_TIMED_LOG (op_errno, gf_bdb_xattr_log)) { - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: %s", - loc->ino, loc->path, name, strerror (op_errno)); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: %s", - loc->ino, loc->path, name, strerror (op_errno)); - } - op_ret = -1; - op_errno = ENOATTR; - - goto out; - } - - if (size == 0) - goto done; - - list = alloca (size + 1); - if (list == NULL) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: %s", - loc->ino, loc->path, name, strerror (op_errno)); - } - - size = sys_llistxattr (real_path, list, size); - op_ret = size; - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: %s", - loc->ino, loc->path, name, strerror (op_errno)); - goto out; - } - - remaining_size = size; - list_offset = 0; - while (remaining_size > 0) { - if(*(list+list_offset) == '\0') - break; - - strcpy (key, list + list_offset); - - op_ret = sys_lgetxattr (real_path, key, NULL, 0); - if (op_ret == -1) - break; - - value = GF_CALLOC (op_ret + 1, sizeof(char), gf_bdb_mt_char); - GF_VALIDATE_OR_GOTO (this->name, value, out); - - op_ret = sys_lgetxattr (real_path, key, value, - op_ret); - if (op_ret == -1) - break; - value [op_ret] = '\0'; - op_ret = dict_set_dynptr (dict, key, - value, op_ret); - if (op_ret < 0) { - GF_FREE (value); - gf_log (this->name, GF_LOG_DEBUG, - "GETXATTR %"PRId64" (%s) - %s: " - "(skipping key %s)", - loc->ino, loc->path, name, key); - continue; - } - remaining_size -= strlen (key) + 1; - list_offset += strlen (key) + 1; - } /* while(remaining_size>0) */ -done: -out: - if(bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno, dict); - - if (dict) - dict_unref (dict); - - return 0; -}/* bdb_getxattr */ - - -int32_t -bdb_removexattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - const char *name) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - bctx_t *bctx = NULL; - char *real_path = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, name, out); - - if (!S_ISDIR(loc->inode->st_mode)) { - gf_log (this->name, GF_LOG_DEBUG, - "REMOVEXATTR %"PRId64" (%s) - %s: ENOATTR " - "(not a directory)", - loc->ino, loc->path, name); - op_ret = -1; - op_errno = ENOATTR; - goto out; - } - - if (GF_FILE_CONTENT_REQUEST(name)) { - bctx = bctx_lookup (B_TABLE(this), loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "REMOVEXATTR %"PRId64" (%s) - %s: ENOATTR" - "(no database handle for directory)", - loc->ino, loc->path, name); - op_ret = -1; - op_errno = ENOATTR; - goto out; - } - - op_ret = bdb_db_iremove (bctx, name); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "REMOVEXATTR %"PRId64" (%s) - %s: ENOATTR" - "(no such attribute in database)", - loc->ino, loc->path, name); - op_errno = ENOATTR; - } - goto out; - } - - MAKE_REAL_PATH(real_path, this, loc->path); - op_ret = lremovexattr (real_path, name); - op_errno = errno; - if (op_ret == -1) { - if (BDB_TIMED_LOG (op_errno, gf_bdb_xattr_log)) { - gf_log (this->name, GF_LOG_DEBUG, - "REMOVEXATTR %"PRId64" (%s) - %s: %s", - loc->ino, loc->path, name, strerror (op_errno)); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "REMOVEXATTR %"PRId64" (%s) - %s: %s", - loc->ino, loc->path, name, strerror (op_errno)); - } - } /* if(op_ret == -1) */ -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno); - return 0; -}/* bdb_removexattr */ - - -int32_t -bdb_fsyncdir (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - int datasync) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - struct bdb_fd *bfd = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "FSYNCDIR %"PRId64": EBADFD" - "(failed to find internal context from fd)", - fd->inode->ino); - op_errno = EBADFD; - op_ret = -1; - } - -out: - STACK_UNWIND (frame, op_ret, op_errno); - - return 0; -}/* bdb_fsycndir */ - - -int32_t -bdb_access (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t mask) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - char *real_path = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = access (real_path, mask); - op_errno = errno; - /* TODO: implement for db entries */ -out: - STACK_UNWIND (frame, op_ret, op_errno); - return 0; -}/* bdb_access */ - - -int32_t -bdb_ftruncate (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - off_t offset) -{ - int32_t op_ret = -1; - int32_t op_errno = EPERM; - struct stat buf = {0,}; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - /* TODO: impelement */ -out: - STACK_UNWIND (frame, op_ret, op_errno, &buf); - - return 0; -} - - - -int32_t -bdb_setdents (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - int32_t flags, - dir_entry_t *entries, - int32_t count) -{ - int32_t op_ret = -1, op_errno = EINVAL; - char *entry_path = NULL; - int32_t real_path_len = 0; - int32_t entry_path_len = 0; - int32_t ret = 0; - struct bdb_dir *bfd = NULL; - dir_entry_t *trav = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, entries, out); - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "SETDENTS %"PRId64": EBADFD", - fd->inode->ino); - op_errno = EBADFD; - op_ret = -1; - goto out; - } - - real_path_len = strlen (bfd->path); - entry_path_len = real_path_len + 256; - entry_path = GF_CALLOC (1, entry_path_len, gf_bdb_mt_char); - GF_VALIDATE_OR_GOTO (this->name, entry_path, out); - - strcpy (entry_path, bfd->path); - entry_path[real_path_len] = '/'; - - trav = entries->next; - while (trav) { - char pathname[PATH_MAX] = {0,}; - strcpy (pathname, entry_path); - strcat (pathname, trav->name); - - if (S_ISDIR(trav->buf.st_mode)) { - /* If the entry is directory, create it by calling - * 'mkdir'. If directory is not present, it will be - * created, if its present, no worries even if it fails. - */ - ret = mkdir (pathname, trav->buf.st_mode); - if ((ret == -1) && (errno != EEXIST)) { - op_errno = errno; - op_ret = ret; - gf_log (this->name, GF_LOG_DEBUG, - "SETDENTS %"PRId64" - %s: %s " - "(mkdir failed)", - fd->inode->ino, pathname, - strerror (op_errno)); - goto loop; - } - - /* Change the mode - * NOTE: setdents tries its best to restore the state - * of storage. if chmod and chown fail, they can - * be ignored now */ - ret = chmod (pathname, trav->buf.st_mode); - if (ret < 0) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "SETDENTS %"PRId64" - %s: %s " - "(chmod failed)", - fd->inode->ino, pathname, - strerror (op_errno)); - goto loop; - } - /* change the ownership */ - ret = chown (pathname, trav->buf.st_uid, - trav->buf.st_gid); - if (ret != 0) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "SETDENTS %"PRId64" - %s: %s " - "(chown failed)", - fd->inode->ino, pathname, - strerror (op_errno)); - goto loop; - } - } else if ((flags == GF_SET_IF_NOT_PRESENT) || - (flags != GF_SET_DIR_ONLY)) { - /* Create a 0 byte file here */ - if (S_ISREG (trav->buf.st_mode)) { - op_ret = bdb_db_icreate (bfd->ctx, - trav->name); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "SETDENTS %"PRId64" (%s) - %s: " - "%s (database entry creation" - " failed)", - fd->inode->ino, - bfd->ctx->directory, trav->name, - strerror (op_errno)); - } - } else if (S_ISLNK (trav->buf.st_mode)) { - /* TODO: impelement */; - } else { - gf_log (this->name, GF_LOG_DEBUG, - "SETDENTS %"PRId64" (%s) - %s mode=%o: " - "(unsupported file type)", - fd->inode->ino, - bfd->ctx->directory, trav->name, - trav->buf.st_mode); - } /* if(S_ISREG())...else */ - } /* if(S_ISDIR())...else if */ - loop: - /* consider the next entry */ - trav = trav->next; - } /* while(trav) */ - -out: - STACK_UNWIND (frame, op_ret, op_errno); - - GF_FREE (entry_path); - return 0; -} - -int32_t -bdb_fstat (call_frame_t *frame, - xlator_t *this, - fd_t *fd) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - struct stat stbuf = {0,}; - struct bdb_fd *bfd = NULL; - bctx_t *bctx = NULL; - char *db_path = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "FSTAT %"PRId64": EBADFD " - "(failed to find internal context in fd)", - fd->inode->ino); - op_errno = EBADFD; - op_ret = -1; - goto out; - } - - bctx = bfd->ctx; - - MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); - op_ret = lstat (db_path, &stbuf); - op_errno = errno; - if (op_ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "FSTAT %"PRId64": %s" - "(failed to stat database file %s)", - fd->inode->ino, strerror (op_errno), db_path); - goto out; - } - - stbuf.st_ino = fd->inode->ino; - stbuf.st_size = bdb_db_fread (bfd, NULL, 0, 0); - stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); - -out: - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); - return 0; -} - -gf_dirent_t * -gf_dirent_for_namen (const char *name, - size_t len) -{ - char *tmp_name = NULL; - - tmp_name = alloca (len + 1); - - memcpy (tmp_name, name, len); - - tmp_name[len] = 0; - - return gf_dirent_for_name (tmp_name); -} - -int32_t -bdb_readdir (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - size_t size, - off_t off) -{ - struct bdb_dir *bfd = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - size_t filled = 0; - gf_dirent_t *this_entry = NULL; - gf_dirent_t entries; - struct dirent *entry = NULL; - off_t in_case = 0; - int32_t this_size = 0; - DBC *cursorp = NULL; - int32_t count = 0; - off_t offset = 0; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - INIT_LIST_HEAD (&entries.list); - - BDB_FCTX_GET (fd, this, &bfd); - if (bfd == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "READDIR %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EBADFD " - "(failed to find internal context in fd)", - fd->inode->ino, size, off); - op_errno = EBADFD; - op_ret = -1; - goto out; - } - - op_ret = bdb_cursor_open (bfd->ctx, &cursorp); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "READDIR %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EBADFD " - "(failed to open cursor to database handle)", - fd->inode->ino, size, off); - op_errno = EBADFD; - goto out; - } - - if (off) { - DBT sec = {0,}, pri = {0,}, val = {0,}; - sec.data = &(off); - sec.size = sizeof (off); - sec.flags = DB_DBT_USERMEM; - val.dlen = 0; - val.doff = 0; - val.flags = DB_DBT_PARTIAL; - - op_ret = bdb_cursor_get (cursorp, &sec, &pri, &val, DB_SET); - if (op_ret == DB_NOTFOUND) { - offset = off; - goto dir_read; - } - } - - while (filled <= size) { - DBT sec = {0,}, pri = {0,}, val = {0,}; - - this_entry = NULL; - - sec.flags = DB_DBT_MALLOC; - pri.flags = DB_DBT_MALLOC; - val.dlen = 0; - val.doff = 0; - val.flags = DB_DBT_PARTIAL; - op_ret = bdb_cursor_get (cursorp, &sec, &pri, &val, DB_NEXT); - - if (op_ret == DB_NOTFOUND) { - /* we reached end of the directory */ - op_ret = 0; - op_errno = 0; - break; - } else if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "READDIR %"PRId64" - %"GF_PRI_SIZET",%"PRId64":" - "(failed to read the next entry from database)", - fd->inode->ino, size, off); - op_errno = ENOENT; - break; - } /* if (op_ret == DB_NOTFOUND)...else if...else */ - - if (pri.data == NULL) { - /* NOTE: currently ignore when we get key.data == NULL. - * TODO: we should not get key.data = NULL */ - gf_log (this->name, GF_LOG_DEBUG, - "READDIR %"PRId64" - %"GF_PRI_SIZET",%"PRId64":" - "(null key read for entry from database)", - fd->inode->ino, size, off); - continue; - }/* if(key.data)...else */ - count++; - this_size = bdb_dirent_size (&pri); - if (this_size + filled > size) - break; - /* TODO - consider endianness here */ - this_entry = gf_dirent_for_namen ((const char *)pri.data, - pri.size); - - this_entry->d_ino = bdb_inode_transform (fd->inode->ino, - pri.data, - pri.size); - this_entry->d_off = *(uint32_t *)sec.data; - this_entry->d_type = 0; - this_entry->d_len = pri.size + 1; - - if (sec.data) { - GF_FREE (sec.data); - } - - if (pri.data) - GF_FREE (pri.data); - - list_add_tail (&this_entry->list, &entries.list); - - filled += this_size; - }/* while */ - bdb_cursor_close (bfd->ctx, cursorp); - op_ret = filled; - op_errno = 0; - if (filled >= size) { - goto out; - } -dir_read: - /* hungry kyaa? */ - if (!offset) { - rewinddir (bfd->dir); - } else { - seekdir (bfd->dir, offset); - } - - while (filled <= size) { - this_entry = NULL; - entry = NULL; - this_size = 0; - - in_case = telldir (bfd->dir); - entry = readdir (bfd->dir); - if (!entry) - break; - - if (IS_BDB_PRIVATE_FILE(entry->d_name)) - continue; - - this_size = dirent_size (entry); - - if (this_size + filled > size) { - seekdir (bfd->dir, in_case); - break; - } - - count++; - - this_entry = gf_dirent_for_name (entry->d_name); - this_entry->d_ino = entry->d_ino; - - this_entry->d_off = entry->d_off; - - this_entry->d_type = entry->d_type; - this_entry->d_len = entry->d_reclen; - - - list_add_tail (&this_entry->list, &entries.list); - - filled += this_size; - } - op_ret = filled; - op_errno = 0; - -out: - gf_log (this->name, GF_LOG_DEBUG, - "READDIR %"PRId64" - %"GF_PRI_SIZET" (%"PRId32")" - "/%"GF_PRI_SIZET",%"PRId64":" - "(failed to read the next entry from database)", - fd->inode->ino, filled, count, size, off); - - STACK_UNWIND (frame, count, op_errno, &entries); - - gf_dirent_free (&entries); - - return 0; -} - - -int32_t -bdb_stats (call_frame_t *frame, - xlator_t *this, - int32_t flags) - -{ - int32_t op_ret = 0; - int32_t op_errno = 0; - - struct xlator_stats xlstats = {0, }, *stats = NULL; - struct statvfs buf = {0,}; - struct timeval tv; - struct bdb_private *private = NULL; - int64_t avg_read = 0; - int64_t avg_write = 0; - int64_t _time_ms = 0; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - - private = (struct bdb_private *)(this->private); - stats = &xlstats; - - op_ret = statvfs (private->export_path, &buf); - if (op_ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "STATS %s: %s", - private->export_path, strerror (op_errno)); - goto out; - } - - stats->nr_files = private->stats.nr_files; - - /* client info is maintained at FSd */ - stats->nr_clients = private->stats.nr_clients; - - /* Number of Free block in the filesystem. */ - stats->free_disk = buf.f_bfree * buf.f_bsize; - stats->total_disk_size = buf.f_blocks * buf.f_bsize; /* */ - stats->disk_usage = (buf.f_blocks - buf.f_bavail) * buf.f_bsize; - - /* Calculate read and write usage */ - gettimeofday (&tv, NULL); - - /* Read */ - _time_ms = (tv.tv_sec - private->init_time.tv_sec) * 1000 + - ((tv.tv_usec - private->init_time.tv_usec) / 1000); - - avg_read = (_time_ms) ? (private->read_value / _time_ms) : 0;/* KBps */ - avg_write = (_time_ms) ? (private->write_value / _time_ms) : 0; - - _time_ms = (tv.tv_sec - private->prev_fetch_time.tv_sec) * 1000 + - ((tv.tv_usec - private->prev_fetch_time.tv_usec) / 1000); - if (_time_ms - && ((private->interval_read / _time_ms) > private->max_read)) { - private->max_read = (private->interval_read / _time_ms); - } - if (_time_ms - && ((private->interval_write / _time_ms) > private->max_write)) { - private->max_write = private->interval_write / _time_ms; - } - - stats->read_usage = avg_read / private->max_read; - stats->write_usage = avg_write / private->max_write; - - gettimeofday (&(private->prev_fetch_time), NULL); - private->interval_read = 0; - private->interval_write = 0; - -out: - STACK_UNWIND (frame, op_ret, op_errno, stats); - return 0; -} - - -int32_t -bdb_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) -{ - gf_log (this->name, GF_LOG_ERROR, - "glusterfs internal locking request. please load " - "'features/locks' translator to enable glusterfs " - "support"); - - STACK_UNWIND (frame, -1, ENOSYS); - return 0; -} - - -int32_t -bdb_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock) -{ - gf_log (this->name, GF_LOG_ERROR, - "glusterfs internal locking request. please load " - "'features/locks' translator to enable glusterfs " - "support"); - - STACK_UNWIND (frame, -1, ENOSYS); - return 0; -} - - -int32_t -bdb_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) -{ - gf_log (this->name, GF_LOG_ERROR, - "glusterfs internal locking request. please load " - "'features/locks' translator to enable glusterfs " - "support"); - - STACK_UNWIND (frame, -1, ENOSYS); - return 0; -} - - -int32_t -bdb_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) -{ - gf_log (this->name, GF_LOG_ERROR, - "glusterfs internal locking request. please load " - "'features/locks' translator to enable glusterfs " - "support"); - - STACK_UNWIND (frame, -1, ENOSYS); - return 0; -} - -int32_t -bdb_checksum (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flag) -{ - char *real_path = NULL; - DIR *dir = NULL; - struct dirent *dirent = NULL; - uint8_t file_checksum[NAME_MAX] = {0,}; - uint8_t dir_checksum[NAME_MAX] = {0,}; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - int32_t idx = 0, length = 0; - bctx_t *bctx = NULL; - DBC *cursorp = NULL; - char *data = NULL; - uint8_t no_break = 1; - - GF_VALIDATE_OR_GOTO ("bdb", frame, out); - GF_VALIDATE_OR_GOTO ("bdb", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - - MAKE_REAL_PATH (real_path, this, loc->path); - - { - dir = opendir (real_path); - op_errno = errno; - GF_VALIDATE_OR_GOTO (this->name, dir, out); - while ((dirent = readdir (dir))) { - if (!dirent) - break; - - if (IS_BDB_PRIVATE_FILE(dirent->d_name)) - continue; - - length = strlen (dirent->d_name); - for (idx = 0; idx < length; idx++) - dir_checksum[idx] ^= dirent->d_name[idx]; - } /* while((dirent...)) */ - closedir (dir); - } - - { - bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); - if (bctx == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "CHECKSUM %"PRId64" (%s): ENOMEM" - "(failed to lookup database handle)", - loc->inode->ino, loc->path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - op_ret = bdb_cursor_open (bctx, &cursorp); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "CHECKSUM %"PRId64" (%s): EBADFD" - "(failed to open cursor to database handle)", - loc->inode->ino, loc->path); - op_ret = -1; - op_errno = EBADFD; - goto out; - } - - - do { - DBT key = {0,}, value = {0,}, sec = {0,}; - - key.flags = DB_DBT_MALLOC; - value.doff = 0; - value.dlen = 0; - op_ret = bdb_cursor_get (cursorp, &sec, &key, - &value, DB_NEXT); - - if (op_ret == DB_NOTFOUND) { - op_ret = 0; - op_errno = 0; - no_break = 0; - } else if (op_ret == 0){ - /* successfully read */ - data = key.data; - length = key.size; - for (idx = 0; idx < length; idx++) - file_checksum[idx] ^= data[idx]; - - GF_FREE (key.data); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "CHECKSUM %"PRId64" (%s)", - loc->inode->ino, loc->path); - op_ret = -1; - op_errno = ENOENT; /* TODO: watch errno */ - no_break = 0; - }/* if(op_ret == DB_NOTFOUND)...else if...else */ - } while (no_break); - bdb_cursor_close (bctx, cursorp); - } -out: - if (bctx) { - /* NOTE: bctx_unref always returns success, - * see description of bctx_unref for more details */ - bctx_unref (bctx); - } - - STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum); - - return 0; -} - -/** - * notify - when parent sends PARENT_UP, send CHILD_UP event from here - */ -int32_t -notify (xlator_t *this, - int32_t event, - void *data, - ...) -{ - switch (event) - { - case GF_EVENT_PARENT_UP: - { - /* Tell the parent that bdb xlator is up */ - GF_ASSERT ((this->private != NULL) && - (BDB_ENV(this) != NULL)); - default_notify (this, GF_EVENT_CHILD_UP, data); - } - break; - default: - /* */ - break; - } - return 0; -} - - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_bdb_mt_end + 1); - - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } - - return ret; -} - -/** - * init - - */ -int32_t -init (xlator_t *this) -{ - int32_t ret = -1; - struct stat buf = {0,}; - struct bdb_private *_private = NULL; - char *directory = NULL; - bctx_t *bctx = NULL; - - GF_VALIDATE_OR_GOTO ("bdb", this, out); - - if (this->children) { - gf_log (this->name, GF_LOG_ERROR, - "'storage/bdb' translator should be used as leaf node " - "in translator tree. please remove the subvolumes" - " specified and retry."); - goto err; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_ERROR, - "'storage/bdb' translator needs at least one among " - "'protocol/server' or 'mount/fuse' translator as " - "parent. please add 'protocol/server' or 'mount/fuse' " - "as parent of 'storage/bdb' and retry. or you can also" - " try specifying mount-point on command-line."); - goto err; - } - - _private = GF_CALLOC (1, sizeof (*_private), gf_bdb_mt_bdb_private); - if (_private == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "could not allocate memory for 'storage/bdb' " - "configuration data-structure. cannot continue from " - "here"); - goto err; - } - - - ret = dict_get_str (this->options, "directory", &directory); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "'storage/bdb' needs at least " - "'option directory ' as " - "minimal configuration option. please specify an " - "export directory using " - "'option directory ' and " - "retry."); - goto err; - } - - umask (000); /* umask `masking' is done at the client side */ - - /* Check whether the specified directory exists, if not create it. */ - ret = stat (directory, &buf); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "specified export path '%s' does not exist. " - "please create the export path '%s' and retry.", - directory, directory); - goto err; - } else if (!S_ISDIR (buf.st_mode)) { - gf_log (this->name, GF_LOG_ERROR, - "specified export path '%s' is not a directory. " - "please specify a valid and existing directory as " - "export directory and retry.", - directory); - goto err; - } else { - ret = 0; - } - - - _private->export_path = gf_strdup (directory); - if (_private->export_path == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "could not allocate memory for 'storage/bdb' " - "configuration data-structure. cannot continue from " - "here"); - goto err; - } - - _private->export_path_length = strlen (_private->export_path); - - { - /* Stats related variables */ - gettimeofday (&_private->init_time, NULL); - gettimeofday (&_private->prev_fetch_time, NULL); - _private->max_read = 1; - _private->max_write = 1; - } - - this->private = (void *)_private; - - { - ret = bdb_db_init (this, this->options); - - if (ret < 0){ - gf_log (this->name, GF_LOG_ERROR, - "database environment initialisation failed. " - "manually run database recovery tool and " - "retry to run glusterfs"); - goto err; - } else { - bctx = bctx_lookup (_private->b_table, "/"); - /* NOTE: we are not doing bctx_unref() for root bctx, - * let it remain in active list forever */ - if (bctx == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "could not allocate memory for " - "'storage/bdb' configuration data-" - "structure. cannot continue from " - "here"); - goto err; - } else { - ret = 0; - goto out; - } - } - } -err: - if (_private) { - if (_private->export_path) - GF_FREE (_private->export_path); - - GF_FREE (_private); - } -out: - return ret; -} - -void -bctx_cleanup (struct list_head *head) -{ - bctx_t *trav = NULL; - bctx_t *tmp = NULL; - DB *storage = NULL; - DB *secondary = NULL; - - list_for_each_entry_safe (trav, tmp, head, list) { - LOCK (&trav->lock); - { - storage = trav->primary; - trav->primary = NULL; - - secondary = trav->secondary; - trav->secondary = NULL; - - list_del_init (&trav->list); - } - UNLOCK (&trav->lock); - - if (storage) { - storage->close (storage, 0); - storage = NULL; - } - - if (secondary) { - secondary->close (secondary, 0); - secondary = NULL; - } - } - return; -} - -void -fini (xlator_t *this) -{ - struct bdb_private *private = NULL; - int32_t ret = 0; - - private = this->private; - - if (B_TABLE(this)) { - /* close all the dbs from lru list */ - bctx_cleanup (&(B_TABLE(this)->b_lru)); - bctx_cleanup (&(B_TABLE(this)->active)); - - if (BDB_ENV(this)) { - LOCK (&private->active_lock); - { - private->active = 0; - } - UNLOCK (&private->active_lock); - - ret = pthread_join (private->checkpoint_thread, NULL); - if (ret != 0) { - gf_log (this->name, GF_LOG_CRITICAL, - "could not complete checkpointing " - "database environment. this might " - "result in inconsistencies in few" - " recent data and meta-data " - "operations"); - } - - BDB_ENV(this)->close (BDB_ENV(this), 0); - } else { - /* impossible to reach here */ - } - - GF_FREE (B_TABLE(this)); - } - GF_FREE (private); - return; -} - - -struct xlator_fops fops = { - .lookup = bdb_lookup, - .stat = bdb_stat, - .opendir = bdb_opendir, - .readdir = bdb_readdir, - .readlink = bdb_readlink, - .mknod = bdb_mknod, - .mkdir = bdb_mkdir, - .unlink = bdb_unlink, - .rmdir = bdb_rmdir, - .symlink = bdb_symlink, - .rename = bdb_rename, - .link = bdb_link, - .truncate = bdb_truncate, - .create = bdb_create, - .open = bdb_open, - .readv = bdb_readv, - .writev = bdb_writev, - .statfs = bdb_statfs, - .flush = bdb_flush, - .fsync = bdb_fsync, - .setxattr = bdb_setxattr, - .getxattr = bdb_getxattr, - .removexattr = bdb_removexattr, - .fsyncdir = bdb_fsyncdir, - .access = bdb_access, - .ftruncate = bdb_ftruncate, - .fstat = bdb_fstat, - .lk = bdb_lk, - .inodelk = bdb_inodelk, - .finodelk = bdb_finodelk, - .entrylk = bdb_entrylk, - .fentrylk = bdb_fentrylk, - .setdents = bdb_setdents, - .getdents = bdb_getdents, - .checksum = bdb_checksum, - .setattr = bdb_setattr, - .fsetattr = bdb_fsetattr, -}; - -struct xlator_cbks cbks = { - .release = bdb_release, - .releasedir = bdb_releasedir -}; - - -struct volume_options options[] = { - { .key = { "directory" }, - .type = GF_OPTION_TYPE_PATH, - .description = "export directory" - }, - { .key = { "logdir" }, - .type = GF_OPTION_TYPE_PATH, - .description = "directory to be used by libdb for writing" - "transaction logs. NOTE: in absence of 'logdir' " - "export directory itself will be used as 'logdir' also" - }, - { .key = { "errfile" }, - .type = GF_OPTION_TYPE_PATH, - .description = "path to be used for libdb error logging. " - "NOTE: absence of 'errfile' will disable any " - "error logging by libdb." - }, - { .key = { "dir-mode" }, - .type = GF_OPTION_TYPE_ANY /* base 8 number */ - }, - { .key = { "file-mode" }, - .type = GF_OPTION_TYPE_ANY, - .description = "file mode for regular files. stat() on a regular file" - " returns the mode specified by this option. " - "NOTE: specify value in octal" - }, - { .key = { "page-size" }, - .type = GF_OPTION_TYPE_SIZET, - .min = 512, - .max = 16384, - .description = "size of pages used to hold data by libdb. set it to " - "block size of exported filesystem for " - "optimal performance" - }, - { .key = { "open-db-lru-limit" }, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 2048, - .description = "maximum number of per directory databases that can " - "be kept open. NOTE: for _advanced_ users only." - }, - { .key = { "lock-timeout" }, - .type = GF_OPTION_TYPE_TIME, - .min = 0, - .max = 4260000, - .description = "define the maximum time a lock request can " - "be blocked by libdb. NOTE: only for _advanced_ users." - " do not specify this option when not sure." - }, - { .key = { "checkpoint-interval" }, - .type = GF_OPTION_TYPE_TIME, - .min = 1, - .max = 86400, - .description = "define the time interval between two consecutive " - "libdb checpoints. setting to lower value will leave " - "bdb perform slowly, but guarantees that minimum data" - " will be lost in case of a crash. NOTE: this option " - "is valid only when " - "'option mode=\"persistent\"' is set." - }, - { .key = { "transaction-timeout" }, - .type = GF_OPTION_TYPE_TIME, - .min = 0, - .max = 4260000, - .description = "maximum time for which a transaction can block " - "waiting for required resources." - }, - { .key = { "mode" }, - .type = GF_OPTION_TYPE_BOOL, - .value = { "cache", "persistent" }, - .description = "cache: data recovery is not guaranteed in case " - "of crash. persistent: data recovery is guaranteed, " - "since all operations are transaction protected." - }, - { .key = { "access-mode" }, - .type = GF_OPTION_TYPE_STR, - .value = {"btree", "hash" }, - .description = "chose the db access method. " - "NOTE: for _advanced_ users. leave the choice to " - "glusterfs when in doubt." - }, - { .key = { NULL } } -}; diff --git a/xlators/storage/bdb/src/bdb.h b/xlators/storage/bdb/src/bdb.h deleted file mode 100644 index da8937a02..000000000 --- a/xlators/storage/bdb/src/bdb.h +++ /dev/null @@ -1,530 +0,0 @@ -/* - Copyright (c) 2008-2011 Gluster, Inc. - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - . -*/ - -#ifndef _BDB_H -#define _BDB_H - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include -#include - -#include - -#ifdef linux -#ifdef __GLIBC__ -#include -#else -#include -#endif -#endif - -#ifdef HAVE_SYS_XATTR_H -#include -#endif - -#ifdef HAVE_SYS_EXTATTR_H -#include -#endif - -#include -#include "xlator.h" -#include "inode.h" -#include "compat.h" -#include "compat-errno.h" -#include "fd.h" -#include "syscall.h" - -#define BDB_STORAGE "/glusterfs_storage.db" - -/* numbers are not so reader-friendly, so lets have ON and OFF macros */ -#define ON 1 -#define OFF 0 - -#define BDB_DEFAULT_LRU_LIMIT 100 -#define BDB_DEFAULT_HASH_SIZE 100 - -#define BDB_ENOSPC_THRESHOLD 25600 - -#define BDB_DEFAULT_CHECKPOINT_INTERVAL 30 - -#define BCTX_ENV(bctx) (bctx->table->dbenv) - -#define BDB_EXPORT_PATH_LEN(_private) \ - (((struct bdb_private *)_private)->export_path_length) - -#define BDB_KEY_FROM_FREQUEST_KEY(_key) (&(key[15])) - -#define BDB_EXPORT_PATH(_private) \ - (((struct bdb_private *)_private)->export_path) -/* MAKE_REAL_PATH(var,this,path) - * make the real path on the underlying file-system - * - * @var: destination to hold the real path - * @this: pointer to xlator_t corresponding to bdb xlator - * @path: path, as seen from mount-point - */ -#define MAKE_REAL_PATH(var, this, path) do { \ - int base_len = BDB_EXPORT_PATH_LEN(this->private); \ - var = alloca (strlen (path) + base_len + 2); \ - strcpy (var, BDB_EXPORT_PATH(this->private)); \ - strcpy (&var[base_len], path); \ - } while (0) - - -#define BDB_TIMED_LOG(_errno,_counter) \ - ((_errno == ENOTSUP) && (((++_counter) % GF_UNIVERSAL_ANSWER) == 1)) - -#define GF_FILE_CONTENT_REQUEST ZR_FILE_CONTENT_REQUEST - -/* MAKE_REAL_PATH_TO_STORAGE_DB(var,this,path) - * make the real path to the storage-database file on file-system - * - * @var: destination to hold the real path - * @this: pointer to xlator_t corresponding to bdb xlator - * @path: path of the directory, as seen from mount-point - */ -#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do { \ - int base_len = BDB_EXPORT_PATH_LEN(this->private); \ - var = alloca (strlen (path) + \ - base_len + \ - strlen (BDB_STORAGE)); \ - strcpy (var, BDB_EXPORT_PATH(this->private)); \ - strcpy (&var[base_len], path); \ - strcat (var, BDB_STORAGE); \ - } while (0) - -/* MAKE_KEY_FROM_PATH(key,path) - * make a 'key', which we use as key in the underlying database by using - * the path - * - * @key: destination to hold the key - * @path: path to file as seen from mount-point - */ -#define MAKE_KEY_FROM_PATH(key, path) do { \ - char *tmp = alloca (strlen (path)); \ - strcpy (tmp, path); \ - key = basename (tmp); \ - }while (0); - -/* IS_BDB_PRIVATE_FILE(name) - * check if a given 'name' is bdb xlator's internal file name - * - * @name: basename of a file. - * - * bdb xlator reserves file names 'glusterfs_storage.db', - * 'glusterfs_ns.db'(used by bdb xlator itself), 'log.*', '__db.*' - * (used by libdb) - */ -#define IS_BDB_PRIVATE_FILE(name) ((!strncmp(name, "__db.", 5)) || \ - (!strcmp(name, "glusterfs_storage.db")) || \ - (!strcmp(name, "glusterfs_ns.db")) || \ - (!strncmp(name, "log.0000", 8))) - -/* check if 'name' is '.' or '..' entry */ -#define IS_DOT_DOTDOT(name) \ - ((!strncmp(name,".", 1)) || (!strncmp(name,"..", 2))) - -/* BDB_ICTX_SET(this,inode,bctx) - * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories. - * this will happen either in lookup() or mkdir(). - * - * @this: pointer xlator_t of bdb xlator. - * @inode: inode where 'struct bdb_ctx *' has to be stored. - * @bctx: a 'struct bdb_ctx *' - */ -#define BDB_ICTX_SET(_inode,_this,_bctx) do{ \ - inode_ctx_put(_inode, _this, (uint64_t)(long)_bctx); \ - }while (0); - -#define BDB_ICTX_GET(_inode,_this,_bctxp) do { \ - uint64_t tmp_bctx = 0; \ - inode_ctx_get (_inode, _this, &tmp_bctx); \ - *_bctxp = tmp_bctx; \ - }while (0); - -/* BDB_FCTX_SET(this,fd,bctx) - * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories. - * this will happen either in lookup() or mkdir(). - * - * @this: pointer xlator_t of bdb xlator. - * @inode: inode where 'struct bdb_ctx *' has to be stored. - * @bctx: a 'struct bdb_ctx *' - */ -#define BDB_FCTX_SET(_fd,_this,_bfd) do{ \ - fd_ctx_set(_fd, _this, (uint64_t)(long)_bfd); \ - }while (0); - -#define BDB_FCTX_GET(_fd,_this,_bfdp) do { \ - uint64_t tmp_bfd = 0; \ - fd_ctx_get (_fd, _this, &tmp_bfd); \ - *_bfdp = (void *)(long)tmp_bfd; \ - }while (0); - - -/* maximum number of open dbs that bdb xlator will ever have */ -#define BDB_MAX_OPEN_DBS 100 - -/* convert file size to block-count */ -#define BDB_COUNT_BLOCKS(size,blksize) (((size + blksize - 1)/blksize) - 1) - -/* file permissions, again macros are more readable */ -#define RWXRWXRWX 0777 -#define DEFAULT_FILE_MODE 0600 -#define DEFAULT_DIR_MODE 0755 - -/* see, if have a valid file permissions specification in @mode */ -#define IS_VALID_FILE_MODE(mode) (!(mode & (~RWXRWXRWX))) -#define IS_VALID_DIR_MODE(mode) (!(mode & (~(RWXRWXRWX))) - -/* maximum retries for a failed transactional operation */ -#define BDB_MAX_RETRIES 10 - -#define BDB_LL_PAGE_SIZE_DEFAULT 4096 -#define BDB_LL_PAGE_SIZE_MIN 4096 -#define BDB_LL_PAGE_SIZE_MAX 65536 - -#define PAGE_SIZE_IN_RANGE(_page_size) \ - ((_page_size >= BDB_LL_PAGE_SIZE_MIN) \ - && (table->page_size <= BDB_LL_PAGE_SIZE_MAX)) - -typedef struct bctx_table bctx_table_t; -typedef struct bdb_ctx bctx_t; -typedef struct bdb_cache bdb_cache_t; -typedef struct bdb_private bdb_private_t; - -struct bctx_table { - /* flags to be used for opening each database */ - uint64_t dbflags; - - /* cache: can be either ON or OFF */ - uint64_t cache; - - /* used to lock the 'struct bctx_table *' */ - gf_lock_t lock; - - /* lock for checkpointing */ - gf_lock_t checkpoint_lock; - - /* hash table of 'struct bdb_ctx' */ - struct list_head *b_hash; - - /* list of active 'struct bdb_ctx' */ - struct list_head active; - - /* lru list of inactive 'struct bdb_ctx' */ - struct list_head b_lru; - struct list_head purge; - uint32_t lru_limit; - uint32_t lru_size; - uint32_t hash_size; - - /* access mode for accessing the databases, can be DB_HASH, DB_BTREE */ - DBTYPE access_mode; - - /* DB_ENV under which every db operation is carried over */ - DB_ENV *dbenv; - int32_t transaction; - xlator_t *this; - - /* page-size of DB, DB->set_pagesize(), should be set before DB->open */ - uint64_t page_size; -}; - -struct bdb_ctx { - /* controller members */ - - /* lru list of 'struct bdb_ctx's, a bdb_ctx can exist in one of - * b_hash or lru lists */ - struct list_head list; - - /* directory 'name' hashed list of 'struct bdb_ctx's */ - struct list_head b_hash; - - struct bctx_table *table; - int32_t ref; /* reference count */ - gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */ - - char *directory; /* directory path */ - - /* pointer to open database, that resides inside this directory */ - DB *primary; - DB *secondary; - uint32_t cache; /* cache ON or OFF */ - - /* per directory cache, bdb xlator's internal cache */ - struct list_head c_list; /* linked list of cached records */ - int32_t c_count; /* number of cached records */ - - /* index to hash table list, to which this ctx belongs */ - int32_t key_hash; - char *db_path; /* absolute path to db file */ -}; - -struct bdb_fd { - /* pointer to bdb_ctx of the parent directory */ - struct bdb_ctx *ctx; - - /* name of the file. NOTE: basename, not the complete path */ - char *key; - int32_t flags; /* open flags */ -}; - -struct bdb_dir { - /* pointer to bdb_ctx of this directory */ - struct bdb_ctx *ctx; - - /* open directory pointer, as returned by opendir() */ - DIR *dir; - - char *path; /* path to this directory */ -}; - -/* cache */ -struct bdb_cache { - /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */ - struct list_head c_list; - - /* name of the file this cache holds. NOTE: basename of file */ - char *key; - char *data; /* file content */ - - /* size of the file content that this cache holds */ - size_t size; -}; - - -struct bdb_private { - /* pointer to inode table that we use */ - inode_table_t *itable; - int32_t temp; /**/ - char is_stateless; /**/ - - /* path to the export directory - * (option directory ) */ - char *export_path; - - /* length of 'export_path' string */ - int32_t export_path_length; - - /* statistics */ - /* Statistics, provides activity of the server */ - struct xlator_stats stats; - - struct timeval prev_fetch_time; - struct timeval init_time; - int32_t max_read; /* */ - int32_t max_write; /* */ - - /* Used to calculate the max_read value */ - int64_t interval_read; - - /* Used to calculate the max_write value */ - int64_t interval_write; - int64_t read_value; /* Total read, from init */ - int64_t write_value; /* Total write, from init */ - - /* bdb xlator specific private data */ - - /* flags used for opening DB_ENV for this xlator */ - uint64_t envflags; - - /* flags to be used for opening each database */ - uint64_t dbflags; - - /* cache: can be either ON or OFF */ - uint64_t cache; - - /* transaction: can be either ON or OFF */ - uint32_t transaction; - uint32_t active; - gf_lock_t active_lock; - struct bctx_table *b_table; - - /* access mode for accessing the databases, can be DB_HASH, DB_BTREE - * (option access-mode ) */ - DBTYPE access_mode; - - /* mode for each and every file stored on bdb - * (option file-mode ) */ - mode_t file_mode; - - /* mode for each and every directory stored on bdb - * (option dir-mode ) */ - mode_t dir_mode; - - /* mode for each and every symlink stored on bdb */ - mode_t symlink_mode; - - /* pthread_t object used for creating checkpoint thread */ - pthread_t checkpoint_thread; - - /* time duration between two consecutive checkpoint operations. - * (option checkpoint-interval ) */ - uint32_t checkpoint_interval; - - /* environment log directory (option logdir ) */ - char *logdir; - - /* errfile path, used by environment to print detailed error log. - * (option errfile ) */ - char *errfile; - - /* DB_ENV->set_errfile() expects us to fopen - * the errfile before doing DB_ENV->set_errfile() */ - FILE *errfp; - - /* used by DB_ENV->set_timeout to set the timeout for - * a transactionally encapsulated DB->operation() to - * timeout before waiting for locks to be released. - * (option transaction-timeout ) - */ - uint32_t txn_timeout; - uint32_t lock_timeout; - - /* DB_AUTO_LOG_REMOVE flag for DB_ENV*/ - uint32_t log_auto_remove; - uint32_t log_region_max; -}; - - -static inline int32_t -bdb_txn_begin (DB_ENV *dbenv, - DB_TXN **ptxnid) -{ - return dbenv->txn_begin (dbenv, NULL, ptxnid, 0); -} - -static inline int32_t -bdb_txn_abort (DB_TXN *txnid) -{ - return txnid->abort (txnid); -} - -static inline int32_t -bdb_txn_commit (DB_TXN *txnid) -{ - return txnid->commit (txnid, 0); -} - -void * -bdb_db_stat (bctx_t *bctx, - DB_TXN *txnid, - uint32_t flags); - -/*int32_t -bdb_db_get(struct bdb_ctx *bctx, - DB_TXN *txnid, - const char *key_string, - char **buf, - size_t size, - off_t offset); -*/ -int32_t -bdb_db_fread (struct bdb_fd *bfd, char *bufp, size_t size, off_t offset); - -int32_t -bdb_db_iread (struct bdb_ctx *bctx, const char *key, char **bufp); - -#define BDB_TRUNCATE_RECORD 0xcafebabe - -/*int32_t -bdb_db_put (struct bdb_ctx *bctx, - DB_TXN *txnid, - const char *key_string, - const char *buf, - size_t size, - off_t offset, - int32_t flags); -*/ -int32_t -bdb_db_icreate (struct bdb_ctx *bctx, const char *key); - -int32_t -bdb_db_fwrite (struct bdb_fd *bfd, char *buf, size_t size, off_t offset); - -int32_t -bdb_db_iwrite (struct bdb_ctx *bctx, const char *key, char *buf, size_t size); - -int32_t -bdb_db_itruncate (struct bdb_ctx *bctx, const char *key); - -int32_t -bdb_db_iremove (struct bdb_ctx *bctx, - const char *key); - -ino_t -bdb_inode_transform (ino_t parent, - const char *name, - size_t namelen); - -int32_t -bdb_cursor_open (struct bdb_ctx *bctx, - DBC **cursorp); - -int32_t -bdb_cursor_get (DBC *cursorp, - DBT *sec, DBT *pri, - DBT *value, - int32_t flags); - - -int32_t -bdb_cursor_close (struct bdb_ctx *ctx, - DBC *cursorp); - - -int32_t -bdb_dirent_size (DBT *key); - -int32_t -dirent_size (struct dirent *entry); - -int -bdb_db_init (xlator_t *this, - dict_t *options); - -void -bdb_dbs_from_dict_close (dict_t *this, - char *key, - data_t *value, - void *data); - -bctx_t * -bctx_lookup (struct bctx_table *table, - const char *path); - -bctx_t * -bctx_parent -(struct bctx_table *table, - const char *path); - -bctx_t * -bctx_unref (bctx_t *ctx); - -bctx_t * -bctx_ref (bctx_t *ctx); - -#endif /* _BDB_H */ -- cgit