summaryrefslogtreecommitdiffstats
path: root/xlators/storage/bd/src/bd-helper.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/bd/src/bd-helper.c')
-rw-r--r--xlators/storage/bd/src/bd-helper.c562
1 files changed, 562 insertions, 0 deletions
diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c
new file mode 100644
index 00000000000..2c1b77a9b3e
--- /dev/null
+++ b/xlators/storage/bd/src/bd-helper.c
@@ -0,0 +1,562 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <lvm2app.h>
+
+#include "bd.h"
+#include "run.h"
+
+int
+bd_inode_ctx_set (inode_t *inode, xlator_t *this, bd_attr_t *ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+ ctx_int = (long)ctx;
+ ret = inode_ctx_set (inode, this, &ctx_int);
+out:
+ return ret;
+}
+
+int
+bd_inode_ctx_get (inode_t *inode, xlator_t *this, bd_attr_t **ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ ret = inode_ctx_get (inode, this, &ctx_int);
+ if (ret)
+ return ret;
+ if (ctx)
+ *ctx = (bd_attr_t *) ctx_int;
+out:
+ return ret;
+}
+
+void
+bd_local_free (xlator_t *this, bd_local_t *local)
+{
+ if (!local)
+ return;
+ if (local->fd)
+ fd_unref (local->fd);
+ else if (local->loc.path)
+ loc_wipe (&local->loc);
+ if (local->dict)
+ dict_unref (local->dict);
+ if (local->inode)
+ inode_unref (local->inode);
+ if (local->bdatt) {
+ GF_FREE (local->bdatt->type);
+ GF_FREE (local->bdatt);
+ }
+ mem_put (local);
+ local = NULL;
+}
+
+bd_local_t *
+bd_local_init (call_frame_t *frame, xlator_t *this)
+{
+ frame->local = mem_get0 (this->local_pool);
+ if (!frame->local)
+ return NULL;
+
+ return frame->local;
+}
+
+/*
+ * VG are set with the tag in GF_XATTR_VOL_ID_KEY:<uuid> format.
+ * This function validates this tag agains volume-uuid. Also goes
+ * through LV list to find out if a thin-pool is configured or not.
+ */
+int bd_scan_vg (xlator_t *this, bd_priv_t *priv)
+{
+ vg_t brick = NULL;
+ data_t *tmp_data = NULL;
+ struct dm_list *tags = NULL;
+ int op_ret = -1;
+ uuid_t dict_uuid = {0, };
+ uuid_t vg_uuid = {0, };
+ gf_boolean_t uuid = _gf_false;
+ lvm_str_list_t *strl = NULL;
+ struct dm_list *lv_dm_list = NULL;
+ lv_list_t *lv_list = NULL;
+ struct dm_list *dm_seglist = NULL;
+ lvseg_list_t *seglist = NULL;
+ lvm_property_value_t prop = {0, };
+ gf_boolean_t thin = _gf_false;
+ const char *lv_name = NULL;
+
+ brick = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+ if (!brick) {
+ gf_log (this->name, GF_LOG_CRITICAL, "VG %s is not found",
+ priv->vg);
+ return ENOENT;
+ }
+
+ lv_dm_list = lvm_vg_list_lvs (brick);
+ if (!lv_dm_list)
+ goto check;
+
+ dm_list_iterate_items (lv_list, lv_dm_list) {
+ dm_seglist = lvm_lv_list_lvsegs (lv_list->lv);
+ if (!dm_seglist)
+ continue;
+ dm_list_iterate_items (seglist, dm_seglist) {
+ prop = lvm_lvseg_get_property (seglist->lvseg,
+ "segtype");
+ if (!prop.is_valid || !prop.value.string)
+ continue;
+ if (!strcmp (prop.value.string, "thin-pool")) {
+ thin = _gf_true;
+ lv_name = lvm_lv_get_name (lv_list->lv);
+ priv->pool = gf_strdup (lv_name);
+ gf_log (THIS->name, GF_LOG_INFO, "Thin Pool "
+ "\"%s\" will be used for thin LVs",
+ lv_name);
+ break;
+ }
+ }
+ }
+
+check:
+ /* If there is no volume-id set in dict, we cant validate */
+ tmp_data = dict_get (this->options, "volume-id");
+ if (!tmp_data) {
+ op_ret = 0;
+ goto out;
+ }
+
+ op_ret = uuid_parse (tmp_data->data, dict_uuid);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong volume-id (%s) set in volume file",
+ tmp_data->data);
+ op_ret = -1;
+ goto out;
+ }
+
+ tags = lvm_vg_get_tags (brick);
+ if (!tags) { /* no tags in the VG */
+ gf_log (this->name, GF_LOG_ERROR,
+ "Extended attribute trusted.glusterfs."
+ "volume-id is absent");
+ op_ret = -1;
+ goto out;
+ }
+ dm_list_iterate_items (strl, tags) {
+ if (!strncmp (strl->str, GF_XATTR_VOL_ID_KEY,
+ strlen (GF_XATTR_VOL_ID_KEY))) {
+ uuid = _gf_true;
+ break;
+ }
+ }
+ /* UUID tag is not set in VG */
+ if (!uuid) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Extended attribute trusted.glusterfs."
+ "volume-id is absent");
+ op_ret = -1;
+ goto out;
+ }
+
+ op_ret = uuid_parse (strl->str + strlen (GF_XATTR_VOL_ID_KEY) + 1,
+ vg_uuid);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong volume-id (%s) set in VG", strl->str);
+ op_ret = -1;
+ goto out;
+ }
+ if (uuid_compare (dict_uuid, vg_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mismatching volume-id (%s) received. "
+ "already is a part of volume %s ",
+ tmp_data->data, vg_uuid);
+ op_ret = -1;
+ goto out;
+ }
+
+ op_ret = 0;
+
+out:
+ lvm_vg_close (brick);
+
+ if (!thin)
+ gf_log (THIS->name, GF_LOG_WARNING, "No thin pool found in "
+ "VG %s\n", priv->vg);
+ else
+ priv->caps |= BD_CAPS_THIN;
+
+ return op_ret;
+}
+
+/* FIXME: Move this code to common place, so posix and bd xlator can use */
+char *
+page_aligned_alloc (size_t size, char **aligned_buf)
+{
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+
+ alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_common_mt_char);
+ if (!alloc_buf)
+ return NULL;
+ /* page aligned buffer */
+ buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE);
+ *aligned_buf = buf;
+
+ return alloc_buf;
+}
+
+static int
+__bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd_p)
+{
+ int ret = -1;
+ int _fd = -1;
+ char *devpath = NULL;
+ bd_fd_t *bdfd = NULL;
+ uint64_t tmp_bdfd = 0;
+ bd_priv_t *priv = this->private;
+ bd_gfid_t gfid = {0, };
+ bd_attr_t *bdatt = NULL;
+
+ /* not bd file */
+ if (fd->inode->ia_type != IA_IFREG ||
+ bd_inode_ctx_get (fd->inode, this, &bdatt))
+ return 0;
+
+ ret = __fd_ctx_get (fd, this, &tmp_bdfd);
+ if (ret == 0) {
+ bdfd = (void *)(long) tmp_bdfd;
+ *bdfd_p = bdfd;
+ return 0;
+ }
+
+ uuid_utoa_r (fd->inode->gfid, gfid);
+ asprintf (&devpath, "/dev/%s/%s", priv->vg, gfid);
+ if (!devpath)
+ goto out;
+
+ _fd = open (devpath, O_RDWR | O_LARGEFILE, 0);
+ if (_fd < 0) {
+ ret = errno;
+ gf_log (this->name, GF_LOG_ERROR, "open on %s: %s", devpath,
+ strerror (ret));
+ goto out;
+ }
+ bdfd = GF_CALLOC (1, sizeof(bd_fd_t), gf_bd_fd);
+ BD_VALIDATE_MEM_ALLOC (bdfd, ret, out);
+
+ bdfd->fd = _fd;
+ bdfd->flag = O_RDWR | O_LARGEFILE;
+ if (__fd_ctx_set (fd, this, (uint64_t)(long)bdfd) < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the fd context fd=%p", fd);
+ goto out;
+ }
+
+ *bdfd_p = bdfd;
+
+ ret = 0;
+out:
+ FREE (devpath);
+ if (ret) {
+ close (_fd);
+ GF_FREE (bdfd);
+ }
+ return ret;
+}
+
+int
+bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd)
+{
+ int ret;
+
+ /* FIXME: Is it ok to fd->lock here ? */
+ LOCK (&fd->lock);
+ {
+ ret = __bd_fd_ctx_get (this, fd, bdfd);
+ }
+ UNLOCK (&fd->lock);
+
+ return ret;
+}
+
+/*
+ * Validates if LV exists for given inode or not.
+ * Returns 0 if LV exists and size also matches.
+ * If LV does not exist -1 returned
+ * If LV size mismatches, returnes 1 also lv_size is updated with actual
+ * size
+ */
+int
+bd_validate_bd_xattr (xlator_t *this, char *bd, char **type,
+ uint64_t *lv_size, uuid_t uuid)
+{
+ char *path = NULL;
+ int ret = -1;
+ bd_gfid_t gfid = {0, };
+ bd_priv_t *priv = this->private;
+ struct stat stbuf = {0, };
+ uint64_t size = 0;
+ vg_t vg = NULL;
+ lv_t lv = NULL;
+ char *bytes = NULL;
+
+ bytes = strrchr (bd, ':');
+ if (bytes) {
+ *bytes = '\0';
+ bytes++;
+ gf_string2bytesize (bytes, &size);
+ }
+
+ if (strcmp (bd, BD_LV) && strcmp (bd, BD_THIN)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "invalid xattr %s", bd);
+ return -1;
+ }
+ *type = gf_strdup (bd);
+
+ /*
+ * Check if LV really exist, there could be a failure
+ * after setxattr and successful LV creation
+ */
+ uuid_utoa_r (uuid, gfid);
+ gf_asprintf (&path, "/dev/%s/%s", priv->vg, gfid);
+ if (!path) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "insufficient memory");
+ return 0;
+ }
+
+ /* Destination file does not exist */
+ if (stat (path, &stbuf)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "lstat failed for path %s", path);
+ return -1;
+ }
+
+ vg = lvm_vg_open (priv->handle, priv->vg, "r", 0);
+ if (!vg) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "VG %s does not exist?", priv->vg);
+ ret = -1;
+ goto out;
+ }
+
+ lv = lvm_lv_from_name (vg, gfid);
+ if (!lv) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "LV %s does not exist", gfid);
+ ret = -1;
+ goto out;
+ }
+
+ *lv_size = lvm_lv_get_size (lv);
+ if (size == *lv_size) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = 1;
+
+out:
+ if (vg)
+ lvm_vg_close (vg);
+
+ GF_FREE (path);
+ return ret;
+}
+
+static int
+create_thin_lv (char *vg, char *pool, char *lv, uint64_t extent)
+{
+ int ret = -1;
+ runner_t runner = {0, };
+ char *path = NULL;
+ struct stat stat = {0, };
+
+ runinit (&runner);
+ runner_add_args (&runner, LVM_CREATE, NULL);
+ runner_add_args (&runner, "--thin", NULL);
+ runner_argprintf (&runner, "%s/%s", vg, pool);
+ runner_add_args (&runner, "--name", NULL);
+ runner_argprintf (&runner, "%s", lv);
+ runner_add_args (&runner, "--virtualsize", NULL);
+ runner_argprintf (&runner, "%ldB", extent);
+ runner_start (&runner);
+ runner_end (&runner);
+
+ gf_asprintf (&path, "/dev/%s/%s", vg, lv);
+ if (!path) {
+ ret = ENOMEM;
+ goto out;
+ }
+ if (lstat (path, &stat) < 0)
+ ret = EAGAIN;
+ else
+ ret = 0;
+out:
+ GF_FREE (path);
+ return ret;
+}
+
+int
+bd_create (uuid_t uuid, uint64_t size, char *type, bd_priv_t *priv)
+{
+ int ret = 0;
+ vg_t vg = NULL;
+ bd_gfid_t gfid = {0, };
+
+ uuid_utoa_r (uuid, gfid);
+
+ if (!strcmp (type, BD_THIN))
+ return create_thin_lv (priv->vg, priv->pool, gfid,
+ size);
+
+ vg = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+ if (!vg) {
+ gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed",
+ priv->vg);
+ return ENOENT;
+ }
+
+ if (!lvm_vg_create_lv_linear (vg, gfid, size)) {
+ gf_log (THIS->name, GF_LOG_WARNING, "lvm_vg_create_lv_linear "
+ "failed");
+ ret = errno;
+ }
+
+ lvm_vg_close (vg);
+
+ return ret;
+}
+
+int32_t
+bd_resize (bd_priv_t *priv, uuid_t uuid, off_t size)
+{
+ uint64_t new_size = 0;
+ runner_t runner = {0, };
+ bd_gfid_t gfid = {0, };
+ int ret = 0;
+ vg_t vg = NULL;
+ lv_t lv = NULL;
+
+ uuid_utoa_r (uuid, gfid);
+
+ runinit (&runner);
+
+ runner_add_args (&runner, LVM_RESIZE, NULL);
+ runner_argprintf (&runner, "%s/%s", priv->vg, gfid);
+ runner_argprintf (&runner, "-L%ldb", size);
+ runner_add_args (&runner, "-f", NULL);
+
+ runner_start (&runner);
+ runner_end (&runner);
+
+ vg = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+ if (!vg) {
+ gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed",
+ priv->vg);
+ return EAGAIN;
+ }
+
+ lv = lvm_lv_from_name (vg, gfid);
+ if (!lv) {
+ gf_log (THIS->name, GF_LOG_WARNING, "LV %s not found", gfid);
+ ret = EIO;
+ goto out;
+ }
+ new_size = lvm_lv_get_size (lv);
+
+ if (new_size != size) {
+ gf_log (THIS->name, GF_LOG_WARNING, "resized LV size %ld does "
+ "not match requested size %ld", new_size, size);
+ ret = EIO;
+ }
+
+out:
+ lvm_vg_close (vg);
+ return ret;
+}
+
+uint64_t
+bd_get_default_extent (bd_priv_t *priv)
+{
+ vg_t vg = NULL;
+ uint64_t size = 0;
+
+ vg = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+ if (!vg) {
+ gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed",
+ priv->vg);
+ return 0;
+ }
+
+ size = lvm_vg_get_extent_size (vg);
+
+ lvm_vg_close (vg);
+
+ return size;
+}
+
+/*
+ * Adjusts the user specified size to VG specific extent size
+ */
+uint64_t
+bd_adjust_size (bd_priv_t *priv, uint64_t size)
+{
+ uint64_t extent = 0;
+ uint64_t nr_ex = 0;
+
+ extent = bd_get_default_extent (priv);
+ if (!extent)
+ return 0;
+
+ nr_ex = size / extent;
+ if (size % extent)
+ nr_ex++;
+
+ size = extent * nr_ex;
+
+ return size;
+}
+
+int
+bd_delete_lv (bd_priv_t *priv, const char *lv_name, int *op_errno)
+{
+ vg_t vg = NULL;
+ lv_t lv = NULL;
+ int ret = -1;
+
+ *op_errno = 0;
+ vg = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+ if (!vg) {
+ gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed",
+ priv->vg);
+ *op_errno = ENOENT;
+ return -1;
+ }
+ lv = lvm_lv_from_name (vg, lv_name);
+ if (!lv) {
+ gf_log (THIS->name, GF_LOG_WARNING, "No such LV %s", lv_name);
+ *op_errno = ENOENT;
+ goto out;
+ }
+ ret = lvm_vg_remove_lv (lv);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_WARNING, "removing LV %s failed",
+ lv_name);
+ *op_errno = errno;
+ goto out;
+ }
+out:
+ lvm_vg_close (vg);
+
+ return ret;
+}