summary refs log tree commit diff stats
path: root/extras/ganesha/scripts/ganesha-ha.sh
diff options
context:
space:
mode:
Diffstat (limited to 'extras/ganesha/scripts/ganesha-ha.sh')
-rw-r--r-- [-rwxr-xr-x] extras/ganesha/scripts/ganesha-ha.sh 942
1 files changed, 642 insertions, 300 deletions
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index 79a99c49c33..9790a719e10 100755..100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# Copyright 2015 Red Hat Inc. All Rights Reserved
+# Copyright 2015-2016 Red Hat Inc. All Rights Reserved
#
# Pacemaker+Corosync High Availability for NFS-Ganesha
#
@@ -20,21 +20,85 @@
# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to
# the new host.
+# Absolute path of this script; manage_service runs it on other nodes
+# over ssh. $0 is quoted so a path containing whitespace stays one word.
+GANESHA_HA_SH=$(realpath "$0")
HA_NUM_SERVERS=0
HA_SERVERS=""
-HA_CONFDIR=""
HA_VOL_NAME="gluster_shared_storage"
-HA_VOL_MNT="/var/run/gluster/shared_storage"
+HA_VOL_MNT="/run/gluster/shared_storage"
+# The HA configuration directory lives under the shared-storage mount.
+HA_CONFDIR="${HA_VOL_MNT}/nfs-ganesha"
SERVICE_MAN="DISTRO_NOT_FOUND"
-CONF=$(cat /etc/sysconfig/ganesha | grep "CONFFILE" | cut -f 2 -d "=")
-RHEL6_PCS_CNAME_OPTION="--name"
+# rhel, fedora id, version
+ID=""
+VERSION_ID=""
+
+# Extra words spliced into the "pcs cluster setup" and
+# "pcs resource create" command lines.
+PCS9OR10_PCS_CNAME_OPTION=""
+PCS9OR10_PCS_CLONE_OPTION="clone"
+# Identity file handed to ssh (-i) when running commands on other nodes.
+SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
+
+# The UNBLOCK RA uses shared_storage, which may become unavailable
+# while any of the nodes reboots. Hence increase the timeout value.
+PORTBLOCK_UNBLOCK_TIMEOUT="60s"
+
+# Source the ganesha environment file from each distro-specific
+# location that exists, in this fixed order.
+for ganesha_env_file in /etc/sysconfig/ganesha /etc/conf.d/ganesha /etc/default/ganesha
+do
+    if [ -f "${ganesha_env_file}" ]; then
+        . "${ganesha_env_file}"
+    fi
+done
+unset ganesha_env_file
+
+GANESHA_CONF=
+
+# Scan an argument list for "-f <file>" and store <file> in the global
+# CONFFILE. Scanning stops at the first -f; every other word is skipped.
+# CONFFILE is left untouched when no -f is present.
+#   $@ - option words (the caller passes the split contents of OPTIONS)
+function find_rhel7_conf
+{
+    local key
+
+    # (( )) compares numerically; ">" inside [[ ]] is a string comparison
+    while (( $# > 0 ))
+    do
+        key="$1"
+        case "${key}" in
+            -f)
+                CONFFILE="$2"
+                break
+                ;;
+            *)
+                ;;
+        esac
+        shift
+    done
+}
+
+# When CONFFILE is still empty, look for a "-f <file>" inside OPTIONS;
+# GANESHA_CONF then falls back to the stock path if nothing was found.
+if [[ -z "${CONFFILE}" ]]
+        then
+        # OPTIONS is deliberately unquoted so it splits into separate words
+        find_rhel7_conf ${OPTIONS}
+
+fi
+
+GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf}
+
+# Print the command synopsis for each supported sub-command to stdout.
+usage() {
+    printf '%s\n' \
+        "Usage : add|delete|refresh-config|status" \
+        "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> <NODE-HOSTNAME> <NODE-VIP>" \
+        "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> <NODE-HOSTNAME>" \
+        "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> <volume>" \
+        "Status : ganesha-ha.sh --status <HA_CONFDIR>"
+}
determine_service_manager () {
- if [ -e "/usr/bin/systemctl" ];
+ if [ -e "/bin/systemctl" ];
then
- SERVICE_MAN="/usr/bin/systemctl"
+ SERVICE_MAN="/bin/systemctl"
elif [ -e "/sbin/invoke-rc.d" ];
then
SERVICE_MAN="/sbin/invoke-rc.d"
@@ -42,23 +106,38 @@ determine_service_manager () {
then
SERVICE_MAN="/sbin/service"
fi
- if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ]
+ if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]]
then
- echo "Service manager not recognized, exiting"
+ logger "Service manager not recognized, exiting"
exit 1
fi
}
manage_service ()
{
- if [ "$SERVICE_MAN" == "/usr/sbin/systemctl" ]
+    # Run a service action for nfs-ganesha on another node over ssh.
+    #   $1 - service action, e.g. "start" or "stop"
+    #   $2 - node to act on
+    # The node is first told to run this script with
+    # --setup-ganesha-conf-files, passing "yes" for start and "no" for
+    # any other action; then the service manager is invoked there.
+    local action=${1}
+    local new_node=${2}
+    local option="no"
+    # one shared ssh command line, quoted so each value stays one word
+    local -a ssh_cmd=(ssh -oPasswordAuthentication=no
+        -oStrictHostKeyChecking=no -i "${SECRET_PEM}" "root@${new_node}")
+
+    if [[ "${action}" == "start" ]]; then
+        option="yes"
+    fi
+    "${ssh_cmd[@]}" "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option"
+
+    # systemctl takes "<action> <unit>"; the other managers "<unit> <action>"
+    if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]
then
- $SERVICE_MAN $1 nfs-ganesha
+        "${ssh_cmd[@]}" "${SERVICE_MAN} ${action} nfs-ganesha"
else
- $SERVICE_MAN nfs-ganesha $1
+        "${ssh_cmd[@]}" "${SERVICE_MAN} nfs-ganesha ${action}"
fi
}
+
check_cluster_exists()
{
local name=${1}
@@ -66,7 +145,7 @@ check_cluster_exists()
if [ -e /var/run/corosync.pid ]; then
cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3)
- if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then
+ if [[ "${cluster_name}X" == "${name}X" ]]; then
logger "$name already exists, exiting"
exit 0
fi
@@ -81,7 +160,7 @@ determine_servers()
local tmp_ifs=${IFS}
local ha_servers=""
- if [[ "X${cmd}X" != "XsetupX" ]]; then
+ if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then
ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
IFS=$' '
for server in ${ha_servers} ; do
@@ -101,6 +180,13 @@ determine_servers()
fi
}
+# Call manage_service "stop" for every node in a server list.
+#   $1 - whitespace-separated list of nodes, passed as a single word
+stop_ganesha_all()
+{
+    local serverlist=${1}
+    local node
+
+    # serverlist is deliberately unquoted so it splits into one word per node
+    for node in ${serverlist} ; do
+        manage_service "stop" "${node}"
+    done
+}
setup_cluster()
{
@@ -108,37 +194,54 @@ setup_cluster()
local num_servers=${2}
local servers=${3}
local unclean=""
+ local quorum_policy="stop"
logger "setting up cluster ${name} with the following ${servers}"
- pcs cluster auth ${servers}
-# fedora pcs cluster setup ${name} ${servers}
-# rhel6 pcs cluster setup --name ${name} ${servers}
- pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers}
+ # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
+ pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
if [ $? -ne 0 ]; then
- logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
+ logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out"
+ #set up failed stop all ganesha process and clean up symlinks in cluster
+ stop_ganesha_all "${servers}"
exit 1;
fi
+
+ # pcs cluster auth ${servers}
+ pcs cluster auth
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster auth failed"
+ fi
+
pcs cluster start --all
if [ $? -ne 0 ]; then
logger "pcs cluster start failed"
exit 1;
fi
- sleep 3
+ sleep 1
+ # wait for the cluster to elect a DC before querying or writing
+ # to the CIB. BZ 1334092
+ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+ while [ $? -ne 0 ]; do
+ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+ done
+
unclean=$(pcs status | grep -u "UNCLEAN")
- while [[ "${unclean}X" = "UNCLEANX" ]]; do
+ while [[ "${unclean}X" == "UNCLEANX" ]]; do
sleep 1
unclean=$(pcs status | grep -u "UNCLEAN")
done
sleep 1
if [ ${num_servers} -lt 3 ]; then
- pcs property set no-quorum-policy=ignore
- if [ $? -ne 0 ]; then
- logger "warning: pcs property set no-quorum-policy=ignore failed"
- fi
+ quorum_policy="ignore"
+ fi
+ pcs property set no-quorum-policy=${quorum_policy}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
fi
+
pcs property set stonith-enabled=false
if [ $? -ne 0 ]; then
logger "warning: pcs property set stonith-enabled=false failed"
@@ -146,46 +249,68 @@ setup_cluster()
}
-setup_finalize()
+# Poll "pcs status" once per second until it no longer mentions "Stopped".
+#   $1 - CIB file path (stored in cibfile but not used by this function)
+#   $2 - optional maximum number of polls before giving up (default 60)
+# The previous loop compared grep's entire matching line(s) with the bare
+# word "Stopped", which could never be equal, so it never waited at all.
+setup_finalize_ha()
{
local cibfile=${1}
- local stopped=""
+    local max_tries=${2:-60}
+    local tries=0
- stopped=$(pcs status | grep -u "Stopped")
- while [[ "${stopped}X" = "StoppedX" ]]; do
+    while pcs status | grep -q "Stopped"; do
+        # bounded, so a resource that stays Stopped cannot hang the caller
+        if (( tries >= max_tries )); then
+            logger "warning: resources still Stopped after ${max_tries} checks"
+            break
+        fi
+        tries=$(( tries + 1 ))
sleep 1
- stopped=$(pcs status | grep -u "Stopped")
done
-
- pcs status | grep dead_ip-1 | sort > /var/run/ganesha/pcs_status
-
}
-setup_copy_config()
+# Ask nfs-ganesha on the cluster nodes to re-read one volume's export.
+#   $1   - volume name
+#   $2   - HA config directory holding exports/export.<volume>.conf
+#   $3.. - cluster nodes; any whose first hostname label equals this
+#          host's short name is skipped in the ssh loop
+# The Export_Id is parsed from the export file and passed to the
+# UpdateExport DBus method, first on each remote node over ssh and then
+# on the local host. Command output goes to logger; a one-line result
+# per host goes to stdout. Exits 1 when ${SECRET_PEM} does not exist.
+refresh_config ()
{
- local short_host=$(hostname -s)
-
- if [ -e /var/lib/glusterd/nfs/secret.pem ]; then
- while [[ ${1} ]]; do
- if [ ${short_host} != ${1} ]; then
- scp -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/nfs/secret.pem /etc/ganesha/ganesha-ha.conf ${1}:/etc/ganesha/
- if [ $? -ne 0 ]; then
- logger "warning: scp ganesha-ha.conf to ${1} failed"
+    local VOL=${1}
+    local HA_CONFDIR=${2}
+    local export_conf="${HA_CONFDIR}/exports/export.${VOL}.conf"
+    local -a update_export=(dbus-send --print-reply --system
+        --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr
+        org.ganesha.nfsd.exportmgr.UpdateExport)
+    local remote_prefix
+    local short_host
+    local current_host
+    local export_id
+    local output
+    local ret
+
+    short_host=$(hostname -s)
+    # space-joined copy of the command for the remote shell, built with
+    # printf so the result does not depend on the current IFS
+    printf -v remote_prefix '%s ' "${update_export[@]}"
+    # the pattern is quoted so the shell cannot glob-expand the bracket
+    # expression before grep sees it
+    export_id=$(grep '^[[:space:]]*Export_Id' "${export_conf}" |
+        awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
+
+    if [ -e "${SECRET_PEM}" ]; then
+        while [[ ${3} ]]; do
+            # keep only the first label so FQDNs compare with the short name
+            current_host=${3%%.*}
+            if [[ "${short_host}" != "${current_host}" ]]; then
+                # EXPORT(...) is wrapped in escaped quotes because the
+                # remote shell would otherwise trip over the parentheses
+                output=$(ssh -oPasswordAuthentication=no \
+                    -oStrictHostKeyChecking=no -i "${SECRET_PEM}" \
+                    "root@${current_host}" \
+                    "${remote_prefix}string:${export_conf} string:\"EXPORT(Export_Id=${export_id})\" 2>&1")
+                ret=$?
+                logger <<< "${output}"
+                if [ ${ret} -ne 0 ]; then
+                    echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}"
+                else
+                    echo "Refresh-config completed on ${current_host}."
fi
- fi
- shift
+
+            fi
+            shift
done
else
- logger "warning: scp ganesha-ha.conf to ${1} failed"
+        echo "Error: refresh-config failed. Passwordless ssh is not enabled."
+        exit 1
fi
-}
-copy_export_config ()
-{
- . /etc/ganesha/ganesha.conf
- scp $HA_VOL_SERVER:/etc/ganesha.conf ${1}:/etc/ganesha/
- scp -r $HA_VOL_SERVER:$2/exports/ ${1}:${2}/
+    # Run the same command on the localhost,
+    output=$("${update_export[@]}" "string:${export_conf}" \
+        "string:EXPORT(Export_Id=${export_id})" 2>&1)
+    ret=$?
+    logger <<< "${output}"
+    if [ ${ret} -ne 0 ] ; then
+        echo "Refresh-config failed on localhost."
+    else
+        echo "Success: refresh-config completed."
+    fi
}
@@ -197,7 +322,7 @@ teardown_cluster()
if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then
logger "info: ${server} is not in config, removing"
- pcs cluster stop ${server}
+ pcs cluster stop ${server} --force
if [ $? -ne 0 ]; then
logger "warning: pcs cluster stop ${server} failed"
fi
@@ -209,13 +334,13 @@ teardown_cluster()
fi
done
-# BZ 1193433 - pcs doesn't reload cluster.conf after modification
-# after teardown completes, a subsequent setup will appear to have
-# 'remembered' the deleted node. You can work around this by
-# issuing another `pcs cluster node remove $node`,
-# `crm_node -f -R $server`, or
-# `cibadmin --delete --xml-text '<node id="$server"
-# uname="$server"/>'
+ # BZ 1193433 - pcs doesn't reload cluster.conf after modification
+ # after teardown completes, a subsequent setup will appear to have
+ # 'remembered' the deleted node. You can work around this by
+ # issuing another `pcs cluster node remove $node`,
+ # `crm_node -f -R $server`, or
+ # `cibadmin --delete --xml-text '<node id="$server"
+ # uname="$server"/>'
pcs cluster stop --all
if [ $? -ne 0 ]; then
@@ -232,33 +357,12 @@ teardown_cluster()
cleanup_ganesha_config ()
{
- rm -rf ${HA_CONFDIR}/exports/*.conf
- rm -rf ${HA_CONFDIR}/.export_added
- rm -rf /etc/cluster/cluster.conf*
- sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $CONF
-}
-
-
-teardown_clean_etccluster()
-{
- local short_host=$(hostname -s)
-
- if [ -e /var/lib/glusterd/nfs/secret.pem ]; then
- while [[ ${1} ]]; do
- if [ ${short_host} != ${1} ]; then
- ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/nfs/secret.pem ${1} rm -f /etc/cluster/cluster.*
- if [ $? -ne 0 ]; then
- logger "warning: ssh ${1} rm -f /etc/cluster/cluster.* failed"
- fi
- fi
- shift
- done
- else
- logger "warning: ssh ${1} rm -f /etc/cluster/cluster.* failed"
- fi
+    # Delete the local corosync.conf, cluster.conf* and pacemaker CIB
+    # files, then drop every '%include "<file>.conf"' line from the
+    # ganesha.conf kept in HA_CONFDIR.
+    rm -f /etc/corosync/corosync.conf
+    rm -rf /etc/cluster/cluster.conf*
+    rm -rf /var/lib/pacemaker/cib/*
+    # path quoted so a directory name with whitespace stays one argument
+    sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' "${HA_CONFDIR}/ganesha.conf"
}
-
do_create_virt_ip_constraints()
{
local cibfile=${1}; shift
@@ -267,17 +371,17 @@ do_create_virt_ip_constraints()
# first a constraint location rule that says the VIP must be where
# there's a ganesha.nfsd running
- pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 rule score=-INFINITY ganesha-active ne 1
+ pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint location ${primary}-cluster_ip-1 rule score=-INFINITY ganesha-active ne 1 failed"
+ logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed"
fi
# then a set of constraint location prefers to set the prefered order
# for where a VIP should move
while [[ ${1} ]]; do
- pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 prefers ${1}=${weight}
+ pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight}
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint location ${primary}-cluster_ip-1 prefers ${1}=${weight} failed"
+ logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed"
fi
weight=$(expr ${weight} + 1000)
shift
@@ -287,9 +391,9 @@ do_create_virt_ip_constraints()
# on Fedora setting appears to be additive, so to get the desired
# value we adjust the weight
# weight=$(expr ${weight} - 100)
- pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 prefers ${primary}=${weight}
+ pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight}
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint location ${primary}-cluster_ip-1 prefers ${primary}=${weight} failed"
+ logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed"
fi
}
@@ -305,7 +409,7 @@ wrap_create_virt_ip_constraints()
# the result is "node2 node3 node4"; for node2, "node3 node4 node1"
# and so on.
while [[ ${1} ]]; do
- if [ "${1}" = "${primary}" ]; then
+ if [[ ${1} == ${primary} ]]; then
shift
while [[ ${1} ]]; do
tail=${tail}" "${1}
@@ -335,63 +439,81 @@ setup_create_resources()
{
local cibfile=$(mktemp -u)
- # mount the HA-state volume and start ganesha.nfsd on all nodes
- pcs resource create nfs_start ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
+ # fixup /var/lib/nfs
+ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}"
+ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}
if [ $? -ne 0 ]; then
- logger "warning: pcs resource create nfs_start ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
+ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed"
fi
- sleep 1
- # cloned resources seem to never have their start() invoked when they
- # are created, but stop() is invoked when they are destroyed. Why???.
- # No matter, we don't want this resource agent hanging around anyway
- pcs resource delete nfs_start-clone
+
+ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION}
if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete nfs_start-clone failed"
+ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed"
fi
- pcs resource create nfs-mon ganesha_mon --clone
+ # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace
+ # start method. Allow time for ganesha_mon to start and set the
+ # ganesha-active crm_attribute
+ sleep 5
+
+ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true
if [ $? -ne 0 ]; then
- logger "warning: pcs resource create nfs-mon ganesha_mon --clone failed"
+ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed"
fi
- pcs resource create nfs-grace ganesha_grace --clone
+ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
if [ $? -ne 0 ]; then
- logger "warning: pcs resource create nfs-grace ganesha_grace --clone failed"
+ logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1"
fi
pcs cluster cib ${cibfile}
while [[ ${1} ]]; do
- # ipaddr=$(grep ^${1} ${HA_CONFIG_FILE} | cut -d = -f 2)
- ipaddrx="VIP_${1//-/_}"
-
- ipaddr=${!ipaddrx}
-
- pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s
+ # this is variable indirection
+ # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"'
+ # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...)
+ # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1')
+ # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"')
+ # after the `eval ${clean_nvs}` there is a variable VIP_host_1
+ # with the value '10_7_6_5', and the following \$$ magic to
+ # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us
+ # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s
+ # to give us ipaddr="10.7.6.5". whew!
+ name="VIP_${1}"
+ clean_name=${name//[-.]/_}
+ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
+ clean_nvs=${nvs//[-.]/_}
+ eval ${clean_nvs}
+ eval tmp_ipaddr=\$${clean_name}
+ ipaddr=${tmp_ipaddr//_/.}
+
+ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=block ip=${ipaddr} --group ${1}-group
if [ $? -ne 0 ]; then
- logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=10s failed"
+ logger "warning pcs resource create ${1}-nfs_block failed"
fi
-
- pcs -f ${cibfile} resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy
+ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
if [ $? -ne 0 ]; then
- logger "warning: pcs resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy failed"
+ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s failed"
fi
- pcs -f ${cibfile} constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1 failed"
+ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
fi
- pcs -f ${cibfile} constraint order ${1}-trigger_ip-1 then nfs-grace-clone
+ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
+ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
+ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint order ${1}-trigger_ip-1 then nfs-grace-clone failed"
+ logger "warning pcs resource create ${1}-nfs_unblock failed"
fi
- pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
- fi
shift
done
@@ -410,6 +532,13 @@ teardown_resources()
{
# local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2)
+ # restore /var/lib/nfs
+ logger "notice: pcs resource delete nfs_setup-clone"
+ pcs resource delete nfs_setup-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs_setup-clone failed"
+ fi
+
# delete -clone resource agents
# in particular delete the ganesha monitor so we don't try to
# trigger anything when we shut down ganesha next.
@@ -423,31 +552,10 @@ teardown_resources()
logger "warning: pcs resource delete nfs-grace-clone failed"
fi
- # unmount the HA-state volume and terminate ganesha.nfsd on all nodes
- pcs resource create nfs_stop ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource create nfs_stop ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
- fi
- sleep 1
- # cloned resources seem to never have their start() invoked when they
- # are created, but stop() is invoked when they are destroyed. Why???.
- pcs resource delete nfs_stop-clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete nfs_stop-clone failed"
- fi
-
while [[ ${1} ]]; do
- pcs resource delete ${1}-cluster_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete ${1}-cluster_ip-1 failed"
- fi
- pcs resource delete ${1}-trigger_ip-1
+ pcs resource delete ${1}-group
if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete ${1}-trigger_ip-1 failed"
- fi
- pcs resource delete ${1}-dead_ip-1
- if [ $? -ne 0 ]; then
- logger "info: pcs resource delete ${1}-dead_ip-1 failed"
+ logger "warning: pcs resource delete ${1}-group failed"
fi
shift
done
@@ -460,33 +568,40 @@ recreate_resources()
local cibfile=${1}; shift
while [[ ${1} ]]; do
- ipaddrx="VIP_${1//-/_}"
-
- ipaddr=${!ipaddrx}
-
- pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s
+ # this is variable indirection
+ # see the comment on the same a few lines up
+ name="VIP_${1}"
+ clean_name=${name//[-.]/_}
+ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
+ clean_nvs=${nvs//[-.]/_}
+ eval ${clean_nvs}
+ eval tmp_ipaddr=\$${clean_name}
+ ipaddr=${tmp_ipaddr//_/.}
+
+ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=block ip=${ipaddr} --group ${1}-group
if [ $? -ne 0 ]; then
- logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=10s failed"
+ logger "warning pcs resource create ${1}-nfs_block failed"
fi
-
- pcs -f ${cibfile} resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy
+ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
if [ $? -ne 0 ]; then
- logger "warning: pcs resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy failed"
+ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s failed"
fi
- pcs -f ${cibfile} constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1 failed"
- fi
-
- pcs -f ${cibfile} constraint order ${1}-trigger_ip-1 then nfs-grace-clone
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint order ${1}-trigger_ip-1 then nfs-grace-clone failed"
+ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
fi
- pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
+ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
+ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
+ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
+ logger "warning pcs resource create ${1}-nfs_unblock failed"
fi
shift
@@ -502,30 +617,32 @@ addnode_recreate_resources()
recreate_resources ${cibfile} ${HA_SERVERS}
- pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${add_vip} cidr_netmask=32 op monitor interval=15s
- if [ $? -ne 0 ]; then
- logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${add_vip} cidr_netmask=32 op monitor interval=10s failed"
- fi
-
- pcs -f ${cibfile} resource create ${add_node}-trigger_ip-1 ocf:heartbeat:Dummy
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource create ${add_node}-trigger_ip-1 ocf:heartbeat:Dummy failed"
- fi
-
- pcs -f ${cibfile} constraint colocation add ${add_node}-cluster_ip-1 with ${add_node}-trigger_ip-1
+ pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \
+ protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint colocation add ${add_node}-cluster_ip-1 with ${add_node}-trigger_ip-1 failed"
+ logger "warning pcs resource create ${add_node}-nfs_block failed"
fi
-
- pcs -f ${cibfile} constraint order ${add_node}-trigger_ip-1 then nfs-grace-clone
+ pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+ ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \
+ --after ${add_node}-nfs_block
if [ $? -ne 0 ]; then
- logger "warning: pcs constraint order ${add_node}-trigger_ip-1 then nfs-grace-clone failed"
+ logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
fi
pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
if [ $? -ne 0 ]; then
logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed"
fi
+ pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \
+ protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \
+ ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \
+ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \
+ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-nfs_unblock failed"
+ fi
}
@@ -534,21 +651,13 @@ clear_resources()
local cibfile=${1}; shift
while [[ ${1} ]]; do
- pcs -f ${cibfile} resource delete ${1}-cluster_ip-1
+ pcs -f ${cibfile} resource delete ${1}-group
if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} resource delete ${1}-cluster_ip-1"
- fi
-
- pcs -f ${cibfile} resource delete ${1}-trigger_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} resource delete ${1}-trigger_ip-1"
+ logger "warning: pcs -f ${cibfile} resource delete ${1}-group"
fi
shift
done
-
- recreate_resources ${cibfile} ${add_node} ${add_vip} ${HA_SERVERS}
-
}
@@ -558,52 +667,19 @@ addnode_create_resources()
local add_vip=${1}; shift
local cibfile=$(mktemp -u)
- # mount the HA-state volume and start ganesha.nfsd on the new node
- pcs cluster cib ${cibfile}
- if [ $? -ne 0 ]; then
- logger "warning: pcs cluster cib ${cibfile} failed"
- fi
-
- pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT}
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} failed"
- fi
-
- pcs -f ${cibfile} constraint location nfs_start-${add_node} prefers ${newnode}=INFINITY
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} constraint location nfs_start-${add_node} prefers ${newnode}=INFINITY failed"
- fi
-
- pcs -f ${cibfile} constraint order nfs_start-${add_node} then nfs-mon-clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} constraint order nfs_start-${add_node} then nfs-mon-clone failed"
- fi
-
- pcs cluster cib-push ${cibfile}
- if [ $? -ne 0 ]; then
- logger "warning: pcs cluster cib-push ${cibfile} failed"
- fi
- rm -f ${cibfile}
-
# start HA on the new node
pcs cluster start ${add_node}
if [ $? -ne 0 ]; then
logger "warning: pcs cluster start ${add_node} failed"
fi
- pcs resource delete nfs_start-${add_node}
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete nfs_start-${add_node} failed"
- fi
-
-
pcs cluster cib ${cibfile}
if [ $? -ne 0 ]; then
logger "warning: pcs cluster cib ${cibfile} failed"
fi
- # delete all the -cluster_ip-1 and -trigger_ip-1 resources,
- # clearing their constraints, then create them again so we can
+ # delete all the -cluster_ip-1 resources, clearing
+ # their constraints, then create them again so we can
# recompute their constraints
clear_resources ${cibfile} ${HA_SERVERS}
addnode_recreate_resources ${cibfile} ${add_node} ${add_vip}
@@ -635,7 +711,7 @@ deletenode_delete_resources()
# recompute their constraints
clear_resources ${cibfile} ${HA_SERVERS}
recreate_resources ${cibfile} ${ha_servers}
- HA_SERVERS="${ha_servers}"
+ HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /")
create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
@@ -645,31 +721,16 @@ deletenode_delete_resources()
fi
rm -f ${cibfile}
- pcs cluster cib ${cibfile}
- if [ $? -ne 0 ]; then
- logger "warning: pcs cluster cib ${cibfile} failed"
- fi
-
- pcs -f ${cibfile} resource create nfs_stop-${node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT}
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} resource create nfs_stop-${node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} failed"
- fi
+}
- pcs -f ${cibfile} constraint location nfs_stop-${node} prefers ${node}=INFINITY
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} constraint location nfs_stop-${node} prefers ${node}=INFINITY failed"
- fi
- pcs cluster cib-push ${cibfile}
- if [ $? -ne 0 ]; then
- logger "warning: pcs cluster cib-push ${cibfile} failed"
- fi
- rm -f ${cibfile}
+# Update ${HA_CONFDIR}/ganesha-ha.conf after a node has been removed:
+# rewrite HA_CLUSTER_NODES as the comma-separated contents of HA_SERVERS,
+# delete the VIP_<node> entry, and drop empty lines.
+#   $1 - name of the removed node
+deletenode_update_haconfig()
+{
+    local name="VIP_${1}"
+    local ha_servers
- pcs resource delete nfs_stop-${node}
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete nfs_stop-${node} failed"
- fi
+
+    # unquoted on purpose: collapses runs of whitespace between node names
+    ha_servers=$(echo ${HA_SERVERS})
+    ha_servers=${ha_servers// /,}
+    sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers}\"/" -e "s/^${name}=.*$//" -e "/^$/d" "${HA_CONFDIR}/ganesha-ha.conf"
}
@@ -679,43 +740,60 @@ setup_state_volume()
local longname=""
local shortname=""
local dname=""
+ local dirname=""
longname=$(hostname)
dname=${longname#$(hostname -s)}
while [[ ${1} ]]; do
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname} ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}
+
+ if [[ ${1} == *${dname} ]]; then
+ dirname=${1}
+ else
+ dirname=${1}${dname}
+ fi
+
+ if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then
+ mkdir ${mnt}/nfs-ganesha/tickle_dir
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
fi
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
fi
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
fi
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
fi
- touch ${mnt}/nfs-ganesha/${1}${dname}/nfs/state
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/v4recov ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/v4recov
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
fi
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/v4old ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/v4old
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
fi
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/sm ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/sm
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
fi
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/sm.bak ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/sm.bak
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
fi
- if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/state ]; then
- mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/state
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
fi
for server in ${HA_SERVERS} ; do
- if [ ${server} != ${1}${dname} ]; then
- ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/${server}
- ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/${server}
+ if [[ ${server} != ${dirname} ]]; then
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
fi
done
shift
@@ -724,18 +802,245 @@ setup_state_volume()
}
+enable_pacemaker()
+{
+        # Enable the pacemaker service on every host passed as an
+        # argument, over ssh as root using the ${SECRET_PEM} identity.
+        # Processing stops at the first empty argument.
+        local enable_cmd
+
+        # systemctl is invoked as "<manager> enable <unit>"; any other
+        # service manager is invoked as "<manager> <unit> enable".
+        case "${SERVICE_MAN}" in
+        /bin/systemctl)
+                enable_cmd="${SERVICE_MAN} enable pacemaker"
+                ;;
+        *)
+                enable_cmd="${SERVICE_MAN} pacemaker enable"
+                ;;
+        esac
+
+        until [[ -z ${1} ]]; do
+                ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no \
+-i ${SECRET_PEM} root@${1} "${enable_cmd}"
+                shift
+        done
+}
+
+
+addnode_state_volume()
+{
+        # Create the state tree of a newly added node below
+        # ${HA_VOL_MNT}/nfs-ganesha and cross-link it with the tree of
+        # every other server listed in ${HA_SERVERS}.
+        #
+        # Arguments: $1 - name of the node being added
+        # Globals:   HA_VOL_MNT, HA_SERVERS (read)
+        local newnode=${1}; shift
+        local mnt=${HA_VOL_MNT}
+        local longname=""
+        local dname=""
+        local dirname=""
+        local nodedir=""
+        local peerdir=""
+        local sub=""
+        local kind=""
+        local link=""
+        local server=""
+
+        # dname is whatever follows the short host name in this host's
+        # name, i.e. the domain suffix (empty if there is none)
+        longname=$(hostname)
+        dname=${longname#$(hostname -s)}
+
+        # append that suffix unless the node name already ends with it
+        if [[ ${newnode} == *${dname} ]]; then
+                dirname=${newnode}
+        else
+                dirname=${newnode}${dname}
+        fi
+
+        nodedir="${mnt}/nfs-ganesha/${dirname}"
+
+        # plain directories, listed parent first
+        for sub in "" /nfs /nfs/ganesha /nfs/ganesha/v4recov /nfs/ganesha/v4old; do
+                if [[ ! -d "${nodedir}${sub}" ]]; then
+                        mkdir "${nodedir}${sub}"
+                fi
+        done
+
+        # statd directories are handed to rpcuser when they are created
+        for sub in /nfs/statd /nfs/statd/sm /nfs/statd/sm.bak; do
+                if [[ ! -d "${nodedir}${sub}" ]]; then
+                        mkdir "${nodedir}${sub}"
+                        chown rpcuser:rpcuser "${nodedir}${sub}"
+                fi
+        done
+
+        if [[ ! -e "${nodedir}/nfs/state" ]]; then
+                touch "${nodedir}/nfs/state"
+                chown rpcuser:rpcuser "${nodedir}/nfs/state"
+        fi
+        if [[ ! -e "${nodedir}/nfs/statd/state" ]]; then
+                touch "${nodedir}/nfs/statd/state"
+        fi
+
+        # HA_SERVERS is a space separated list, hence left unquoted
+        for server in ${HA_SERVERS} ; do
+
+                if [[ ${server} != "${dirname}" ]]; then
+                        peerdir="${mnt}/nfs-ganesha/${server}"
+
+                        # Only create links that are missing: running
+                        # "ln -s" on a link that already points to a
+                        # directory would create a stray link inside
+                        # that directory instead.
+                        for kind in ganesha statd; do
+                                # new node's tree -> peer's state
+                                link="${nodedir}/nfs/${kind}/${server}"
+                                if [[ ! -e "${link}" && ! -L "${link}" ]]; then
+                                        ln -s "${peerdir}/nfs/${kind}" "${link}"
+                                fi
+
+                                # peer's tree -> new node's state
+                                link="${peerdir}/nfs/${kind}/${dirname}"
+                                if [[ ! -e "${link}" && ! -L "${link}" ]]; then
+                                        ln -s "${nodedir}/nfs/${kind}" "${link}"
+                                fi
+                        done
+                fi
+        done
+
+}
+
+
+delnode_state_volume()
+{
+        # Remove the state tree of a deleted node from
+        # ${HA_VOL_MNT}/nfs-ganesha together with the links to it in
+        # the tree of every other server listed in ${HA_SERVERS}.
+        #
+        # Arguments: $1 - name of the node being deleted
+        # Globals:   HA_VOL_MNT, HA_SERVERS (read)
+        # Returns:   1 without removing anything if no node name is given
+        local delnode=${1}; shift
+        local mnt=${HA_VOL_MNT}
+        local longname=""
+        local dname=""
+        local dirname=""
+        local server=""
+
+        # An empty node name could leave dirname empty as well, and the
+        # rm -rf below would then wipe the state of the whole cluster.
+        if [[ -z ${delnode} ]]; then
+                logger "warning: delnode_state_volume called without a node name"
+                return 1
+        fi
+
+        # dname is whatever follows the short host name in this host's
+        # name, i.e. the domain suffix (empty if there is none)
+        longname=$(hostname)
+        dname=${longname#$(hostname -s)}
+
+        # append that suffix unless the node name already ends with it
+        if [[ ${delnode} == *${dname} ]]; then
+                dirname=${delnode}
+        else
+                dirname=${delnode}${dname}
+        fi
+
+        rm -rf -- "${mnt}/nfs-ganesha/${dirname}"
+
+        # HA_SERVERS is a space separated list, hence left unquoted
+        for server in ${HA_SERVERS} ; do
+                if [[ ${server} != "${dirname}" ]]; then
+                        rm -f -- "${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}"
+                        rm -f -- "${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}"
+                fi
+        done
+}
+
+
+status()
+{
+        # Print a summary of the HA cluster taken from "pcs status":
+        # the "Online:" line(s), each node's cluster_ip resource with
+        # the fourth field of its line, and an overall verdict
+        # (BAD, HEALTHY or FAILOVER).
+        #
+        # Arguments: $@ - names of the configured cluster nodes
+        # Returns:   1 if no scratch file could be created
+        local scratch=""
+        local regex_str="^${1}-cluster_ip-1"
+        local healthy=0
+        local index=1
+        local -a nodes=()
+        local n=""
+        local resource=""
+        local result=0
+
+        # Without a scratch file the greps below would have no file
+        # operand and read stdin instead, so give up early.
+        if ! scratch=$(mktemp) || [[ -z ${scratch} ]]; then
+                echo "status: unable to create a temporary file" >&2
+                return 1
+        fi
+
+        # change tabs to spaces, strip leading spaces, including any
+        # new '*' at the beginning of a line introduced in pcs-0.10.x
+        pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > "${scratch}"
+
+        nodes[0]=${1}; shift
+
+        # make a regex of the configured nodes
+        # and initialize the nodes array for later
+        while [[ ${1} ]]; do
+
+                regex_str="${regex_str}|^${1}-cluster_ip-1"
+                nodes[${index}]=${1}
+                ((index++))
+                shift
+        done
+
+        # print the nodes that are expected to be online
+        grep -E "Online:" "${scratch}"
+
+        echo
+
+        # print the VIPs and which node they are on
+        grep -E "${regex_str}" < "${scratch}" | cut -d ' ' -f 1,4
+
+        echo
+
+        # check if the VIP and port block/unblock RAs are on the expected
+        # nodes; every resource not started on its own node counts
+        # against the healthy state
+        for n in "${nodes[@]}"; do
+
+                # nodes[0] is empty when no node was given at all
+                [[ -n ${n} ]] || continue
+
+                for resource in \
+                        "nfs_block \(ocf::heartbeat:portblock\)" \
+                        "cluster_ip-1 \(ocf::heartbeat:IPaddr\)" \
+                        "nfs_unblock \(ocf::heartbeat:portblock\)"; do
+
+                        if ! grep -E -x "${n}-${resource}: Started ${n}" "${scratch}" > /dev/null 2>&1; then
+                                healthy=$((healthy + 1))
+                        fi
+                done
+        done
+
+        # any stopped or failed resource makes the cluster BAD
+        grep -E "\): Stopped|FAILED" "${scratch}" > /dev/null 2>&1
+        result=$?
+
+        if [ ${result} -eq 0 ]; then
+                echo "Cluster HA Status: BAD"
+        elif [ ${healthy} -eq 0 ]; then
+                echo "Cluster HA Status: HEALTHY"
+        else
+                echo "Cluster HA Status: FAILOVER"
+        fi
+
+        rm -f -- "${scratch}"
+}
+
+create_ganesha_conf_file()
+{
+        # Replace ${GANESHA_CONF} with either a symlink to, or a
+        # stripped copy of, ${HA_CONFDIR}/ganesha.conf.
+        #
+        # Arguments: $1 - "yes" to create the symlink, anything else to
+        #                 restore a regular file
+        # Globals:   GANESHA_CONF, HA_CONFDIR (read)
+        # Returns:   1 without touching anything if GANESHA_CONF is empty
+        if [[ -z ${GANESHA_CONF} ]]; then
+                logger "warning: GANESHA_CONF is not set, ganesha conf file left untouched"
+                return 1
+        fi
+
+        # Remove whatever is there now. No existence test is done first
+        # because "-e" is false for a dangling symlink, which would make
+        # the ln below fail.
+        rm -rf -- "${GANESHA_CONF}"
+
+        if [[ "$1" == "yes" ]];
+        then
+                # The symlink /etc/ganesha/ganesha.conf needs to be
+                # created from the ganesha conf file kept in the
+                # shared storage. Every node only has this link and
+                # the actual file is stored in shared storage, so
+                # that editing the ganesha conf is easy and the
+                # configuration stays consistent across nodes.
+
+                ln -s "${HA_CONFDIR}/ganesha.conf" "${GANESHA_CONF}"
+        else
+                # Restoring previous file: a regular copy with the
+                # %include "....conf" lines deleted
+                cp "${HA_CONFDIR}/ganesha.conf" "${GANESHA_CONF}"
+                sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' "${GANESHA_CONF}"
+        fi
+}
+
+set_quorum_policy()
+{
+        # Set the cluster property no-quorum-policy from the number of
+        # servers given as $1: "ignore" for fewer than three servers,
+        # "stop" otherwise. A failing pcs call is only logged.
+        local server_count=${1}
+        local policy="stop"
+
+        [ ${server_count} -lt 3 ] && policy="ignore"
+
+        if ! pcs property set no-quorum-policy=${policy}; then
+                logger "warning: pcs property set no-quorum-policy=${policy} failed"
+        fi
+}
+
main()
{
+
local cmd=${1}; shift
- HA_CONFDIR=${1}; shift
+ if [[ ${cmd} == *help ]]; then
+ usage
+ exit 0
+ fi
+
+ if (selinuxenabled) ;then
+ semanage boolean -m gluster_use_execmem --on
+ fi
+
+ local osid=""
+
+ osid=$(grep ^ID= /etc/os-release)
+ eval $(echo ${osid} | grep -F ID=)
+ osid=$(grep ^VERSION_ID= /etc/os-release)
+ eval $(echo ${osid} | grep -F VERSION_ID=)
+
+ HA_CONFDIR=${1%/}; shift
+ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
local node=""
local vip=""
- . ${HA_CONFDIR}/ganesha-ha.conf
-
- if [ -e /etc/os-release ]; then
- RHEL6_PCS_CNAME_OPTION=""
- fi
+ # ignore any comment lines
+ cfgline=$(grep ^HA_NAME= ${ha_conf})
+ eval $(echo ${cfgline} | grep -F HA_NAME=)
+ cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf})
+ eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=)
case "${cmd}" in
@@ -746,17 +1051,30 @@ main()
determine_servers "setup"
- if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then
+ # Fedora 29+ and rhel/centos 8 have pcs-0.10.x.
+ # The defaults are the pcs-0.10.x options, but check for
+ # rhel/centos 7 (pcs-0.9.x) and adjust accordingly
+ if [[ ! ${ID} =~ {rhel,centos} ]]; then
+ if [[ ${VERSION_ID} == 7.* ]]; then
+ PCS9OR10_PCS_CNAME_OPTION="--name"
+ PCS9OR10_PCS_CLONE_OPTION="--clone"
+ fi
+ fi
+
+ if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then
+
+ determine_service_manager
setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
setup_create_resources ${HA_SERVERS}
+ setup_finalize_ha
+
setup_state_volume ${HA_SERVERS}
- setup_copy_config ${HA_SERVERS}
+ enable_pacemaker ${HA_SERVERS}
- setup_finalize
else
logger "insufficient servers for HA, aborting"
@@ -772,7 +1090,7 @@ main()
teardown_cluster ${HA_NAME}
- teardown_clean_etccluster ${HA_SERVERS}
+ cleanup_ganesha_config ${HA_CONFDIR}
;;
cleanup | --cleanup)
@@ -783,9 +1101,13 @@ main()
node=${1}; shift
vip=${1}; shift
- logger "adding ${node} with ${vip} to ${HA_NAME}"
+ logger "adding ${node} with ${vip} to ${HA_NAME}"
- determine_servers "add"
+ determine_service_manager
+
+ manage_service "start" ${node}
+
+ determine_servers "add"
pcs cluster node add ${node}
if [ $? -ne 0 ]; then
@@ -793,16 +1115,26 @@ main()
fi
addnode_create_resources ${node} ${vip}
-
- setup_state_volume ${node}
-
- setup_copy_config ${node}
-
- copy_export_config ${node} ${HA_CONFDIR}
-
- determine_service_manager
-
- manage_service "start"
+ # Subsequent add-node recreates resources for all the nodes
+ # that already exist in the cluster. The nodes are picked up
+ # from the entries in the ganesha-ha.conf file. Add the
+ # newly added node to the file so that the resources specific
+ # to this node are correctly recreated in the future.
+ clean_node=${node//[-.]/_}
+ echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf
+
+ NEW_NODES="$HA_CLUSTER_NODES,${node}"
+
+ sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \
+$HA_CONFDIR/ganesha-ha.conf
+
+ addnode_state_volume ${node}
+
+ # addnode_create_resources() already appended ${node} to
+ # HA_SERVERS, so only need to increment HA_NUM_SERVERS
+ # and set quorum policy
+ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1)
+ set_quorum_policy ${HA_NUM_SERVERS}
;;
delete | --delete)
@@ -819,39 +1151,49 @@ main()
logger "warning: pcs cluster node remove ${node} failed"
fi
- # TODO: delete node's directory in shared state
+ deletenode_update_haconfig ${node}
- teardown_clean_etccluster ${node}
+ delnode_state_volume ${node}
determine_service_manager
- manage-service "stop"
+ manage_service "stop" ${node}
- cleanup_ganesha_config ${HA_CONFDIR}
+ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1)
+ set_quorum_policy ${HA_NUM_SERVERS}
;;
status | --status)
- exec pcs status
+ determine_servers "status"
+
+ status ${HA_SERVERS}
;;
refresh-config | --refresh-config)
+ VOL=${1}
+
+ determine_servers "refresh-config"
+
+ refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS}
;;
- help | --help)
- echo "Usage : add|delete|status"
- echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \
-<NODE-IP/HOSTNAME> <NODE-VIP>"
- echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \
-<NODE-IP/HOSTNAME>"
+ setup-ganesha-conf-files | --setup-ganesha-conf-files)
+
+ create_ganesha_conf_file ${1}
;;
- *)
+
+ *)
# setup and teardown are not intended to be used by a
# casual user
+ usage
logger "Usage: ganesha-ha.sh add|delete|status"
;;
esac
+
+ if (selinuxenabled) ;then
+ semanage boolean -m gluster_use_execmem --off
+ fi
}
main $*
-