diff options
Diffstat (limited to 'extras/ganesha')
| -rw-r--r-- | extras/ganesha/config/ganesha-ha.conf.sample | 7 | ||||
| -rw-r--r-- | extras/ganesha/ocf/Makefile.am | 1 | ||||
| -rw-r--r-- | extras/ganesha/ocf/ganesha_grace | 175 | ||||
| -rw-r--r-- | extras/ganesha/ocf/ganesha_mon | 153 | ||||
| -rw-r--r-- | extras/ganesha/ocf/ganesha_nfsd | 98 | ||||
| -rw-r--r-- | extras/ganesha/scripts/Makefile.am | 6 | ||||
| -rwxr-xr-x | extras/ganesha/scripts/create-export-ganesha.sh | 79 | ||||
| -rwxr-xr-x | extras/ganesha/scripts/dbus-send.sh | 117 | ||||
| -rw-r--r-- | extras/ganesha/scripts/ganesha-ha.sh | 849 | ||||
| -rwxr-xr-x | extras/ganesha/scripts/generate-epoch.py | 48 |
10 files changed, 919 insertions, 614 deletions
diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample index 24054abfdff..c22892bde56 100644 --- a/extras/ganesha/config/ganesha-ha.conf.sample +++ b/extras/ganesha/config/ganesha-ha.conf.sample @@ -2,9 +2,6 @@ # must be unique within the subnet HA_NAME="ganesha-ha-360" # -# The gluster server from which to mount the shared data volume. -HA_VOL_SERVER="server1" -# # N.B. you may use short names or long names; you may not use IP addrs. # Once you select one, stay with it as it will be mildly unpleasant to # clean up if you switch later on. Ensure that all names - short and/or @@ -18,5 +15,5 @@ HA_CLUSTER_NODES="server1,server2,..." # Virtual IPs for each of the nodes specified above. VIP_server1="10.0.2.1" VIP_server2="10.0.2.2" -#VIP_server1.lab.redhat.com="10.0.2.1" -#VIP_server2.lab.redhat.com="10.0.2.2" +#VIP_server1_lab_redhat_com="10.0.2.1" +#VIP_server2_lab_redhat_com="10.0.2.2" diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am index 6aed9548a0f..990a609f254 100644 --- a/extras/ganesha/ocf/Makefile.am +++ b/extras/ganesha/ocf/Makefile.am @@ -9,4 +9,3 @@ ocfdir = $(prefix)/lib/ocf radir = $(ocfdir)/resource.d/heartbeat ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd - diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace index 75ec16c0fd1..825f7164597 100644 --- a/extras/ganesha/ocf/ganesha_grace +++ b/extras/ganesha/ocf/ganesha_grace @@ -30,14 +30,17 @@ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs if [ -n "$OCF_DEBUG_LIBRARY" ]; then - . $OCF_DEBUG_LIBRARY + . $OCF_DEBUG_LIBRARY else - : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} -. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi +OCF_RESKEY_grace_active_default="grace-active" +: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} + ganesha_meta_data() { - cat <<END + cat <<END <?xml version="1.0"?> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> <resource-agent name="ganesha_grace"> @@ -51,19 +54,25 @@ resource agent for nfs-ganesha. <shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> <parameters> +<parameter name="grace_active"> +<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> +<content type="string" default="grace-active" /> +</parameter> </parameters> <actions> <action name="start" timeout="40s" /> <action name="stop" timeout="40s" /> -<action name="status" depth="0" timeout="20s" interval="5s" /> -<action name="monitor" depth="0" timeout="20s" interval="5s" /> +<action name="status" timeout="20s" interval="60s" /> +<action name="monitor" depth="0" timeout="10s" interval="5s" /> +<action name="notify" timeout="10s" /> <action name="meta-data" timeout="20s" /> </actions> </resource-agent> END -return $OCF_SUCCESS +return ${OCF_SUCCESS} } ganesha_grace_usage() { @@ -73,10 +82,10 @@ ganesha_grace_usage() { # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) ganesha_meta_data - exit $OCF_SUCCESS + exit ${OCF_SUCCESS} ;; usage|help) ganesha_usage - exit $OCF_SUCCESS + exit ${OCF_SUCCESS} ;; *) ;; @@ -84,81 +93,112 @@ esac ganesha_grace_start() { - local result="" - local resourcename="" - local deadserver="" - local tmpIFS=${IFS} - local pid_file="/var/run/ganesha.nfsd.pid" - - # RHEL6 /etc/init.d/nfs-ganesha adds "-p /var/run/ganesha.nfsd.pid" - # RHEL7 systemd does not. Would be nicer if all distros used the - # same pid file. - if [ -e /usr/lib/systemd/system/nfs-ganesha.service ]; then - pid_file="/var/run/ganesha.pid" + local rc=${OCF_ERR_GENERIC} + local host=$(hostname -s) + + ocf_log debug "ganesha_grace_start()" + # give ganesha_mon RA a chance to set the crm_attr first + # I mislike the sleep, but it's not clear that looping + # with a small sleep is necessarily better + # start has a 40sec timeout, so a 5sec sleep here is okay + sleep 5 + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then + host=$(hostname) + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null ) + if [ $? -ne 0 ]; then + ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" + fi + fi + + # Three possibilities: + # 1. There is no attribute at all and attr_updater returns + # a zero length string. This happens when + # ganesha_mon::monitor hasn't run at least once to set + # the attribute. The assumption here is that the system + # is coming up. We pretend, for now, that the node is + # healthy, to allow the system to continue coming up. + # It will cure itself in a few seconds + # 2. There is an attribute, and it has the value "1"; this + # node is healthy. + # 3. There is an attribute, but it has no value or the value + # "0"; this node is not healthy. + + # case 1 + if [[ -z "${attr}" ]]; then + return ${OCF_SUCCESS} fi - # logger "ganesha_grace_start()" - # we're here because somewhere in the cluster one or more - # of the ganesha.nfsds have died, triggering a floating IP - # address to move. Resource constraint location rules ensure - # that this is invoked before the floating IP is moved. - if [ -e ${pid_file} -a \ - -d /proc/$(cat ${pid_file} ) ]; then - # my ganesha.nfsd is still running - # find out which one died? - - pcs status | grep dead_ip-1 | sort > /tmp/.pcs_status - - result=$(diff /var/run/ganesha/pcs_status /tmp/.pcs_status | grep '^>') - if [[ ${result} ]]; then - # logger "ganesha_grace_start(), ${result}" - IFS=$'\n' - for line in ${result}; do - resourcename=$(echo ${line} | cut -f 1 | cut -d ' ' -f 3) - deadserver=${resourcename%"-dead_ip-1"} - - if [[ ${deadserver} ]]; then - # logger "ganesha_grace_start(), ${line}" - # logger "ganesha_grace_start(), dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${deadserver}" - dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${deadserver} - if [ $? -ne 0 ]; then - logger "warning: dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${deadserver} failed" - fi - fi - done - IFS=${tmpIFS} - fi - + # case 2 + if [[ "${attr}" = *"value=1" ]]; then + return ${OCF_SUCCESS} fi - return $OCF_SUCCESS + + # case 3 + return ${OCF_NOT_RUNNING} } ganesha_grace_stop() { - # logger "ganesha_grace_stop()" - return $OCF_SUCCESS + ocf_log debug "ganesha_grace_stop()" + return ${OCF_SUCCESS} +} + +ganesha_grace_notify() +{ + # since this is a clone RA we should only ever see pre-start + # or post-stop + mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" + case "${mode}" in + pre-start | post-stop) + dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} + if [ $? -ne 0 ]; then + ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed" + fi + ;; + esac + + return ${OCF_SUCCESS} } ganesha_grace_monitor() { - # logger "ganesha_grace_monitor()" - if [ ! -d /var/run/ganesha ]; then - mkdir -p /var/run/ganesha + local host=$(hostname -s) + + ocf_log debug "monitor" + + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then + host=$(hostname) + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then + ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" + fi + fi + + # if there is no attribute (yet), maybe it's because + # this RA started before ganesha_mon (nfs-mon) has had + # chance to create it. In which case we'll pretend + # everything is okay this time around + if [[ -z "${attr}" ]]; then + return ${OCF_SUCCESS} fi - pcs status | grep dead_ip-1 | sort > /var/run/ganesha/pcs_status - return $OCF_SUCCESS + + if [[ "${attr}" = *"value=1" ]]; then + return ${OCF_SUCCESS} + fi + + return ${OCF_NOT_RUNNING} } ganesha_grace_validate() { - return $OCF_SUCCESS + return ${OCF_SUCCESS} } ganesha_grace_validate -# logger "ganesha_grace ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION" - # Translate each action into the appropriate function call case $__OCF_ACTION in start) ganesha_grace_start @@ -167,14 +207,15 @@ stop) ganesha_grace_stop ;; status|monitor) ganesha_grace_monitor ;; +notify) ganesha_grace_notify + ;; *) ganesha_grace_usage - exit $OCF_ERR_UNIMPLEMENTED - ;; + exit ${OCF_ERR_UNIMPLEMENTED} + ;; esac rc=$? # The resource agent may optionally log a debug message -ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" exit $rc - diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon index c8e7de9c45e..2b4a9d6da84 100644 --- a/extras/ganesha/ocf/ganesha_mon +++ b/extras/ganesha/ocf/ganesha_mon @@ -29,17 +29,24 @@ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -if [ -n "$OCF_DEBUG_LIBRARY" ]; then - . $OCF_DEBUG_LIBRARY +if [ -n "${OCF_DEBUG_LIBRARY}" ]; then + . ${OCF_DEBUG_LIBRARY} else - : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} -. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi -GRACE_DELAY=7 +# Defaults +OCF_RESKEY_ganesha_active_default="ganesha-active" +OCF_RESKEY_grace_active_default="grace-active" +OCF_RESKEY_grace_delay_default="5" + +: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}} +: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} +: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}} ganesha_meta_data() { - cat <<END + cat <<END <?xml version="1.0"?> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> <resource-agent name="ganesha_mon"> @@ -53,19 +60,37 @@ resource agent for nfs-ganesha. <shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> <parameters> +<parameter name="ganesha_active"> +<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc> +<content type="string" default="ganesha-active" /> +</parameter> +<parameter name="grace_active"> +<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> +<content type="string" default="grace-active" /> +</parameter> +<parameter name="grace_delay"> +<longdesc lang="en"> +NFS-Ganesha grace delay. +When changing this, adjust the ganesha_grace RA's monitor interval to match. +</longdesc> +<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc> +<content type="string" default="5" /> +</parameter> </parameters> <actions> <action name="start" timeout="40s" /> <action name="stop" timeout="40s" /> -<action name="status" depth="0" timeout="20s" interval="10s" /> +<action name="status" timeout="20s" interval="60s" /> <action name="monitor" depth="0" timeout="10s" interval="10s" /> <action name="meta-data" timeout="20s" /> </actions> </resource-agent> END -return $OCF_SUCCESS +return ${OCF_SUCCESS} } ganesha_mon_usage() { @@ -73,12 +98,12 @@ ganesha_mon_usage() { } # Make sure meta-data and usage always succeed -case $__OCF_ACTION in +case ${__OCF_ACTION} in meta-data) ganesha_meta_data - exit $OCF_SUCCESS + exit ${OCF_SUCCESS} ;; usage|help) ganesha_usage - exit $OCF_SUCCESS + exit ${OCF_SUCCESS} ;; *) ;; @@ -86,72 +111,111 @@ esac ganesha_mon_start() { + ocf_log debug "ganesha_mon_start" + ganesha_mon_monitor return $OCF_SUCCESS } ganesha_mon_stop() { + ocf_log debug "ganesha_mon_stop" return $OCF_SUCCESS } ganesha_mon_monitor() { - local short_host=$(hostname -s) - local pid_file="/var/run/ganesha.nfsd.pid" + local host=$(hostname -s) + local pid_file="/var/run/ganesha.pid" + local rhel6_pid_file="/var/run/ganesha.nfsd.pid" + local proc_pid="/proc/" # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid # RHEL7 systemd does not. Would be nice if all distros used the # same pid file. - if [ -e /usr/lib/systemd/system/nfs-ganesha.service ]; then - pid_file="/var/run/ganesha.pid" + if [ -e ${rhel6_pid_file} ]; then + pid_file=${rhel6_pid_file} + fi + if [ -e ${pid_file} ]; then + proc_pid="${proc_pid}$(cat ${pid_file})" fi - if [ -e ${pid_file} -a \ - -d /proc/$(cat ${pid_file} ) ]; then - ( pcs resource delete ${short_host}-dead_ip-1 > /dev/null 2>&1 ) + if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then - attrd_updater -n ganesha-active -v 1 + attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 if [ $? -ne 0 ]; then - logger "warning: attrd_updater -n ganesha-active -v 1 failed" + ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed" fi - else - ( pcs resource create ${short_host}-dead_ip-1 ocf:heartbeat:Dummy > /dev/null 2>&1 ) + # ganesha_grace (nfs-grace) RA follows grace-active attr + # w/ constraint location + attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 if [ $? -ne 0 ]; then - logger "warning: pcs resource create ${short_host}-dead_ip-1 ocf:heartbeat:Dummy failed" + ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed" fi - # The ${this-node}-dead_ip-1 resource is used to indicate - # that this ganesha.nfsd has died. - # VIP fail-over is then triggered by clearing the - # ganesha-active node attribute on this node. - # - # Meanwhile the ganesha_grace monitor() runs every 5 - # seconds. We need to allow time for it to run and put - # the remaining ganesha.nfsds into grace before initiating - # the VIP fail-over. - sleep ${GRACE_DELAY} - - attrd_updater -D -n ganesha-active + # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace) + # track grace-active crm_attr (attr != crm_attr) + # we can't just use the attr as there's no way to query + # its value in RHEL6 pacemaker + + crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null if [ $? -ne 0 ]; then - logger "warning: attrd_updater -D -n ganesha-active failed" + host=$(hostname) + crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null + if [ $? -ne 0 ]; then + ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed" + fi fi + + return ${OCF_SUCCESS} fi - return $OCF_SUCCESS + # VIP fail-over is triggered by clearing the + # ganesha-active node attribute on this node. + # + # Meanwhile the ganesha_grace notify() runs when its + # nfs-grace resource is disabled on a node; which + # is triggered by clearing the grace-active attribute + # on this node. + # + # We need to allow time for it to run and put + # the remaining ganesha.nfsds into grace before + # initiating the VIP fail-over. + + attrd_updater -D -n ${OCF_RESKEY_grace_active} + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed" + fi + + host=$(hostname -s) + crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null + if [ $? -ne 0 ]; then + host=$(hostname) + crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null + if [ $? -ne 0 ]; then + ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed" + fi + fi + + sleep ${OCF_RESKEY_grace_delay} + + attrd_updater -D -n ${OCF_RESKEY_ganesha_active} + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed" + fi + + return ${OCF_SUCCESS} } ganesha_mon_validate() { - return $OCF_SUCCESS + return ${OCF_SUCCESS} } ganesha_mon_validate -# logger "ganesha_mon ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION" - # Translate each action into the appropriate function call -case $__OCF_ACTION in +case ${__OCF_ACTION} in start) ganesha_mon_start ;; stop) ganesha_mon_stop @@ -159,13 +223,12 @@ stop) ganesha_mon_stop status|monitor) ganesha_mon_monitor ;; *) ganesha_mon_usage - exit $OCF_ERR_UNIMPLEMENTED - ;; + exit ${OCF_ERR_UNIMPLEMENTED} + ;; esac rc=$? # The resource agent may optionally log a debug message -ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" exit $rc - diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd index e064183daef..f91e8b6b8f7 100644 --- a/extras/ganesha/ocf/ganesha_nfsd +++ b/extras/ganesha/ocf/ganesha_nfsd @@ -29,15 +29,18 @@ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -if [ -n "$OCF_DEBUG_LIBRARY" ]; then - . $OCF_DEBUG_LIBRARY +if [ -n "${OCF_DEBUG_LIBRARY}" ]; then + . ${OCF_DEBUG_LIBRARY} else - : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} -. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi +OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage" +: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} + ganesha_meta_data() { - cat <<END + cat <<END <?xml version="1.0"?> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> <resource-agent name="ganesha_nfsd"> @@ -59,16 +62,16 @@ resource agent for nfs-ganesha. </parameters> <actions> -<action name="start" timeout="40s" /> -<action name="stop" timeout="40s" /> -<action name="status" depth="0" timeout="20s" interval="1m" /> -<action name="monitor" depth="0" timeout="10s" interval="1m" /> +<action name="start" timeout="5s" /> +<action name="stop" timeout="5s" /> +<action name="status" depth="0" timeout="5s" interval="0" /> +<action name="monitor" depth="0" timeout="5s" interval="0" /> <action name="meta-data" timeout="20s" /> </actions> </resource-agent> END -return $OCF_SUCCESS +return ${OCF_SUCCESS} } ganesha_nfsd_usage() { @@ -78,10 +81,10 @@ ganesha_nfsd_usage() { # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) ganesha_meta_data - exit $OCF_SUCCESS + exit ${OCF_SUCCESS} ;; usage|help) ganesha_usage - exit $OCF_SUCCESS + exit ${OCF_SUCCESS} ;; *) ;; @@ -89,58 +92,60 @@ esac ganesha_nfsd_start() { - return $OCF_SUCCESS + local long_host=$(hostname) + + if [[ -d /var/lib/nfs ]]; then + mv /var/lib/nfs /var/lib/nfs.backup + if [ $? -ne 0 ]; then + ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed" + fi + ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs + if [ $? -ne 0 ]; then + ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed" + fi + fi + + return ${OCF_SUCCESS} } ganesha_nfsd_stop() { - local short_host=$(hostname -s) - local long_host="" - - if [ "X${OCF_RESOURCE_INSTANCE:0:9}X" = "Xnfs_startX" ]; then - - # if this is any nfs_start, go ahead. worst case we - # find the link already exists and do nothing - long_host=$(hostname) - - if [ -d /var/lib/nfs ]; then - mv /var/lib/nfs /var/lib/nfs.backup - ln -s $OCF_RESKEY_ha_vol_mnt/nfs-ganesha/${long_host}/nfs /var/lib/nfs - if [ $? -ne 0 ]; then - logger "warning: ln -s $OCF_RESKEY_ha_vol_mnt/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed" - fi + if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then + rm -f /var/lib/nfs + if [ $? -ne 0 ]; then + ocf_log notice "rm -f /var/lib/nfs failed" fi - else - - # if this is a clone resource or is specific to this node - # remove the symlink and restore /var/lib/nfs - - if [ "X${OCF_RESOURCE_INSTANCE}X" = "Xnfs_stopX" ] || - [ "X${OCF_RESOURCE_INSTANCE}X" = "Xnfs_stop-${short_host}X" ]; then - if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then - rm -f /var/lib/nfs - mv /var/lib/nfs.backup /var/lib/nfs - fi + mv /var/lib/nfs.backup /var/lib/nfs + if [ $? -ne 0 ]; then + ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed" fi fi - return $OCF_SUCCESS + return ${OCF_SUCCESS} } ganesha_nfsd_monitor() { - return $OCF_SUCCESS + # pacemaker checks to see if RA is already running before starting it. + # if we return success, then it's presumed it's already running and + # doesn't need to be started, i.e. invoke the start action. + # return something other than success to make pacemaker invoke the + # start action + if [[ -L /var/lib/nfs ]]; then + return ${OCF_SUCCESS} + fi + return ${OCF_NOT_RUNNING} } ganesha_nfsd_validate() { - return $OCF_SUCCESS + return ${OCF_SUCCESS} } ganesha_nfsd_validate -# logger "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION" +# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION" # Translate each action into the appropriate function call case $__OCF_ACTION in @@ -151,13 +156,12 @@ stop) ganesha_nfsd_stop status|monitor) ganesha_nfsd_monitor ;; *) ganesha_nfsd_usage - exit $OCF_ERR_UNIMPLEMENTED - ;; + exit ${OCF_ERR_UNIMPLEMENTED} + ;; esac rc=$? # The resource agent may optionally log a debug message -ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" exit $rc - diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am index e71e2f6456b..7e345fd5f19 100644 --- a/extras/ganesha/scripts/Makefile.am +++ b/extras/ganesha/scripts/Makefile.am @@ -1,4 +1,6 @@ -EXTRA_DIST= ganesha-ha.sh dbus-send.sh create-export-ganesha.sh +EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \ + ganesha-ha.sh scriptsdir = $(libexecdir)/ganesha -scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh ganesha-ha.sh +scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \ + ganesha-ha.sh diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh index ab7c282af79..3040e8138b0 100755 --- a/extras/ganesha/scripts/create-export-ganesha.sh +++ b/extras/ganesha/scripts/create-export-ganesha.sh @@ -1,20 +1,37 @@ -#/bin/bash +#!/bin/bash #This script is called by glusterd when the user #tries to export a volume via NFS-Ganesha. #An export file specific to a volume #is created in GANESHA_DIR/exports. +# Try loading the config from any of the distro +# specific configuration locations +if [ -f /etc/sysconfig/ganesha ] + then + . /etc/sysconfig/ganesha +fi +if [ -f /etc/conf.d/ganesha ] + then + . /etc/conf.d/ganesha +fi +if [ -f /etc/default/ganesha ] + then + . /etc/default/ganesha +fi + GANESHA_DIR=${1%/} -VOL=$2 -CONF= -CONFFILE= +OPTION=$2 +VOL=$3 +CONF=$GANESHA_DIR"/ganesha.conf" +declare -i EXPORT_ID function check_cmd_status() { if [ "$1" != "0" ] then rm -rf $GANESHA_DIR/exports/export.$VOL.conf + sed -i /$VOL.conf/d $CONF exit 1 fi } @@ -26,34 +43,6 @@ if [ ! -d "$GANESHA_DIR/exports" ]; check_cmd_status `echo $?` fi -function find_rhel7_conf -{ - while [[ $# > 0 ]] - do - key="$1" - case $key in - -f) - CONFFILE="$2" - ;; - *) - ;; - esac - shift - done -} - -cfgline=$(grep ^CONFFILE= /etc/sysconfig/ganesha) -eval $(echo ${cfgline} | grep -F ^CONFFILE=) - -if [ -z $CONFFILE ] - then - cfgline=$(grep ^OPTIONS= /etc/sysconfig/ganesha) - eval $(echo ${cfgline} | grep -F ^OPTIONS=) - find_rhel7_conf $cfgline - -fi -CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} - function write_conf() { echo -e "# WARNING : Using Gluster CLI will overwrite manual @@ -75,11 +64,29 @@ echo " Pseudo=\"/$VOL\";" echo ' Protocols = "3", "4" ;' echo ' Transports = "UDP","TCP";' echo ' SecType = "sys";' +echo ' Security_Label = False;' echo " }" } - -write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf -if ! (cat $CONF | grep $VOL.conf$ ) +if [ "$OPTION" = "on" ]; then -echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF + if ! (cat $CONF | grep $VOL.conf\"$ ) + then + write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf + echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF + count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l` + if [ "$count" = "1" ] ; then + EXPORT_ID=2 + else + EXPORT_ID=`cat $GANESHA_DIR/.export_added` + check_cmd_status `echo $?` + EXPORT_ID=EXPORT_ID+1 + sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \ + $GANESHA_DIR/exports/export.$VOL.conf + check_cmd_status `echo $?` + fi + echo $EXPORT_ID > $GANESHA_DIR/.export_added + fi +else + rm -rf $GANESHA_DIR/exports/export.$VOL.conf + sed -i /$VOL.conf/d $CONF fi diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh index 5f268eb37c2..9d613a0e7ad 100755 --- a/extras/ganesha/scripts/dbus-send.sh +++ b/extras/ganesha/scripts/dbus-send.sh @@ -1,102 +1,62 @@ -#/bin/bash +#!/bin/bash -declare -i EXPORT_ID -GANESHA_DIR=${1%/} -OPTION=$2 -VOL=$3 -CONF= -CONFFILE= - -function find_rhel7_conf -{ - while [[ $# > 0 ]] - do - key="$1" - case $key in - -f) - CONFFILE="$2" - break; - ;; - *) - ;; - esac - shift - done -} - -cfgline=$(grep ^CONFFILE= /etc/sysconfig/ganesha) -eval $(echo ${cfgline} | grep -F ^CONFFILE=) - -if [ -z $CONFFILE ] +# Try loading the config from any of the distro +# specific configuration locations +if [ -f /etc/sysconfig/ganesha ] then - cfgline=$(grep ^OPTIONS= /etc/sysconfig/ganesha) - eval $(echo ${cfgline} | grep -F ^OPTIONS=) - find_rhel7_conf $cfgline - + . /etc/sysconfig/ganesha +fi +if [ -f /etc/conf.d/ganesha ] + then + . /etc/conf.d/ganesha +fi +if [ -f /etc/default/ganesha ] + then + . /etc/default/ganesha fi -CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} +GANESHA_DIR=${1%/} +OPTION=$2 +VOL=$3 +CONF=$GANESHA_DIR"/ganesha.conf" function check_cmd_status() { if [ "$1" != "0" ] - then - rm -rf $GANESHA_DIR/exports/export.$VOL.conf - sed -i /$VOL.conf/d $CONF - exit 1 + then + logger "dynamic export failed on node :${hostname -s}" fi } #This function keeps track of export IDs and increments it with every new entry function dynamic_export_add() { - count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l` - if [ "$count" = "1" ] ; - then - EXPORT_ID=2 - else - #if [ -s /var/lib/ganesha/export_removed ]; - # then - # EXPORT_ID=`head -1 /var/lib/ganesha/export_removed` - # sed -i -e "1d" /var/lib/ganesha/export_removed - # else - - EXPORT_ID=`cat $GANESHA_DIR/.export_added` - check_cmd_status `echo $?` - #fi - fi - for entry in `grep -n Export_Id $GANESHA_DIR/exports/export.$VOL.conf \ - | awk -F":" '{print$1}'` - do - sed -e "$entry s/Export_Id.*/Export_Id=$EXPORT_ID ;/" -i \ - $GANESHA_DIR/exports/export.$VOL.conf - check_cmd_status `echo $?` - dbus-send --system \ - --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ - org.ganesha.nfsd.exportmgr.AddExport \ - string:$GANESHA_DIR/exports/export.$VOL.conf \ - string:"EXPORT(Export_Id=$EXPORT_ID)" - EXPORT_ID=EXPORT_ID+1 - done - echo $EXPORT_ID > $GANESHA_DIR/.export_added + dbus-send --system \ +--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ +org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \ +string:"EXPORT(Path=/$VOL)" check_cmd_status `echo $?` } #This function removes an export dynamically(uses the export_id of the export) function dynamic_export_remove() { - grep Export_Id $GANESHA_DIR/exports/export.$VOL.conf | \ - while read entry; - do - dbus-send --print-reply --system \ - --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ - org.ganesha.nfsd.exportmgr.RemoveExport \ - uint16:$(echo $entry | awk -F"[=,;]" '{print$2}') - check_cmd_status `echo $?` - done - sed -i /$VOL.conf/d $CONF - rm -rf $GANESHA_DIR/exports/export.$VOL.conf + # Below bash fetch all the export from ShowExport command and search + # export entry based on path and then get its export entry. + # There are two possiblities for path, either entire volume will be + # exported or subdir. It handles both cases. But it remove only first + # entry from the list based on assumption that entry exported via cli + # has lowest export id value + removed_id=$(dbus-send --type=method_call --print-reply --system \ + --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ + org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \ + "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \ + | head -1) + dbus-send --print-reply --system \ +--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ +org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id + check_cmd_status `echo $?` } if [ "$OPTION" = "on" ]; @@ -108,4 +68,3 @@ if [ "$OPTION" = "off" ]; then dynamic_export_remove $@ fi - diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh index f17582b727a..9790a719e10 100644 --- a/extras/ganesha/scripts/ganesha-ha.sh +++ b/extras/ganesha/scripts/ganesha-ha.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2015 Red Hat Inc. All Rights Reserved +# Copyright 2015-2016 Red Hat Inc. All Rights Reserved # # Pacemaker+Corosync High Availability for NFS-Ganesha # @@ -20,18 +20,42 @@ # ensure that the NFS GRACE DBUS signal is sent after the VIP moves to # the new host. +GANESHA_HA_SH=$(realpath $0) HA_NUM_SERVERS=0 HA_SERVERS="" -HA_CONFDIR="/etc/ganesha" HA_VOL_NAME="gluster_shared_storage" -HA_VOL_MNT="/var/run/gluster/shared_storage" +HA_VOL_MNT="/run/gluster/shared_storage" +HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" SERVICE_MAN="DISTRO_NOT_FOUND" -RHEL6_PCS_CNAME_OPTION="--name" +# rhel, fedora id, version +ID="" +VERSION_ID="" + +PCS9OR10_PCS_CNAME_OPTION="" +PCS9OR10_PCS_CLONE_OPTION="clone" SECRET_PEM="/var/lib/glusterd/nfs/secret.pem" +# UNBLOCK RA uses shared_storage which may become unavailable +# during any of the nodes reboot. Hence increase timeout value. +PORTBLOCK_UNBLOCK_TIMEOUT="60s" + +# Try loading the config from any of the distro +# specific configuration locations +if [ -f /etc/sysconfig/ganesha ] + then + . /etc/sysconfig/ganesha +fi +if [ -f /etc/conf.d/ganesha ] + then + . /etc/conf.d/ganesha +fi +if [ -f /etc/default/ganesha ] + then + . /etc/default/ganesha +fi + GANESHA_CONF= -CONFFILE= function find_rhel7_conf { @@ -50,14 +74,9 @@ function find_rhel7_conf done } -cfgline=$(grep ^CONFFILE= /etc/sysconfig/ganesha) -eval $(echo ${cfgline} | grep -F ^CONFFILE=) - -if [ -z $CONFFILE ] +if [ -z ${CONFFILE} ] then - cfgline=$(grep ^OPTIONS= /etc/sysconfig/ganesha) - eval $(echo ${cfgline} | grep -F ^OPTIONS=) - find_rhel7_conf $cfgline + find_rhel7_conf ${OPTIONS} fi @@ -65,20 +84,21 @@ GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} usage() { - echo "Usage : add|delete|status" - echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ + echo "Usage : add|delete|refresh-config|status" + echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ <NODE-HOSTNAME> <NODE-VIP>" - echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ + echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ <NODE-HOSTNAME>" - echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR>\ - <volume>" + echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \ +<volume>" + echo "Status : ganesha-ha.sh --status <HA_CONFDIR>" } determine_service_manager () { - if [ -e "/usr/bin/systemctl" ]; + if [ -e "/bin/systemctl" ]; then - SERVICE_MAN="/usr/bin/systemctl" + SERVICE_MAN="/bin/systemctl" elif [ -e "/sbin/invoke-rc.d" ]; then SERVICE_MAN="/sbin/invoke-rc.d" @@ -86,9 +106,9 @@ determine_service_manager () { then SERVICE_MAN="/sbin/service" fi - if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ] + if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]] then - echo "Service manager not recognized, exiting" + logger "Service manager not recognized, exiting" exit 1 fi } @@ -97,16 +117,27 @@ manage_service () { local action=${1} local new_node=${2} - if [ "$SERVICE_MAN" == "/usr/sbin/systemctl" ] + local option= + + if [[ "${action}" == "start" ]]; then + option="yes" + else + option="no" + fi + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option" + + if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]] then ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} root@${new_node} "$SERVICE_MAN ${action} nfs-ganesha" +${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha" else ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}" +${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}" fi } + check_cluster_exists() { local name=${1} @@ -114,7 +145,7 @@ check_cluster_exists() if [ -e /var/run/corosync.pid ]; then cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3) - if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then + if [[ "${cluster_name}X" == "${name}X" ]]; then logger "$name already exists, exiting" exit 0 fi @@ -129,7 +160,7 @@ determine_servers() local tmp_ifs=${IFS} local ha_servers="" - if [[ "X${cmd}X" != "XsetupX" ]]; then + if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') IFS=$' ' for server in ${ha_servers} ; do @@ -149,6 +180,13 @@ determine_servers() fi } +stop_ganesha_all() +{ + local serverlist=${1} + for node in ${serverlist} ; do + manage_service "stop" ${node} + done +} setup_cluster() { @@ -156,37 +194,54 @@ setup_cluster() local num_servers=${2} local servers=${3} local unclean="" + local quorum_policy="stop" logger "setting up cluster ${name} with the following ${servers}" - pcs cluster auth ${servers} -# fedora pcs cluster setup ${name} ${servers} -# rhel6 pcs cluster setup --name ${name} ${servers} - pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} + # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers} + pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} if [ $? -ne 0 ]; then - logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" + logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out" + #set up failed stop all ganesha process and clean up symlinks in cluster + stop_ganesha_all "${servers}" exit 1; fi + + # pcs cluster auth ${servers} + pcs cluster auth + if [ $? -ne 0 ]; then + logger "pcs cluster auth failed" + fi + pcs cluster start --all if [ $? -ne 0 ]; then logger "pcs cluster start failed" exit 1; fi - sleep 3 + sleep 1 + # wait for the cluster to elect a DC before querying or writing + # to the CIB. BZ 1334092 + crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 + while [ $? -ne 0 ]; do + crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 + done + unclean=$(pcs status | grep -u "UNCLEAN") - while [[ "${unclean}X" = "UNCLEANX" ]]; do + while [[ "${unclean}X" == "UNCLEANX" ]]; do sleep 1 unclean=$(pcs status | grep -u "UNCLEAN") done sleep 1 if [ ${num_servers} -lt 3 ]; then - pcs property set no-quorum-policy=ignore - if [ $? -ne 0 ]; then - logger "warning: pcs property set no-quorum-policy=ignore failed" - fi + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" fi + pcs property set stonith-enabled=false if [ $? -ne 0 ]; then logger "warning: pcs property set stonith-enabled=false failed" @@ -194,123 +249,68 @@ setup_cluster() } -setup_finalize() +setup_finalize_ha() { local cibfile=${1} local stopped="" stopped=$(pcs status | grep -u "Stopped") - while [[ "${stopped}X" = "StoppedX" ]]; do + while [[ "${stopped}X" == "StoppedX" ]]; do sleep 1 stopped=$(pcs status | grep -u "Stopped") done - - pcs status | grep dead_ip-1 | sort > /var/run/ganesha/pcs_status - } -setup_copy_config() -{ - local short_host=$(hostname -s) - local tganesha_conf=$(mktemp -u) - - if [ -e ${SECRET_PEM} ]; then - while [[ ${1} ]]; do - current_host=`echo ${1} | cut -d "." -f 1` - if [ ${short_host} != ${current_host} ]; then - scp -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} ${HA_CONFDIR}/ganesha-ha.conf ${1}:${HA_CONFDIR}/ - if [ $? -ne 0 ]; then - logger "warning: scp ganesha-ha.conf to ${1} failed" - fi - fi - shift - done - else - logger "warning: scp ganesha-ha.conf to ${1} failed" - fi -} - refresh_config () { local short_host=$(hostname -s) local VOL=${1} local HA_CONFDIR=${2} + local short_host=$(hostname -s) + + local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\ + awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') - removed_id=`cat $HA_CONFDIR/exports/export.$VOL.conf |\ -grep Export_Id | cut -d " " -f8` if [ -e ${SECRET_PEM} ]; then while [[ ${3} ]]; do current_host=`echo ${3} | cut -d "." -f 1` - if [ ${short_host} != ${current_host} ]; then - scp -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} ${HA_CONFDIR}/exports/export.$VOL.conf \ -${current_host}:${HA_CONFDIR}/exports/ - grep Export_Id $HA_CONFDIR/exports/export.$VOL.conf | \ - while read entry; - do - export_id=$(echo $entry | awk -F"[=,;]" '{print$2}') - ssh -oPasswordAuthentication=no \ --oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ -"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ -/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.RemoveExport \ -uint16:$export_id" - sleep 1 - ssh -oPasswordAuthentication=no \ + if [[ ${short_host} != ${current_host} ]]; then + output=$(ssh -oPasswordAuthentication=no \ -oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ -"dbus-send --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ -org.ganesha.nfsd.exportmgr.AddExport string:$HA_CONFDIR/exports/export.$VOL.conf \ -string:\"EXPORT(Export_Id=$export_id)\"" - if [ $? -ne 0 ]; then - echo "warning: refresh-config failed on" \ - " ${current_host}" - fi - done - fi - shift +"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ +string:$HA_CONFDIR/exports/export.$VOL.conf \ +string:\"EXPORT(Export_Id=$export_id)\" 2>&1") + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ]; then + echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}" + else + echo "Refresh-config completed on ${current_host}." + fi + + fi + shift done else - echo "warning: refresh-config failed on ${1}" + echo "Error: refresh-config failed. Passwordless ssh is not enabled." + exit 1 fi -#Run the same command on the localhost, - grep Export_Id $HA_CONFDIR/exports/export.$VOL.conf | \ - while read entry; - do - export_id=$(echo $entry | awk -F"[=,;]" '{print$2}') - dbus-send --print-reply --system \ - --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ - org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id - sleep 1 - dbus-send --system \ - --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ - org.ganesha.nfsd.exportmgr.AddExport \ - string:$HA_CONFDIR/exports/export.$VOL.conf \ - string:"EXPORT(Export_Id=$export_id)" - done -} - -copy_export_config () -{ - local new_node=${1} - local tganesha_conf=$(mktemp) - local tganesha_exports=$(mktemp -d) - local short_host=$(hostname -s) - # avoid prompting for password, even with password-less scp - # scp $host1:$file $host2:$file prompts for the password - scp -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} ${HA_VOL_SERVER}:${GANESHA_CONF} $short_host:${tganesha_conf} - scp -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} ${tganesha_conf} ${new_node}:${GANESHA_CONF} - rm -f ${tganesha_conf} - - scp -r -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} ${HA_VOL_SERVER}:${HA_CONFDIR}/exports/ $short_host:${tganesha_exports} - scp -r -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} ${tganesha_exports}/exports ${new_node}:${HA_CONFDIR}/ - rm -rf ${tganesha_exports} + # Run the same command on the localhost, + output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ +string:$HA_CONFDIR/exports/export.$VOL.conf \ +string:"EXPORT(Export_Id=$export_id)" 2>&1) + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ] ; then + echo "Refresh-config failed on localhost." + else + echo "Success: refresh-config completed." + fi } @@ -322,7 +322,7 @@ teardown_cluster() if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then logger "info: ${server} is not in config, removing" - pcs cluster stop ${server} + pcs cluster stop ${server} --force if [ $? -ne 0 ]; then logger "warning: pcs cluster stop ${server} failed" fi @@ -334,13 +334,13 @@ teardown_cluster() fi done -# BZ 1193433 - pcs doesn't reload cluster.conf after modification -# after teardown completes, a subsequent setup will appear to have -# 'remembered' the deleted node. You can work around this by -# issuing another `pcs cluster node remove $node`, -# `crm_node -f -R $server`, or -# `cibadmin --delete --xml-text '<node id="$server" -# uname="$server"/>' + # BZ 1193433 - pcs doesn't reload cluster.conf after modification + # after teardown completes, a subsequent setup will appear to have + # 'remembered' the deleted node. You can work around this by + # issuing another `pcs cluster node remove $node`, + # `crm_node -f -R $server`, or + # `cibadmin --delete --xml-text '<node id="$server" + # uname="$server"/>' pcs cluster stop --all if [ $? -ne 0 ]; then @@ -357,11 +357,10 @@ teardown_cluster() cleanup_ganesha_config () { - rm -rf ${HA_CONFDIR}/exports/*.conf - rm -rf ${HA_CONFDIR}/.export_added - rm -rf /etc/cluster/cluster.conf* - rm -rf /var/lib/pacemaker/cib/* - sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' ${GANESHA_CONF} + rm -f /etc/corosync/corosync.conf + rm -rf /etc/cluster/cluster.conf* + rm -rf /var/lib/pacemaker/cib/* + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf } do_create_virt_ip_constraints() @@ -372,17 +371,17 @@ do_create_virt_ip_constraints() # first a constraint location rule that says the VIP must be where # there's a ganesha.nfsd running - pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 rule score=-INFINITY ganesha-active ne 1 + pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 if [ $? -ne 0 ]; then - logger "warning: pcs constraint location ${primary}-cluster_ip-1 rule score=-INFINITY ganesha-active ne 1 failed" + logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed" fi # then a set of constraint location prefers to set the prefered order # for where a VIP should move while [[ ${1} ]]; do - pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 prefers ${1}=${weight} + pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight} if [ $? -ne 0 ]; then - logger "warning: pcs constraint location ${primary}-cluster_ip-1 prefers ${1}=${weight} failed" + logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed" fi weight=$(expr ${weight} + 1000) shift @@ -392,9 +391,9 @@ do_create_virt_ip_constraints() # on Fedora setting appears to be additive, so to get the desired # value we adjust the weight # weight=$(expr ${weight} - 100) - pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 prefers ${primary}=${weight} + pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight} if [ $? -ne 0 ]; then - logger "warning: pcs constraint location ${primary}-cluster_ip-1 prefers ${primary}=${weight} failed" + logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed" fi } @@ -410,7 +409,7 @@ wrap_create_virt_ip_constraints() # the result is "node2 node3 node4"; for node2, "node3 node4 node1" # and so on. while [[ ${1} ]]; do - if [ "${1}" = "${primary}" ]; then + if [[ ${1} == ${primary} ]]; then shift while [[ ${1} ]]; do tail=${tail}" "${1} @@ -440,28 +439,31 @@ setup_create_resources() { local cibfile=$(mktemp -u) - # mount the HA-state volume and start ganesha.nfsd on all nodes - pcs resource create nfs_start ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone + # fixup /var/lib/nfs + logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}" + pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} if [ $? -ne 0 ]; then - logger "warning: pcs resource create nfs_start ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed" + logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed" fi - sleep 1 - # cloned resources seem to never have their start() invoked when they - # are created, but stop() is invoked when they are destroyed. Why???. - # No matter, we don't want this resource agent hanging around anyway - pcs resource delete nfs_start-clone + + pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} if [ $? -ne 0 ]; then - logger "warning: pcs resource delete nfs_start-clone failed" + logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed" fi - pcs resource create nfs-mon ganesha_mon --clone + # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace + # start method. Allow time for ganesha_mon to start and set the + # ganesha-active crm_attribute + sleep 5 + + pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true if [ $? -ne 0 ]; then - logger "warning: pcs resource create nfs-mon ganesha_mon --clone failed" + logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed" fi - pcs resource create nfs-grace ganesha_grace --clone + pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 if [ $? -ne 0 ]; then - logger "warning: pcs resource create nfs-grace ganesha_grace --clone failed" + logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1" fi pcs cluster cib ${cibfile} @@ -486,30 +488,32 @@ setup_create_resources() eval tmp_ipaddr=\$${clean_name} ipaddr=${tmp_ipaddr//_/.} - pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s + pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=block ip=${ipaddr} --group ${1}-group if [ $? -ne 0 ]; then - logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s failed" + logger "warning pcs resource create ${1}-nfs_block failed" fi - - pcs -f ${cibfile} resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy + pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block if [ $? -ne 0 ]; then - logger "warning: pcs resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy failed" + logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s failed" fi - pcs -f ${cibfile} constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1 + pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 if [ $? -ne 0 ]; then - logger "warning: pcs constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1 failed" + logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" fi - pcs -f ${cibfile} constraint order ${1}-trigger_ip-1 then nfs-grace-clone + pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ + op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ + op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} if [ $? -ne 0 ]; then - logger "warning: pcs constraint order ${1}-trigger_ip-1 then nfs-grace-clone failed" + logger "warning pcs resource create ${1}-nfs_unblock failed" fi - pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 - if [ $? -ne 0 ]; then - logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" - fi shift done @@ -528,6 +532,13 @@ teardown_resources() { # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2) + # restore /var/lib/nfs + logger "notice: pcs resource delete nfs_setup-clone" + pcs resource delete nfs_setup-clone + if [ $? -ne 0 ]; then + logger "warning: pcs resource delete nfs_setup-clone failed" + fi + # delete -clone resource agents # in particular delete the ganesha monitor so we don't try to # trigger anything when we shut down ganesha next. @@ -541,31 +552,10 @@ teardown_resources() logger "warning: pcs resource delete nfs-grace-clone failed" fi - # unmount the HA-state volume and terminate ganesha.nfsd on all nodes - pcs resource create nfs_stop ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone - if [ $? -ne 0 ]; then - logger "warning: pcs resource create nfs_stop ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed" - fi - sleep 1 - # cloned resources seem to never have their start() invoked when they - # are created, but stop() is invoked when they are destroyed. Why???. - pcs resource delete nfs_stop-clone - if [ $? -ne 0 ]; then - logger "warning: pcs resource delete nfs_stop-clone failed" - fi - while [[ ${1} ]]; do - pcs resource delete ${1}-cluster_ip-1 - if [ $? -ne 0 ]; then - logger "warning: pcs resource delete ${1}-cluster_ip-1 failed" - fi - pcs resource delete ${1}-trigger_ip-1 - if [ $? -ne 0 ]; then - logger "warning: pcs resource delete ${1}-trigger_ip-1 failed" - fi - pcs resource delete ${1}-dead_ip-1 + pcs resource delete ${1}-group if [ $? -ne 0 ]; then - logger "info: pcs resource delete ${1}-dead_ip-1 failed" + logger "warning: pcs resource delete ${1}-group failed" fi shift done @@ -588,29 +578,30 @@ recreate_resources() eval tmp_ipaddr=\$${clean_name} ipaddr=${tmp_ipaddr//_/.} - pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s - if [ $? -ne 0 ]; then - logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=10s failed" - fi - - pcs -f ${cibfile} resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy + pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=block ip=${ipaddr} --group ${1}-group if [ $? -ne 0 ]; then - logger "warning: pcs resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy failed" + logger "warning pcs resource create ${1}-nfs_block failed" fi - - pcs -f ${cibfile} constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1 + pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block if [ $? -ne 0 ]; then - logger "warning: pcs constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1 failed" + logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s failed" fi - pcs -f ${cibfile} constraint order ${1}-trigger_ip-1 then nfs-grace-clone + pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 if [ $? -ne 0 ]; then - logger "warning: pcs constraint order ${1}-trigger_ip-1 then nfs-grace-clone failed" + logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" fi - pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 + pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ + op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ + op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} if [ $? -ne 0 ]; then - logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" + logger "warning pcs resource create ${1}-nfs_unblock failed" fi shift @@ -626,30 +617,32 @@ addnode_recreate_resources() recreate_resources ${cibfile} ${HA_SERVERS} - pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${add_vip} cidr_netmask=32 op monitor interval=15s + pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \ + protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group if [ $? -ne 0 ]; then - logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${add_vip} cidr_netmask=32 op monitor interval=10s failed" + logger "warning pcs resource create ${add_node}-nfs_block failed" fi - - pcs -f ${cibfile} resource create ${add_node}-trigger_ip-1 ocf:heartbeat:Dummy - if [ $? -ne 0 ]; then - logger "warning: pcs resource create ${add_node}-trigger_ip-1 ocf:heartbeat:Dummy failed" - fi - - pcs -f ${cibfile} constraint colocation add ${add_node}-cluster_ip-1 with ${add_node}-trigger_ip-1 + pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ + ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \ + --after ${add_node}-nfs_block if [ $? -ne 0 ]; then - logger "warning: pcs constraint colocation add ${add_node}-cluster_ip-1 with ${add_node}-trigger_ip-1 failed" - fi - - pcs -f ${cibfile} constraint order ${add_node}-trigger_ip-1 then nfs-grace-clone - if [ $? -ne 0 ]; then - logger "warning: pcs constraint order ${add_node}-trigger_ip-1 then nfs-grace-clone failed" + logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ + ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" fi pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 if [ $? -ne 0 ]; then logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed" fi + pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \ + protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \ + ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \ + timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \ + timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${add_node}-nfs_unblock failed" + fi } @@ -658,14 +651,9 @@ clear_resources() local cibfile=${1}; shift while [[ ${1} ]]; do - pcs -f ${cibfile} resource delete ${1}-cluster_ip-1 - if [ $? -ne 0 ]; then - logger "warning: pcs -f ${cibfile} resource delete ${1}-cluster_ip-1" - fi - - pcs -f ${cibfile} resource delete ${1}-trigger_ip-1 + pcs -f ${cibfile} resource delete ${1}-group if [ $? -ne 0 ]; then - logger "warning: pcs -f ${cibfile} resource delete ${1}-trigger_ip-1" + logger "warning: pcs -f ${cibfile} resource delete ${1}-group" fi shift @@ -679,52 +667,19 @@ addnode_create_resources() local add_vip=${1}; shift local cibfile=$(mktemp -u) - # mount the HA-state volume and start ganesha.nfsd on the new node - pcs cluster cib ${cibfile} - if [ $? -ne 0 ]; then - logger "warning: pcs cluster cib ${cibfile} failed" - fi - - pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} - if [ $? -ne 0 ]; then - logger "warning: pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} failed" - fi - - pcs -f ${cibfile} constraint location nfs_start-${add_node} prefers ${add_node}=INFINITY - if [ $? -ne 0 ]; then - logger "warning: pcs -f ${cibfile} constraint location nfs_start-${add_node} prefers ${add_node}=INFINITY failed" - fi - - pcs -f ${cibfile} constraint order nfs_start-${add_node} then nfs-mon-clone - if [ $? -ne 0 ]; then - logger "warning: pcs -f ${cibfile} constraint order nfs_start-${add_node} then nfs-mon-clone failed" - fi - - pcs cluster cib-push ${cibfile} - if [ $? -ne 0 ]; then - logger "warning: pcs cluster cib-push ${cibfile} failed" - fi - rm -f ${cibfile} - # start HA on the new node pcs cluster start ${add_node} if [ $? -ne 0 ]; then logger "warning: pcs cluster start ${add_node} failed" fi - pcs resource delete nfs_start-${add_node} - if [ $? -ne 0 ]; then - logger "warning: pcs resource delete nfs_start-${add_node} failed" - fi - - pcs cluster cib ${cibfile} if [ $? -ne 0 ]; then logger "warning: pcs cluster cib ${cibfile} failed" fi - # delete all the -cluster_ip-1 and -trigger_ip-1 resources, - # clearing their constraints, then create them again so we can + # delete all the -cluster_ip-1 resources, clearing + # their constraints, then create them again so we can # recompute their constraints clear_resources ${cibfile} ${HA_SERVERS} addnode_recreate_resources ${cibfile} ${add_node} ${add_vip} @@ -766,31 +721,16 @@ deletenode_delete_resources() fi rm -f ${cibfile} - pcs cluster cib ${cibfile} - if [ $? -ne 0 ]; then - logger "warning: pcs cluster cib ${cibfile} failed" - fi - - pcs -f ${cibfile} resource create nfs_stop-${node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} - if [ $? -ne 0 ]; then - logger "warning: pcs -f ${cibfile} resource create nfs_stop-${node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} failed" - fi +} - pcs -f ${cibfile} constraint location nfs_stop-${node} prefers ${node}=INFINITY - if [ $? -ne 0 ]; then - logger "warning: pcs -f ${cibfile} constraint location nfs_stop-${node} prefers ${node}=INFINITY failed" - fi - pcs cluster cib-push ${cibfile} - if [ $? -ne 0 ]; then - logger "warning: pcs cluster cib-push ${cibfile} failed" - fi - rm -f ${cibfile} +deletenode_update_haconfig() +{ + local name="VIP_${1}" + local clean_name=${name//[-.]/_} - pcs resource delete nfs_stop-${node} - if [ $? -ne 0 ]; then - logger "warning: pcs resource delete nfs_stop-${node} failed" - fi + ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/") + sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf } @@ -813,7 +753,9 @@ setup_state_volume() dirname=${1}${dname} fi - + if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then + mkdir ${mnt}/nfs-ganesha/tickle_dir + fi if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then mkdir ${mnt}/nfs-ganesha/${dirname} fi @@ -825,9 +767,11 @@ setup_state_volume() fi if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd fi if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state fi if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov @@ -837,15 +781,17 @@ setup_state_volume() fi if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm fi if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak fi if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state fi for server in ${HA_SERVERS} ; do - if [ ${server} != ${dirname} ]; then + if [[ ${server} != ${dirname} ]]; then ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} fi @@ -856,6 +802,214 @@ setup_state_volume() } +enable_pacemaker() +{ + while [[ ${1} ]]; do + if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" + else + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable" + fi + shift + done +} + + +addnode_state_volume() +{ + local newnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${newnode} == *${dname} ]]; then + dirname=${newnode} + else + dirname=${newnode}${dname} + fi + + if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then + mkdir ${mnt}/nfs-ganesha/${dirname} + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + + for server in ${HA_SERVERS} ; do + + if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done + +} + + +delnode_state_volume() +{ + local delnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${delnode} == *${dname} ]]; then + dirname=${delnode} + else + dirname=${delnode}${dname} + fi + + rm -rf ${mnt}/nfs-ganesha/${dirname} + + for server in ${HA_SERVERS} ; do + if [[ ${server} != ${dirname} ]]; then + rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done +} + + +status() +{ + local scratch=$(mktemp) + local regex_str="^${1}-cluster_ip-1" + local healthy=0 + local index=1 + local nodes + + # change tabs to spaces, strip leading spaces, including any + # new '*' at the beginning of a line introduced in pcs-0.10.x + pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch} + + nodes[0]=${1}; shift + + # make a regex of the configured nodes + # and initalize the nodes array for later + while [[ ${1} ]]; do + + regex_str="${regex_str}|^${1}-cluster_ip-1" + nodes[${index}]=${1} + ((index++)) + shift + done + + # print the nodes that are expected to be online + grep -E "Online:" ${scratch} + + echo + + # print the VIPs and which node they are on + grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4 + + echo + + # check if the VIP and port block/unblock RAs are on the expected nodes + for n in ${nodes[*]}; do + + grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + done + + grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} + result=$? + + if [ ${result} -eq 0 ]; then + echo "Cluster HA Status: BAD" + elif [ ${healthy} -eq 0 ]; then + echo "Cluster HA Status: HEALTHY" + else + echo "Cluster HA Status: FAILOVER" + fi + + rm -f ${scratch} +} + +create_ganesha_conf_file() +{ + if [[ "$1" == "yes" ]]; + then + if [ -e $GANESHA_CONF ]; + then + rm -rf $GANESHA_CONF + fi + # The symlink /etc/ganesha/ganesha.conf need to be + # created using ganesha conf file mentioned in the + # shared storage. Every node will only have this + # link and actual file will stored in shared storage, + # so that ganesha conf editing of ganesha conf will + # be easy as well as it become more consistent. + + ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF + else + # Restoring previous file + rm -rf $GANESHA_CONF + cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF + fi +} + +set_quorum_policy() +{ + local quorum_policy="stop" + local num_servers=${1} + + if [ ${num_servers} -lt 3 ]; then + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" + fi +} main() { @@ -865,27 +1019,29 @@ main() usage exit 0 fi - if [[ ${cmd} != *status ]]; then - HA_CONFDIR=${1%/}; shift - local ha_conf=${HA_CONFDIR}/ganesha-ha.conf - local node="" - local vip="" - # ignore any comment lines - cfgline=$(grep ^HA_NAME= ${ha_conf}) - eval $(echo ${cfgline} | grep -F HA_NAME=) - cfgline=$(grep ^HA_VOL_SERVER= ${ha_conf}) - eval $(echo ${cfgline} | grep -F HA_VOL_SERVER=) - cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) - eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) - - # we'll pretend that nobody ever edits /etc/os-release - if [ -e /etc/os-release ]; then - eval $(grep -F "REDHAT_SUPPORT_PRODUCT=" /etc/os-release) - [ "$REDHAT_SUPPORT_PRODUCT" == "Fedora" ] && RHEL6_PCS_CNAME_OPTION="" - fi + if (selinuxenabled) ;then + semanage boolean -m gluster_use_execmem --on fi + local osid="" + + osid=$(grep ^ID= /etc/os-release) + eval $(echo ${osid} | grep -F ID=) + osid=$(grep ^VERSION_ID= /etc/os-release) + eval $(echo ${osid} | grep -F VERSION_ID=) + + HA_CONFDIR=${1%/}; shift + local ha_conf=${HA_CONFDIR}/ganesha-ha.conf + local node="" + local vip="" + + # ignore any comment lines + cfgline=$(grep ^HA_NAME= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_NAME=) + cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) + case "${cmd}" in setup | --setup) @@ -895,17 +1051,30 @@ main() determine_servers "setup" - if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then + # Fedora 29+ and rhel/centos 8 has PCS-0.10.x + # default is pcs-0.10.x options but check for + # rhel/centos 7 (pcs-0.9.x) and adjust accordingly + if [[ ! ${ID} =~ {rhel,centos} ]]; then + if [[ ${VERSION_ID} == 7.* ]]; then + PCS9OR10_PCS_CNAME_OPTION="--name" + PCS9OR10_PCS_CLONE_OPTION="--clone" + fi + fi + + if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then + + determine_service_manager setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" setup_create_resources ${HA_SERVERS} + setup_finalize_ha + setup_state_volume ${HA_SERVERS} - setup_copy_config ${HA_SERVERS} + enable_pacemaker ${HA_SERVERS} - setup_finalize else logger "insufficient servers for HA, aborting" @@ -920,6 +1089,8 @@ main() teardown_resources ${HA_SERVERS} teardown_cluster ${HA_NAME} + + cleanup_ganesha_config ${HA_CONFDIR} ;; cleanup | --cleanup) @@ -932,8 +1103,6 @@ main() logger "adding ${node} with ${vip} to ${HA_NAME}" - copy_export_config ${node} ${HA_CONFDIR} - determine_service_manager manage_service "start" ${node} @@ -946,20 +1115,26 @@ main() fi addnode_create_resources ${node} ${vip} - #Subsequent add-node recreates resources for all the nodes - #that already exist in the cluster. The nodes are picked up - #from the entries in the ganesha-ha.conf file. Adding the - #newly added node to the file so that the resources specfic - #to this node is correctly recreated in the future. - echo "VIP_$node=\"$vip\"" >> ${HA_CONFDIR}/ganesha-ha.conf + # Subsequent add-node recreates resources for all the nodes + # that already exist in the cluster. The nodes are picked up + # from the entries in the ganesha-ha.conf file. Adding the + # newly added node to the file so that the resources specfic + # to this node is correctly recreated in the future. + clean_node=${node//[-.]/_} + echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf - NEW_NODES="$HA_CLUSTER_NODES,$node" + NEW_NODES="$HA_CLUSTER_NODES,${node}" sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \ $HA_CONFDIR/ganesha-ha.conf - HA_SERVERS="${HA_SERVERS} ${node}" - setup_copy_config ${HA_SERVERS} + addnode_state_volume ${node} + + # addnode_create_resources() already appended ${node} to + # HA_SERVERS, so only need to increment HA_NUM_SERVERS + # and set quorum policy + HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1) + set_quorum_policy ${HA_NUM_SERVERS} ;; delete | --delete) @@ -976,20 +1151,22 @@ $HA_CONFDIR/ganesha-ha.conf logger "warning: pcs cluster node remove ${node} failed" fi - ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/") - sed -i "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" ${HA_CONFDIR}/ganesha-ha.conf - - setup_copy_config ${HA_SERVERS} + deletenode_update_haconfig ${node} - rm -rf ${HA_VOL_MNT}/nfs-ganesha/{node} + delnode_state_volume ${node} determine_service_manager manage_service "stop" ${node} + + HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1) + set_quorum_policy ${HA_NUM_SERVERS} ;; status | --status) - exec pcs status + determine_servers "status" + + status ${HA_SERVERS} ;; refresh-config | --refresh-config) @@ -1000,7 +1177,12 @@ $HA_CONFDIR/ganesha-ha.conf refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS} ;; - *) + setup-ganesha-conf-files | --setup-ganesha-conf-files) + + create_ganesha_conf_file ${1} + ;; + + *) # setup and teardown are not intended to be used by a # casual user usage @@ -1008,7 +1190,10 @@ $HA_CONFDIR/ganesha-ha.conf ;; esac + + if (selinuxenabled) ;then + semanage boolean -m gluster_use_execmem --off + fi } main $* - diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py new file mode 100755 index 00000000000..77af014bab9 --- /dev/null +++ b/extras/ganesha/scripts/generate-epoch.py @@ -0,0 +1,48 @@ +#!/usr/bin/python3 +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# +# Generates unique epoch value on each gluster node to be used by +# nfs-ganesha service on that node. +# +# Configure 'EPOCH_EXEC' option to this script path in +# '/etc/sysconfig/ganesha' file used by nfs-ganesha service. +# +# Construct epoch as follows - +# first 32-bit contains the now() time +# rest 32-bit value contains the local glusterd node uuid + +import time +import binascii + +# Calculate the now() time into a 64-bit integer value +def epoch_now(): + epoch_time = int(time.mktime(time.localtime())) << 32 + return epoch_time + +# Read glusterd UUID and extract first 32-bit of it +def epoch_uuid(): + file_name = '/var/lib/glusterd/glusterd.info' + + for line in open(file_name): + if "UUID" in line: + glusterd_uuid = line.split('=')[1].strip() + + uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-","")) + + epoch_uuid = int(binascii.hexlify(uuid_bin), 32) & 0xFFFF0000 + return epoch_uuid + +# Construct epoch as follows - +# first 32-bit contains the now() time +# rest 32-bit value contains the local glusterd node uuid +epoch = (epoch_now() | epoch_uuid()) +print((str(epoch))) + +exit(0) |
