From ea6a1ebe931e49464eb17205b94f5c87765cf696 Mon Sep 17 00:00:00 2001 From: Soumya Koduri Date: Fri, 8 Jul 2016 12:30:25 +0530 Subject: common-HA: Add portblock RA to tickle packets post failover(/back) Portblock resource-agents are used to send tickle ACKs so as to reset the outstanding TCP connections. This can be used to reduce the time taken by the NFS clients to reconnect post IP failover/failback. Two new resource agents (nfs_block and nfs_unblock) of type ocf:portblock with action block & unblock are created for each Virtual-IP (cluster_ip-1). These resource agents along with cluster_ip-1 RA are grouped in the order of block->IP->unblock and also the entire group maintains the same colocation rules so that they reside on the same node at any given point of time. The contents of tickle_dir are of the following format - * A file is created for each of the VIPs used in the ganesha cluster. * Each of those files contains entries about clients connected as below: SourceIP:port_num DestinationIP:port_num Hence when one server fails over, connections of the clients connected to other VIPs are not affected. Note: During testing I observed that tickle ACKs are sent during failback but not during failover, though I/O successfully resumed post failover. Also added a dependency on portblock RA for glusterfs-ganesha package as it may not be available (as part of resource-agents package) in all the distributions. 
Change-Id: Icad6169449535f210d9abe302c2a6971a0a96d6f BUG: 1354439 Signed-off-by: Soumya Koduri Reviewed-on: http://review.gluster.org/14878 NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Kaleb KEITHLEY Smoke: Gluster Build System Reviewed-by: Niels de Vos --- extras/ganesha/scripts/ganesha-ha.sh | 38 ++++++++++++++++++++++++++---------- glusterfs.spec.in | 5 +++++ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh index 8b55abbc6c5..143897f2401 100644 --- a/extras/ganesha/scripts/ganesha-ha.sh +++ b/extras/ganesha/scripts/ganesha-ha.sh @@ -432,17 +432,17 @@ do_create_virt_ip_constraints() # first a constraint location rule that says the VIP must be where # there's a ganesha.nfsd running - pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 rule score=-INFINITY ganesha-active ne 1 + pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 if [ $? -ne 0 ]; then - logger "warning: pcs constraint location ${primary}-cluster_ip-1 rule score=-INFINITY ganesha-active ne 1 failed" + logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed" fi # then a set of constraint location prefers to set the prefered order # for where a VIP should move while [[ ${1} ]]; do - pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 prefers ${1}=${weight} + pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight} if [ $? 
-ne 0 ]; then - logger "warning: pcs constraint location ${primary}-cluster_ip-1 prefers ${1}=${weight} failed" + logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed" fi weight=$(expr ${weight} + 1000) shift @@ -452,9 +452,9 @@ do_create_virt_ip_constraints() # on Fedora setting appears to be additive, so to get the desired # value we adjust the weight # weight=$(expr ${weight} - 100) - pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 prefers ${primary}=${weight} + pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight} if [ $? -ne 0 ]; then - logger "warning: pcs constraint location ${primary}-cluster_ip-1 prefers ${primary}=${weight} failed" + logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed" fi } @@ -549,9 +549,16 @@ setup_create_resources() eval tmp_ipaddr=\$${clean_name} ipaddr=${tmp_ipaddr//_/.} - pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s + pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=block ip=${ipaddr} --group ${1}-group + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block if [ $? 
-ne 0 ]; then - logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s failed" + logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s failed" fi pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 @@ -559,6 +566,14 @@ setup_create_resources() logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" fi + pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_unblock failed" + fi + + shift done @@ -597,9 +612,9 @@ teardown_resources() fi while [[ ${1} ]]; do - pcs resource delete ${1}-cluster_ip-1 + pcs resource delete ${1}-group if [ $? -ne 0 ]; then - logger "warning: pcs resource delete ${1}-cluster_ip-1 failed" + logger "warning: pcs resource delete ${1}-group failed" fi shift done @@ -765,6 +780,9 @@ setup_state_volume() fi + if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then + mkdir ${mnt}/nfs-ganesha/tickle_dir + fi if [ ! 
-d ${mnt}/nfs-ganesha/${dirname} ]; then mkdir ${mnt}/nfs-ganesha/${dirname} fi diff --git a/glusterfs.spec.in b/glusterfs.spec.in index 4f388f74ba3..56b1868b8d6 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -369,6 +369,8 @@ Requires: nfs-ganesha-gluster, pcs, dbus %if ( 0%{?rhel} && 0%{?rhel} == 6 ) Requires: cman, pacemaker, corosync %endif +# we need portblock resource-agent +Requires: %{_prefix}/lib/ocf/resource.d/portblock %description ganesha GlusterFS is a distributed file-system capable of scaling to several @@ -1234,6 +1236,9 @@ exit 0 %endif %changelog +* Sun Jul 31 2016 Soumya Koduri +- Add dependency on portblock resource agent for ganesha package (#1354439) + * Wed Jul 15 2016 Aravinda VK - Added new subpackage events(glusterfs-events) (#1334044) -- cgit