Resource based failover - give it a try ;)

Luis Claudio R.Goncalves lclaudio@conectiva.com.br
Mon, 23 Apr 2001 11:49:24 -0300


Hello!

   I sent this patch to Alan some weeks ago and he is doing some heavy
testing. Anyway, it has passed my tests and I'd like to have someone else
test it.
   I created a script called /usr/lib/heartbeat/hb_standby that, once called
on node A, forces node A to become secondary (standby) and node B to
take over the resources held by A. Node A won't go standby if:

	- Node A is already secondary;
	- The other node is down or
	- There's a standby conversation already running.

   This patch can be applied against a plain 0.4.9 heartbeat and it doesn't
touch other heartbeat business. If you're brave enough or if you need this
feature, please give it a try.
   There are lots of ugly debug messages... but as soon as it gets
approved, I'll clean up the code.

							Luis

-------------------------cut here----------------------
diff -ruN heartbeat-0.4.9/Makefile heartbeat-0.4.9-se/Makefile
--- heartbeat-0.4.9/Makefile	Mon Apr  2 15:54:31 2001
+++ heartbeat-0.4.9-se/Makefile	Wed Mar 21 17:20:52 2001
@@ -10,7 +10,7 @@
 #
 PKG=heartbeat
 VERS=0.4.9
-RPMREL=1
+RPMREL=1se
 
 INITD=$(shell [ -d /etc/init.d ] && echo /etc/init.d || echo /etc/rc.d/init.d )
 LOGROTATED=/etc/logrotate.d
diff -ruN heartbeat-0.4.9/heartbeat/Makefile heartbeat-0.4.9-se/heartbeat/Makefile
--- heartbeat-0.4.9/heartbeat/Makefile	Mon Apr  2 15:54:31 2001
+++ heartbeat-0.4.9-se/heartbeat/Makefile	Tue Mar 20 18:08:16 2001
@@ -134,7 +134,7 @@
 
 PRODUCTS = $(LIBCMDS) $(LIBS) 
 
-LIBSCRIPTS = lib/mach_down lib/req_resource lib/ResourceManager
+LIBSCRIPTS = lib/mach_down lib/req_resource lib/ResourceManager lib/hb_standby
 
 RESOURCECMDS= resource.d/IPaddr resource.d/AudibleAlarm resource.d/Raid1 resource.d/Filesystem
 
diff -ruN heartbeat-0.4.9/heartbeat/ha_msg.h heartbeat-0.4.9-se/heartbeat/ha_msg.h
--- heartbeat-0.4.9/heartbeat/ha_msg.h	Sun Nov 12 02:29:22 2000
+++ heartbeat-0.4.9-se/heartbeat/ha_msg.h	Tue Mar 20 18:08:16 2001
@@ -68,6 +68,7 @@
 #define	T_IFSTATUS	"ifstat"	/* Interface status */
 #define T_APIREQ	"hbapi-req" 	/* Heartbeat API request */
 #define T_APIRESP	"hbapi-resp" 	/* Heartbeat API response */
+#define	T_ASKRESOURCES	"ask_resources"	/* Let the other node ask my resources */
 #define	T_STONITH	"stonith"	/* Stonith return code */
 #define T_SHUTDONE	"shutdone"      /* External Shutdown complete */
 
diff -ruN heartbeat-0.4.9/heartbeat/heartbeat.c heartbeat-0.4.9-se/heartbeat/heartbeat.c
--- heartbeat-0.4.9/heartbeat/heartbeat.c	Fri Mar 16 00:01:12 2001
+++ heartbeat-0.4.9-se/heartbeat/heartbeat.c	Mon Apr  2 15:53:59 2001
@@ -287,6 +287,10 @@
 #define ALL_RSC			(LOCAL_RSC|FOREIGN_RSC)
 #define ALL_RESOURCES		"all"
 
+enum standby { NOT, ME, OTHER, DONE };
+enum standby going_standby = NOT;
+int  standby_running = 0;
+
 const char *		rsc_msg[] =	{NO_RESOURCES, LOCAL_RESOURCES,
         				 FOREIGN_RESOURCES, ALL_RESOURCES};
 int		verbose = 0;
@@ -369,6 +373,7 @@
 void	check_for_timeouts(void);
 void	check_comm_isup(void);
 int	send_resources_held(const char *str, int stable);
+int	send_standby_msg(enum standby state);
 int	send_local_starting(void);
 int	send_local_status(void);
 int	set_local_status(const char * status);
@@ -398,9 +403,11 @@
 void	nak_rexmit(unsigned long seqno, const char * reason);
 void	req_our_resources(int getthemanyway);
 void	giveup_resources(int);
+void	go_standby(enum standby who);
 void	make_realtime(void);
 void	make_normaltime(void);
 int	IncrGeneration(unsigned long * generation);
+void	ask_for_resources(struct ha_msg *msg);
 
 /* The biggies */
 void control_process(FILE * f);
@@ -1254,6 +1261,14 @@
 	now = time(NULL);
 	messagetime = times(NULL);
 
+	if (standby_running) {
+		/* if there's a standby timer running, verify if it's
+		   time to enable the standby messages again... */
+		if (now >= standby_running) {
+			standby_running = 0;
+		}
+	}
+
 	/* Extract message type, originator, timestamp, auth */
 	type = ha_msg_value(msg, F_TYPE);
 	from = ha_msg_value(msg, F_ORIG);
@@ -1366,6 +1381,19 @@
 		goto psm_done;
 	}
 
+	/* If someone asked us to turn "standby" mode on... */
+	if (strcasecmp(type, T_ASKRESOURCES) == 0) {
+		/* if the last standby conversation finished... */
+		if (!standby_running) {
+			/* someone wants to go standby!!! */
+			ask_for_resources(msg);
+		} else {
+			ha_log(LOG_INFO,
+			"Standby delay is running. MSG from %s ignored", from);
+		}
+		goto psm_done;
+	}
+		
 	/* Is this a status update (i.e., "heartbeat") message? */
 	if (strcasecmp(type, T_STATUS) == 0
 	||	strcasecmp(type, T_NS_STATUS) == 0) {
@@ -2426,6 +2454,41 @@
 }
 
 
+/* Send "standby" related msgs out to the cluster */
+int
+send_standby_msg(enum standby state)
+{
+const char * standby_msg[] = { "not", "me", "other", "done"};
+
+        struct ha_msg * m;
+        int             rc;
+        char            timestamp[16];
+
+        sprintf(timestamp, TIME_X, (TIME_T) time(NULL));
+
+	if (ANYDEBUG) {
+        	ha_log(LOG_DEBUG, "Sending standby [%s] msg", standby_msg[state]);
+	}
+        if ((m=ha_msg_new(0)) == NULL) {
+                ha_log(LOG_ERR, "Cannot send standby [%s] msg", standby_msg[state]);
+                return(HA_FAIL);
+        }
+        if ((ha_msg_add(m, F_TYPE, T_ASKRESOURCES) == HA_FAIL)
+        ||  (ha_msg_add(m, F_ORIG, curnode->nodename) == HA_FAIL)
+        ||  (ha_msg_add(m, F_TIME, timestamp) == HA_FAIL)
+        ||  (ha_msg_add(m, F_COMMENT, standby_msg[state]) == HA_FAIL)) {
+                ha_log(LOG_ERR, "send_standby_msg: "
+                "Cannot create standby replay msg");
+                rc = HA_FAIL;
+        }else{
+                rc = send_cluster_msg(m);
+        }
+
+        ha_msg_del(m);
+        return(rc);
+}
+
+
 /* Send the starting msg out to the cluster */
 int
 send_local_starting(void)
@@ -2715,10 +2778,12 @@
 		||	(i_hold_resources & LOCAL_RSC) != 0)
 		&&	!getthemanyway) {
 
-			/* Someone already owns our resources */
-			ha_log(LOG_INFO
-			,	"Resource acquisition completed. (none)");
-			return;
+			if (going_standby == NOT) {
+				/* Someone already owns our resources */
+				ha_log(LOG_INFO
+				,	"Resource acquisition completed. (none)");
+				return;
+			}
 		}
 
 		/*
@@ -2796,6 +2861,85 @@
 }
 
 void
+go_standby(enum standby who)
+{
+	FILE *		rkeys;
+	char		cmd[MAXLINE];
+	char		buf[MAXLINE];
+	int		finalrc = HA_OK;
+	int		rc;
+	pid_t		pid;
+
+	/* We need to fork so we can make child procs not real time */
+
+	switch((pid=fork())) {
+
+		case -1:	ha_log(LOG_ERR, "Cannot fork.");
+				return;
+
+				/*
+				 * We shouldn't block here, because then we
+				 * aren't sending heartbeats out...
+				 */
+		default:	/* waitpid(pid, NULL, 0); */
+				return;
+
+		case 0:		/* Child */
+				break;
+	}
+
+	make_normaltime();
+	signal(SIGCHLD, SIG_DFL);
+	/*
+	 *	We could do this ourselves fairly easily...
+	 */
+
+	sprintf(cmd, HALIB "/ResourceManager listkeys '.*'");
+
+	if ((rkeys = popen(cmd, "r")) == NULL) {
+		ha_log(LOG_ERR, "Cannot run command %s", cmd);
+		return;
+	}
+
+	while (fgets(buf, MAXLINE, rkeys) != NULL) {
+		if (buf[strlen(buf)-1] == '\n') {
+			buf[strlen(buf)-1] = EOS;
+		}
+		if (who == ME) {
+			sprintf(cmd, HALIB "/ResourceManager givegroup %s", buf);
+		} else {
+			if (who == OTHER) {
+				sprintf(cmd, HALIB "/ResourceManager takegroup %s", buf);
+			}
+		}
+		if ((rc=system(cmd)) != 0) {
+			ha_log(LOG_ERR, "%s returned %d", cmd, rc);
+			finalrc=HA_FAIL;
+		}
+	}
+	pclose(rkeys);
+	ha_log(LOG_INFO, "who: %d", who);
+	if (who == ME) {
+		i_hold_resources = NO_RSC;
+		ha_log(LOG_INFO, "Giving up all HA resources(standby).");
+		ha_log(LOG_INFO, "All HA resources relinquished.");
+	} else {
+		if (who == OTHER) {
+			i_hold_resources |= FOREIGN_RSC;
+			ha_log(LOG_INFO, "Taking over foreign HA resources(primary).");
+			ha_log(LOG_INFO, "Foreign resources acquired.");
+		}
+	}
+
+	if (nice_failback) {
+		send_resources_held(rsc_msg[i_hold_resources],1);
+	}
+
+	exit(rc);
+	
+}
+
+void
 giveup_resources(int dummy)
 {
 	FILE *		rkeys;
@@ -3968,6 +4112,83 @@
 	sync();
 	return HA_OK;
 }
+
+
+void ask_for_resources(struct ha_msg *msg)
+{
+
+const char * info, * from;
+int 	msgfromme;
+								
+
+	info = ha_msg_value(msg, F_COMMENT);
+	from = ha_msg_value(msg, F_ORIG);
+	msgfromme = !strcmp(from, curnode->nodename);
+
+	/* Starting the STANDBY 3-phased protocol */
+
+	switch(going_standby) {
+	case NOT:	
+		if ((!strncasecmp(info,"me",2))) {
+			ha_log(LOG_INFO, "%s wants to go standby", from);
+			if (msgfromme) {
+				ha_log(LOG_INFO, "i_hold_resources: %d", i_hold_resources);
+				if (i_hold_resources!=NO_RSC) {
+					/* I want to go standby */
+					going_standby = ME;
+				}
+			} else {
+				ha_log(LOG_INFO, "other_holds_resources: %d", other_holds_resources);
+				if (other_holds_resources!=NO_RSC) {
+					/* the other node wants to go standby */
+					going_standby = OTHER;
+					send_standby_msg(going_standby);
+				}
+			}
+		}
+		break;
+	case ME:
+		/* other node is alive, so it's time to give up my resources */	
+		if ((!msgfromme) && (!strncasecmp(info,"other",5))) {
+			TIME_T 	now = time(NULL);
+
+			ha_log(LOG_INFO, "%s can hold my resources", from);
+			go_standby(ME);
+			going_standby = DONE;
+			ha_log(LOG_INFO, "Resources released...");
+			send_standby_msg(going_standby);
+			ha_log(LOG_INFO, "Standby process finished. I'm secondary");
+			standby_running = now + 15;
+			going_standby = NOT;
+		}
+		break;
+	case OTHER:
+		/* It's time to give up my resources */	
+		if ((!msgfromme) && (!strncasecmp(info,"done",4))) {
+			TIME_T 	now = time(NULL);
+
+			ha_log(LOG_INFO, "time to hold [%s] resources", from);
+			req_our_resources(1);
+			go_standby(OTHER);
+			going_standby = DONE;
+			ha_log(LOG_INFO, "takeover complete...");
+			send_standby_msg(going_standby);
+			ha_log(LOG_INFO, "Standby process finished. I'm primary");
+			standby_running= now + 15;
+			going_standby = NOT;
+		}
+		break;
+	case DONE:
+		/* if ((!msgfromme)&&(!strncasecmp(info,"done",4))) {
+			ha_log(LOG_INFO, "Standby process finished. I'm secondary");
+			going_standby = NOT;
+		}
+		*/
+		break;
+	}
+			
+}
+
 
 #ifdef IRIX
 void
diff -ruN heartbeat-0.4.9/heartbeat/lib/hb_standby heartbeat-0.4.9-se/heartbeat/lib/hb_standby
--- heartbeat-0.4.9/heartbeat/lib/hb_standby	Wed Dec 31 21:00:00 1969
+++ heartbeat-0.4.9-se/heartbeat/lib/hb_standby	Wed Mar 21 11:41:05 2001
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+# This script sets the node (where it was called) to *standby*
+# It forces the node to release all the held resources and lets
+# the other node do a takeover.
+
+#	This script will only work for a two machine setup...
+#	More than that and you need to vote, or something...
+
+. /etc/ha.d/shellfuncs
+
+: Now running $0: $*
+
+# mdown=$1;	i#	The name of the downed machine...
+
+    ha_log "I'm going to standby mode :)"
+  	ha_clustermsg <<-!MSG
+	t=ask_resources
+	info=me
+	!MSG
---------------------------------------cut here-----------------------
-- 
[ Luis Claudio R. Goncalves                  lclaudio@conectiva.com.br ]
[ MSc coming soon -- Conectiva HA Team -- Gospel User -- Linuxer -- :) ]
[ Fault Tolerance - Real-Time - Distributed Systems - IECLB - IS 40:31 ]
[ LateNite Programmer --  Jesus Is The Solid Rock On Which I Stand  -- ]