Resource based failover - give it a try ;)
Luis Claudio R.Goncalves
lclaudio@conectiva.com.br
Mon, 23 Apr 2001 11:49:24 -0300
Hello!
I sent this patch to Alan some weeks ago and he is doing some heavy
testing. Anyway, it has passed my own tests and I'd like to have someone
else test it.
I created a script called /usr/lib/heartbeat/hb_standby that, when called
on node A, forces node A to become secondary (standby) and node B to
take over the resources held by A. Node A won't go standby if:
- Node A is already secondary;
- The other node is down or
- There's a standby conversation already running.
This patch can be applied against a plain 0.4.9 heartbeat and it doesn't
touch any other heartbeat functionality. If you're brave enough or if you
need this feature, please give it a try.
There are lots of ugly debug messages... but as soon as it gets
approved, I'll clean up the code.
Luis
-------------------------cut here----------------------
diff -ruN heartbeat-0.4.9/Makefile heartbeat-0.4.9-se/Makefile
--- heartbeat-0.4.9/Makefile Mon Apr 2 15:54:31 2001
+++ heartbeat-0.4.9-se/Makefile Wed Mar 21 17:20:52 2001
@@ -10,7 +10,7 @@
#
PKG=heartbeat
VERS=0.4.9
-RPMREL=1
+RPMREL=1se
INITD=$(shell [ -d /etc/init.d ] && echo /etc/init.d || echo /etc/rc.d/init.d )
LOGROTATED=/etc/logrotate.d
diff -ruN heartbeat-0.4.9/heartbeat/Makefile heartbeat-0.4.9-se/heartbeat/Makefile
--- heartbeat-0.4.9/heartbeat/Makefile Mon Apr 2 15:54:31 2001
+++ heartbeat-0.4.9-se/heartbeat/Makefile Tue Mar 20 18:08:16 2001
@@ -134,7 +134,7 @@
PRODUCTS = $(LIBCMDS) $(LIBS)
-LIBSCRIPTS = lib/mach_down lib/req_resource lib/ResourceManager
+LIBSCRIPTS = lib/mach_down lib/req_resource lib/ResourceManager lib/hb_standby
RESOURCECMDS= resource.d/IPaddr resource.d/AudibleAlarm resource.d/Raid1 resource.d/Filesystem
diff -ruN heartbeat-0.4.9/heartbeat/ha_msg.h heartbeat-0.4.9-se/heartbeat/ha_msg.h
--- heartbeat-0.4.9/heartbeat/ha_msg.h Sun Nov 12 02:29:22 2000
+++ heartbeat-0.4.9-se/heartbeat/ha_msg.h Tue Mar 20 18:08:16 2001
@@ -68,6 +68,7 @@
#define T_IFSTATUS "ifstat" /* Interface status */
#define T_APIREQ "hbapi-req" /* Heartbeat API request */
#define T_APIRESP "hbapi-resp" /* Heartbeat API response */
+#define T_ASKRESOURCES "ask_resources" /* Let the other node ask my resources */
#define T_STONITH "stonith" /* Stonith return code */
#define T_SHUTDONE "shutdone" /* External Shutdown complete */
diff -ruN heartbeat-0.4.9/heartbeat/heartbeat.c heartbeat-0.4.9-se/heartbeat/heartbeat.c
--- heartbeat-0.4.9/heartbeat/heartbeat.c Fri Mar 16 00:01:12 2001
+++ heartbeat-0.4.9-se/heartbeat/heartbeat.c Mon Apr 2 15:53:59 2001
@@ -287,6 +287,10 @@
#define ALL_RSC (LOCAL_RSC|FOREIGN_RSC)
#define ALL_RESOURCES "all"
+/*
+ * State of the standby handshake as seen by this node:
+ *   NOT   - no standby conversation in progress
+ *   ME    - this node asked to go standby and awaits the peer's "other"
+ *   OTHER - the peer asked to go standby and we await its "done"
+ *   DONE  - set briefly while a handover is being wrapped up
+ */
+enum standby { NOT, ME, OTHER, DONE };
+enum standby going_standby = NOT;
+/*
+ * Absolute time (seconds, as returned by time()) before which further
+ * standby requests are ignored; 0 means no hold-off is active.
+ * Declared time_t rather than int so the deadline is not truncated on
+ * platforms where time_t is wider than int.
+ */
+time_t standby_running = 0;
+
const char * rsc_msg[] = {NO_RESOURCES, LOCAL_RESOURCES,
FOREIGN_RESOURCES, ALL_RESOURCES};
int verbose = 0;
@@ -369,6 +373,7 @@
void check_for_timeouts(void);
void check_comm_isup(void);
int send_resources_held(const char *str, int stable);
+int send_standby_msg(enum standby state);
int send_local_starting(void);
int send_local_status(void);
int set_local_status(const char * status);
@@ -398,9 +403,11 @@
void nak_rexmit(unsigned long seqno, const char * reason);
void req_our_resources(int getthemanyway);
void giveup_resources(int);
+void go_standby(enum standby who);
void make_realtime(void);
void make_normaltime(void);
int IncrGeneration(unsigned long * generation);
+void ask_for_resources(struct ha_msg *msg);
/* The biggies */
void control_process(FILE * f);
@@ -1254,6 +1261,14 @@
now = time(NULL);
messagetime = times(NULL);
+ if (standby_running) {
+ /* if there's a standby timer running, verify if it's
+ time to enable the standby messages again... */
+ if (now >= standby_running) {
+ standby_running = 0;
+ }
+ }
+
/* Extract message type, originator, timestamp, auth */
type = ha_msg_value(msg, F_TYPE);
from = ha_msg_value(msg, F_ORIG);
@@ -1366,6 +1381,19 @@
goto psm_done;
}
+ /* If someone asked us to turn "standby" mode on... */
+ if (strcasecmp(type, T_ASKRESOURCES) == 0) {
+ /* if the last standby conversation finished... */
+ if (!standby_running) {
+ /* someone wants to go standby!!! */
+ ask_for_resources(msg);
+ } else {
+ ha_log(LOG_INFO,
+ "Standby delay is running. MSG from %s ignored", from);
+ }
+ goto psm_done;
+ }
+
/* Is this a status update (i.e., "heartbeat") message? */
if (strcasecmp(type, T_STATUS) == 0
|| strcasecmp(type, T_NS_STATUS) == 0) {
@@ -2426,6 +2454,41 @@
}
+/*
+ * Broadcast one step of the standby conversation to the cluster.
+ *
+ * Builds a T_ASKRESOURCES message whose F_COMMENT field carries the
+ * textual name of 'state' ("not", "me", "other" or "done"), stamps it
+ * with the local node name and the current time, and hands it to
+ * send_cluster_msg().
+ *
+ * Returns HA_FAIL when the message cannot be allocated or filled in,
+ * otherwise the result of send_cluster_msg().
+ */
+int
+send_standby_msg(enum standby state)
+{
+	const char *	state_names[] = { "not", "me", "other", "done"};
+	const char *	label;
+	struct ha_msg *	out;
+	int		result;
+	char		stamp[16];
+
+	sprintf(stamp, TIME_X, (TIME_T) time(NULL));
+	label = state_names[state];
+
+	if (ANYDEBUG) {
+		ha_log(LOG_DEBUG, "Sending standby [%s] msg", label);
+	}
+
+	out = ha_msg_new(0);
+	if (out == NULL) {
+		ha_log(LOG_ERR, "Cannot send standby [%s] msg", label);
+		return(HA_FAIL);
+	}
+
+	/* Fields are added in order; the first failure stops the chain. */
+	if (ha_msg_add(out, F_TYPE, T_ASKRESOURCES) != HA_FAIL
+	&&  ha_msg_add(out, F_ORIG, curnode->nodename) != HA_FAIL
+	&&  ha_msg_add(out, F_TIME, stamp) != HA_FAIL
+	&&  ha_msg_add(out, F_COMMENT, label) != HA_FAIL) {
+		result = send_cluster_msg(out);
+	}else{
+		ha_log(LOG_ERR, "send_standby_msg: "
+		"Cannot create standby replay msg");
+		result = HA_FAIL;
+	}
+
+	ha_msg_del(out);
+	return(result);
+}
+
+
/* Send the starting msg out to the cluster */
int
send_local_starting(void)
@@ -2715,10 +2778,12 @@
|| (i_hold_resources & LOCAL_RSC) != 0)
&& !getthemanyway) {
- /* Someone already owns our resources */
- ha_log(LOG_INFO
- , "Resource acquisition completed. (none)");
- return;
+ if (going_standby == NOT) {
+ /* Someone already owns our resources */
+ ha_log(LOG_INFO
+ , "Resource acquisition completed. (none)");
+ return;
+ }
}
/*
@@ -2796,6 +2861,85 @@
}
void
+/*
+ * Hand over or take over every resource group as part of a standby
+ * switch.
+ *
+ *   who == ME     run "ResourceManager givegroup <key>" for each key
+ *   who == OTHER  run "ResourceManager takegroup <key>" for each key
+ *
+ * The keys come from "ResourceManager listkeys '.*'".  The work is done
+ * in a forked child so the caller returns immediately and is never
+ * blocked by the resource scripts.  The child exits 0 when every
+ * command succeeded and 1 otherwise.
+ *
+ * Any other value of 'who' is rejected before forking.
+ */
+go_standby(enum standby who)
+{
+	FILE *		rkeys;
+	char		cmd[MAXLINE];
+	char		buf[MAXLINE];
+	const char *	action;
+	int		finalrc = HA_OK;
+	int		rc;
+	int		n;
+
+	/*
+	 * Pick the ResourceManager verb up front so an unexpected role
+	 * can never end up re-running a stale command line.
+	 */
+	switch (who) {
+	case ME:	action = "givegroup";
+			break;
+	case OTHER:	action = "takegroup";
+			break;
+	default:	ha_log(LOG_ERR, "go_standby: invalid role %d"
+			,	(int)who);
+			return;
+	}
+
+	/* We need to fork so we can make child procs not real time */
+
+	switch(fork()) {
+
+	case -1:	ha_log(LOG_ERR, "Cannot fork.");
+			return;
+
+	/*
+	 * We shouldn't block here, because then we
+	 * aren't sending heartbeats out...
+	 */
+	default:	/* Parent: do not wait for the child */
+			return;
+
+	case 0:		/* Child */
+			break;
+	}
+
+	/* From here on we are the child and must leave only via exit() */
+
+	make_normaltime();
+	signal(SIGCHLD, SIG_DFL);
+	/*
+	 * We could do this ourselves fairly easily...
+	 */
+
+	snprintf(cmd, sizeof(cmd), HALIB "/ResourceManager listkeys '.*'");
+
+	if ((rkeys = popen(cmd, "r")) == NULL) {
+		ha_log(LOG_ERR, "Cannot run command %s", cmd);
+		/* Returning here would let the child run the caller's code */
+		exit(1);
+	}
+
+	while (fgets(buf, sizeof(buf), rkeys) != NULL) {
+		size_t	len = strlen(buf);
+
+		if (len > 0 && buf[len-1] == '\n') {
+			buf[--len] = EOS;
+		}
+		if (len == 0) {
+			/* Blank line: no resource key to act on */
+			continue;
+		}
+		n = snprintf(cmd, sizeof(cmd)
+		,	HALIB "/ResourceManager %s %s", action, buf);
+		if (n < 0 || (size_t)n >= sizeof(cmd)) {
+			ha_log(LOG_ERR, "go_standby: resource key too long: %s"
+			,	buf);
+			finalrc=HA_FAIL;
+			continue;
+		}
+		if ((rc=system(cmd)) != 0) {
+			ha_log(LOG_ERR, "%s returned %d", cmd, rc);
+			finalrc=HA_FAIL;
+		}
+	}
+	pclose(rkeys);
+	ha_log(LOG_INFO, "who: %d", who);
+	/*
+	 * NOTE(review): i_hold_resources is updated in this forked child
+	 * only; the caller's copy is not changed by these assignments.
+	 * Confirm the parent learns the new state some other way.
+	 */
+	if (who == ME) {
+		i_hold_resources = NO_RSC;
+		ha_log(LOG_INFO, "Giving up all HA resources(standby).");
+		ha_log(LOG_INFO, "All HA resources relinquished.");
+	} else {
+		i_hold_resources |= FOREIGN_RSC;
+		ha_log(LOG_INFO, "Taking over foreign HA resources(primary).");
+		ha_log(LOG_INFO, "Foreign resources acquired.");
+	}
+
+	if (nice_failback) {
+		send_resources_held(rsc_msg[i_hold_resources],1);
+	}
+
+	/*
+	 * Report overall success or failure.  A raw system() status is not
+	 * a usable exit code: only its low 8 bits would survive exit().
+	 */
+	exit(finalrc == HA_OK ? 0 : 1);
+
+}
+
+void
giveup_resources(int dummy)
{
FILE * rkeys;
@@ -3968,6 +4112,83 @@
sync();
return HA_OK;
}
+
+
+/*
+ * Handle one T_ASKRESOURCES ("standby") message.
+ *
+ * The conversation has three steps, carried in the F_COMMENT field:
+ *   "me"    - the sender wants to go standby
+ *   "other" - the peer answers that it can take the resources
+ *   "done"  - the sender has handed its resources over
+ *
+ * going_standby records which side of the conversation this node is on.
+ * When a handover finishes, standby_running is set to a deadline 15
+ * seconds ahead and going_standby goes back to NOT.
+ *
+ * Messages lacking the F_COMMENT or F_ORIG field are logged and dropped.
+ */
+void ask_for_resources(struct ha_msg *msg)
+{
+	/* Seconds to wait after a handover before accepting a new one */
+	enum { STANDBY_HOLDOFF_SECS = 15 };
+
+	const char *	info;
+	const char *	from;
+	int		msgfromme;
+
+	info = ha_msg_value(msg, F_COMMENT);
+	from = ha_msg_value(msg, F_ORIG);
+
+	/*
+	 * Both fields come off the wire.  A missing one is presumably
+	 * returned as NULL by ha_msg_value(), which the string compares
+	 * below must never see.
+	 */
+	if (info == NULL || from == NULL) {
+		ha_log(LOG_ERR, "ask_for_resources: "
+		"standby message without info or origin ignored");
+		return;
+	}
+	msgfromme = (strcmp(from, curnode->nodename) == 0);
+
+	/* Starting the STANDBY 3-phased protocol */
+
+	switch(going_standby) {
+	case NOT:
+		if (strncasecmp(info,"me",2) != 0) {
+			break;
+		}
+		ha_log(LOG_INFO, "%s wants to go standby", from);
+		if (msgfromme) {
+			ha_log(LOG_INFO, "i_hold_resources: %d", i_hold_resources);
+			if (i_hold_resources!=NO_RSC) {
+				/* I want to go standby */
+				going_standby = ME;
+			}
+		} else {
+			ha_log(LOG_INFO, "other_holds_resources: %d", other_holds_resources);
+			if (other_holds_resources!=NO_RSC) {
+				/* the other node wants to go standby */
+				going_standby = OTHER;
+				send_standby_msg(going_standby);
+			}
+		}
+		break;
+	case ME:
+		/* other node is alive, so it's time to give up my resources */
+		if ((!msgfromme) && (!strncasecmp(info,"other",5))) {
+			TIME_T	now = time(NULL);
+
+			ha_log(LOG_INFO, "%s can hold my resources", from);
+			/*
+			 * NOTE(review): go_standby() forks and returns at
+			 * once, so "done" is sent without waiting for the
+			 * release to finish.  Confirm this is acceptable.
+			 */
+			go_standby(ME);
+			going_standby = DONE;
+			ha_log(LOG_INFO, "Resources released...");
+			send_standby_msg(going_standby);
+			ha_log(LOG_INFO, "Standby process finished. I'm secondary");
+			standby_running = now + STANDBY_HOLDOFF_SECS;
+			going_standby = NOT;
+		}
+		break;
+	case OTHER:
+		/* The peer released its resources: time to take them over */
+		if ((!msgfromme) && (!strncasecmp(info,"done",4))) {
+			TIME_T	now = time(NULL);
+
+			ha_log(LOG_INFO, "time to hold [%s] resources", from);
+			req_our_resources(1);
+			go_standby(OTHER);
+			going_standby = DONE;
+			ha_log(LOG_INFO, "takeover complete...");
+			send_standby_msg(going_standby);
+			ha_log(LOG_INFO, "Standby process finished. I'm primary");
+			standby_running = now + STANDBY_HOLDOFF_SECS;
+			going_standby = NOT;
+		}
+		break;
+	case DONE:
+		/*
+		 * DONE is only set and cleared inside the ME and OTHER
+		 * branches above, so there is nothing to do here.
+		 */
+		break;
+	default:
+		break;
+	}
+
+}
+
#ifdef IRIX
void
diff -ruN heartbeat-0.4.9/heartbeat/lib/hb_standby heartbeat-0.4.9-se/heartbeat/lib/hb_standby
--- heartbeat-0.4.9/heartbeat/lib/hb_standby Wed Dec 31 21:00:00 1969
+++ heartbeat-0.4.9-se/heartbeat/lib/hb_standby Wed Mar 21 11:41:05 2001
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+# This script sets the node (where it was called) to *standby*.
+# It forces the node to release all the held resources and lets
+# the other node do a takeover.
+
+# This script will only work for a two machine setup...
+# More than that and you need to vote, or something...
+
+. /etc/ha.d/shellfuncs
+
+: Now running $0: $*
+
+# Send an ask_resources message with info=me to the cluster.
+
+ ha_log "I'm going to standby mode :)"
+ ha_clustermsg <<-!MSG
+ t=ask_resources
+ info=me
+ !MSG
---------------------------------------cut here-----------------------
--
[ Luis Claudio R. Goncalves lclaudio@conectiva.com.br ]
[ MSc coming soon -- Conectiva HA Team -- Gospel User -- Linuxer -- :) ]
[ Fault Tolerance - Real-Time - Distributed Systems - IECLB - IS 40:31 ]
[ LateNite Programmer -- Jesus Is The Solid Rock On Which I Stand -- ]