[Linux-ha-dev] Dynamic Modify the timeout values

Horms horms at verge.net.au
Wed Jun 20 21:36:02 MDT 2007


On Mon, Jun 18, 2007 at 01:55:12PM +0900, DAIKI MATSUDA wrote:
> Hi, All
> 
> I add the new function for heartbeat-2.0.8 and attached its patch file.
> 
> The function is to apply the new timeout parameters ( keepalive, deadtime,
> deadping, warntime ) without stopping the heartbeat services. Currently
> heartbeat boot scripts supply the 'reload' or 'forcereload' function, but
> it, they are same, does stop the services and the HA services are moved to
> standby node, because its process kills the forked heartbeat processes and
> clients ( crmd etc. ).
> So, we think to without suspending the services make the changing parameters
> to apply to driving nodes. Current feature is following.
> 1. changing ha.cf file for 4 parameters
> 2. send working parent heartbeat process signal SIGRTMAX (e.g. kill -s
> SIGRTMAX `cat /var/run/heartbeat.pid` (Why do I choose SIGRTMAX? I do not
> find the unused good signal.)
> 
> As we research the heatbeat, it may be safety. And I want to listen to your
> issues for patch and functions.
> 
> Best Regards
> MATSUDA, Daiki

Hi Matsuda-san,

I've taken a look over your patch. Its very long, so I am not sure that
I understand it all. One thing that I am particularly puzzled over is
the purpose of maxdispatchdelay_array. Is this used to notify child
processes when an update occurs?

I've put a number of comments inline, mainly minor things relating
to formating of the code. Other than that my only queries are:

* Has it been tested?
* If so, can you re-test it against the dev tree?
* If at all possible could you split the patch up into smaller bits.
  Perhaps if you adopt some of my refactoring of existing code ideas
  they could be separate patches?
* Do you propose to add a target to this to the init script?

> diff -uNr heartbeat-2.0.8/heartbeat/config.c heartbeat-2.0.8.orig/heartbeat/config.c
> --- heartbeat-2.0.8/heartbeat/config.c	2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/config.c	2007-06-18 10:38:26.000000000 +0900
> @@ -116,6 +116,33 @@
>    static int set_normalpoll(const char *);
>  #endif
>  
> +void modify_setmaxdispatchdelay_value( int );
> +void modify_setall_id_value( int );
> +void modify_channel_value( int );
> +
> +/* from include/clplumbing/GSource_internal.h */
> +#define COMMON_STRUCTSTART						\
> +GSource		source;		/* Common glib struct -  must be 1st */	\
> +unsigned	magno;		/* Magic number */			\
> +long		maxdispatchms;	/* Time limit for dispatch function */	\
> +long		maxdispatchdelayms; /* Max delay before processing */	\
> +char		detecttime[sizeof(longclock_t)];			\
> +				/* Time last input detected */		\
> +void*		udata;		/* User-defined data */			\
> +guint		gsourceid;      /* Source id of this source */		\
> +const char *	description;    /* Description of this source */	\
> +GDestroyNotify	dnotify
> +
> +/* from lib/clplumbing/GSource.c */
> +struct GTimeoutAppend {
> +	COMMON_STRUCTSTART;
> +	longclock_t     nexttime;
> +	guint           interval;
> +};

Should GTimeoutAppend be moved into a header file rather than duplicated?

> +
> +extern GPtrArray *maxdispatchdelay_array;
> +extern GPtrArray *setall_array;
> +extern GArray *timeout_add_full_array;
>  
>  void hb_set_max_rexmit_delay(int);
>  /*
> @@ -212,6 +239,7 @@
>  int					netstring_format = FALSE;
>  extern int				UseApphbd;
>  GSList*					del_node_list;
> +extern int				watchdog_timeout_ms;
>  
>  
>  static int	islegaldirective(const char *directive);
> @@ -781,6 +809,384 @@
>  	return(errcount ? HA_FAIL : HA_OK);
>  }
>  
> +/* care for G_main_setmaxdispatchdelay() function */
> +void
> +modify_setmaxdispatchdelay_value( int timeout_type )
> +{
> +	int i;
> +	long value;
> +
> +	switch( timeout_type ){
> +		case 0:
> +			value = config->heartbeat_ms;
> +			break;
> +		case 1:
> +			value = config->deadtime_ms;
> +			break;
> +		case 2:
> +			value = config->warntime_ms;
> +			break;
> +		default:
> +			return;
> +	}
> +
> +	for( i = 0; i < maxdispatchdelay_array->len; i++ ){
> +		MaxDispatchDelay_Array_t *carray = g_ptr_array_index( maxdispatchdelay_array, i );
> +
> +		if( carray->type == timeout_type ){
> +			if( carray->mode == '*' ){
> +				G_main_setmaxdispatchdelay( ( GSource * )carray->addr, value * carray->value );
> +			} else if( carray->mode == '/' ){
> +				G_main_setmaxdispatchdelay( ( GSource * )carray->addr, value / carray->value );
> +			}
> +		}
> +	}
> +}

Could you reformat the code above a little bit so that it is <= 80col wide?

> +
> +/* care for Gmain_setall_id() function */
> +void
> +modify_setall_id_value( int timeout_type )
> +{
> +	int i;
> +	long value;
> +
> +	switch( timeout_type ){
> +		case 0:
> +			value = config->heartbeat_ms;
> +			break;
> +		case 1:
> +			value = config->deadtime_ms;
> +			break;
> +		case 2:
> +			value = config->warntime_ms;
> +			break;
> +		default:
> +			return;
> +	}

The logic above seems to be a duplicate of the first half of
modify_setmaxdispatchdelay_value(). Is there any value in
breaking it out into its own (static?) function?

> +
> +	for( i = 0; i < setall_array->len; i++ ){
> +		SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i );
> +
> +		if( sarray->type == timeout_type ){
> +			if( sarray->calc[0] == '\0' ){
> +				G_main_setmaxdispatchdelay_id( sarray->id, value );
> +			} else {
> +				char *ptr1 = sarray->calc;
> +				char *ptr2;
> +				unsigned long val = value;
> +
> +				for( ; ; ){
> +					char buf[2];
> +					int num;
> +
> +					ptr2 = strchr( ptr1, ' ' );
> +					if( ptr2 != NULL ) ptr2 = strchr( ++ptr2, ' ' );
> +
> +					if( ptr2 != NULL ){
> +						*ptr2 = '\0';
> +					}
> +					sscanf( ptr1, "%s %d", buf, &num );
> +					switch( buf[0] ){
> +						case '+':
> +							val += num;
> +							break;	
> +						case '-':
> +							val -= num;
> +							break;	
> +						case '*':
> +							val *= num;
> +							break;	
> +						case '/':
> +							val /= num;
> +							break;	
> +						default:
> +							break;
> +					}
> +					if( ptr2 == NULL ){
> +						break;
> +					} else {
> +						*ptr2 = ' ';
> +						ptr1 = ptr2 + 1;
> +					}
> +				}
> +				G_main_setmaxdispatchdelay_id( sarray->id, val );
> +			}
> +		}
> +	}
> +}

Again, 80col please. Perhaps if you change the logic around a bit
then making it narrower would be easier:

	for( i = 0; i < setall_array->len; i++ ){
		SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i );
		if( sarray->type != timeout_type ) {
			continue;
		}
		if( sarray->calc[0] == '\0' ){
			G_main_setmaxdispatchdelay_id( sarray->id, value );
			continue;
		}
		for( ; ; ){
			...

Also, I'm not a big fan of declaring variables other inside blocks,
but I'm not sure what the linux-ha guidelines on this are.

> +
> +void
> +modify_channel_value( int timeout_type )
> +{
> +	modify_setmaxdispatchdelay_value( timeout_type );
> +	modify_setall_id_value( timeout_type );
> +}
> +
> +int
> +light_parse_config(const char * cfgfile )

The top half of light_parse_config() seems to contain a lot of code from
parse_config() Is it possible to restructure or split parse_config() in
order to reduce the volume of code duplication?

> +{
> +	FILE	*	f;
> +	char		buf[MAXLINE];
> +	char *		cp;
> +	char		directive[MAXLINE];
> +	size_t		dirlength;
> +	char		option[MAXLINE];
> +	size_t		optionlength;
> +	int		i;
> +	struct stat	sbuf;
> +
> +#define OW_KEEPALIVE 1
> +#define OW_DEADTIME 2
> +#define OW_WARNTIME 4
> +#define OW_DEADPING 8

I'd rather if these defines were just done at the top of the file.
There doesn't seem to be any pressing need to #undef them at any stage.

> +	int mode = 0;
> +
> +	long new_keepalive_ms = 0;
> +	long new_deadtime_ms = 0;
> +	long new_warntime_ms = 0;
> +	long new_deadping_ms = -1;
> +	char *new_keepalive_string = NULL;
> +	char *new_deadtime_string = NULL;
> +	char *new_warntime_string = NULL;
> +	char *new_deadping_string = NULL;
> +
> +	if ((f = fopen(cfgfile, "r")) == NULL) {
> +		ha_log(LOG_ERR, "Cannot open config file [%s]", cfgfile);
> +		ha_log(LOG_INFO
> +		,       "An annotated sample %s file is provided in"
> +		" the documentation."
> +		,       cfgfile);
> +		ha_log(LOG_INFO
> +		,       "Please copy it to %s, read it, customize it"
> +		", and try again."
> +		,       cfgfile);
> +
> +		return(HA_FAIL);
> +	}
> +
> +	fstat(fileno(f), &sbuf);
> +	if( config->cfg_time == sbuf.st_mtime ){
> +		ha_log( LOG_ERR, "The timestamp of %s is not changed!\n", cfgfile );
> +		return 1;
> +	}
> +
> +	/* It's ugly, but effective  */
> +
> +	while (fgets(buf, MAXLINE, f) != NULL) {
> +		char *  bp = buf; 

The line above contains trailing whitespace.

> +		int	IsOptionDirective=1;
> +
> +		/* Skip over white space */
> +		bp += strspn(bp, WHITESPACE);
> +		

The line above contains trailing whitespace.

> +		/* Zap comments on the line */
> +		if ((cp = strchr(bp, COMMENTCHAR)) != NULL)  {
> +			*cp = EOS;
> +		}
> +		

The line above contains trailing whitespace.

> +		/* Strip '\n' and '\r' chars */
> +		if ((cp = strpbrk(bp, CRLF)) != NULL) {
> +			*cp = EOS;
> +		}
> +
> +		/* Ignore blank (and comment) lines */
> +		if (*bp == EOS) {
> +			continue;
> +		}
> +
> +		/* Now we expect a directive name */
> +
> +		dirlength = strcspn(bp, WHITESPACE);
> +		strncpy(directive, bp, dirlength);
> +		directive[dirlength] = EOS;
> +
> +		bp += dirlength;
> +
> +		/* Skip over Delimiters */
> +		bp += strspn(bp, DELIMS);
> +
> +		/* Now Check for  the options-list stuff */
> +		while (IsOptionDirective && *bp != EOS) {
> +			optionlength = strcspn(bp, DELIMS);
> +			strncpy(option, bp, optionlength);
> +			option[optionlength] = EOS;
> +			bp += optionlength;
> +
> +			if( ! strcmp( directive, KEY_KEEPALIVE ) ){
> +				new_keepalive_ms = get_msec( option );
> +				new_keepalive_string = strdup( option );
> +			} else if( ! strcmp( directive, KEY_DEADTIME ) ){
> +				new_deadtime_ms = get_msec( option );
> +				new_deadtime_string = strdup( option );
> +			} else if( ! strcmp( directive, KEY_WARNTIME ) ){
> +				new_warntime_ms = get_msec( option );
> +				new_warntime_string = strdup( option );
> +			} else if( ! strcmp( directive, KEY_DEADPING ) ){
> +				new_deadping_ms = get_msec( option );
> +				new_deadping_string = strdup( option );
> +			}
> +			/* Skip over Delimiters */
> +			bp += strspn(bp, DELIMS);
> +		}
> +	}
> +	fclose( f );
> +
> +	if( new_keepalive_ms == 0 ){
> +		ha_log( LOG_ERR, "keepalive value is not set.\n" );
> +		return 2;
> +	}
> +
> +
> +	if( new_deadtime_ms == 0 ){
> +		ha_log( LOG_ERR, "deadtime value is not set.\n" );
> +		return 2;
> +	}
> +
> +	if( new_deadtime_ms != config->deadtime_ms ) mode |= OW_DEADTIME;
> +
> +	if( new_keepalive_ms != config->heartbeat_ms ) mode |= OW_KEEPALIVE;
> +	if( new_warntime_ms != config->warntime_ms ) mode |= OW_WARNTIME;
> +	if( new_deadping_ms != config->deadping_ms ) mode |= OW_DEADPING;
> +
> +	if( new_deadtime_ms < new_keepalive_ms * 2 ){
> +		ha_log( LOG_ERR, "deadtime value is smaller than keepalive time * 2.\n" );
> +		ha_log( LOG_ERR, "keepalive value: %ld\n", new_keepalive_ms );
> +		ha_log( LOG_ERR, "deadtime value: %ld\n", new_deadtime_ms );
> +		return 3;
> +	}
> +
> +	if( new_deadping_ms < new_keepalive_ms * 2 ){
> +		ha_log( LOG_ERR, "deadping value is smaller than keepalive time * 2.\n" );
> +		ha_log( LOG_ERR, "keepalive value: %ld\n", new_keepalive_ms );
> +		ha_log( LOG_ERR, "deadping value: %ld\n", new_deadping_ms );
> +		return 4;
> +	}
> +
> +/* Maybe following is safe */

Could you explain on what might be unsafe about it?

> +	if( mode & ( OW_KEEPALIVE | OW_DEADTIME ) ){
> +		if( new_keepalive_ms > config->deadtime_ms ){
> +			modify_deadtime_value( new_deadtime_ms, new_deadtime_string );
> +			free( new_deadtime_string );
> +			modify_keepalive_value( new_keepalive_ms, new_keepalive_string );
> +			free( new_keepalive_string );
> +		} else {
> +			modify_keepalive_value( new_keepalive_ms, new_keepalive_string );
> +			free( new_keepalive_string );
> +			modify_deadtime_value( new_deadtime_ms, new_deadtime_string );
> +			free( new_deadtime_string );
> +		}
> +	} else {
> +		if( mode & OW_KEEPALIVE ){
> +			modify_keepalive_value( new_keepalive_ms, new_keepalive_string );
> +			free( new_keepalive_string );
> +		}
> +
> +		if( mode & OW_DEADTIME ){
> +			modify_deadtime_value( new_deadtime_ms, new_deadtime_string );
> +			free( new_deadtime_string );
> +		}
> +	}
> +
> +	if( mode & OW_WARNTIME ){
> +		SetParameterValue( KEY_WARNTIME, new_warntime_string );
> +		config->warntime_ms = new_warntime_ms;
> +		modify_channel_value( 2 );
> +	}
> +
> +	if( mode & OW_DEADPING ){
> +		SetParameterValue( KEY_DEADPING, new_deadping_string );
> +		config->deadping_ms = new_deadping_ms;
> +
> +		for( i = 0; i < config->nodecount; i++ ) {
> +			// config->nodes[i].has_resources = DoManageResources;
> +			if (config->nodes[i].nodetype == PINGNODE_I) {
> +				config->nodes[i].dead_ticks = msto_longclock( new_deadping_ms );
> +			}
> +		}
> +
> +		modify_channel_value( 3 );
> +	}
> +
> +	return 0;
> +#undef OW_KEEPALIVE
> +#undef OW_DEADTIME
> +#undef OW_WARNTIME
> +}
> +
> +void
> +modify_keepalive_value( long new_keepalive_ms, char *new_keepalive_string )
> +{
> +	int i;
> +
> +	cl_log( LOG_INFO, "%s: keepalive %ld time is over-written to new value %ld.\n", __FUNCTION__, config->heartbeat_ms, new_keepalive_ms );
> +	config->heartbeat_ms = new_keepalive_ms;
> +	SetParameterValue( KEY_KEEPALIVE, new_keepalive_string );
> +
> +	modify_channel_value( 0 );
> +#define        GTIMEOUT(GS)    ((struct GTimeoutAppend*)((void*)(GS)))
> +	for( i = 0; i < timeout_add_full_array->len; i++ ){
> +/* This part is cut from Gmain_timeout_add_full() in lib/clplumbing/GSource.c */
> +		struct GTimeoutAppend *append;
> +		guint id = g_array_index( timeout_add_full_array, guint, i );
> +		GSource *source;
> +
> +		source  = g_main_context_find_source_by_id( NULL, id );
> +
> +		if( source != NULL ){
> +			append = GTIMEOUT( source );
> +			// append->nexttime = add_longclock( time_longclock(), msto_longclock( config->heartbeat_ms ) );
> +       			append->interval = config->heartbeat_ms;

There seems to be mixed tabs and spaces in the indentation of the line above.

> +		}
> +	}
> +
> +/* For ccm */

Could you indent the comment on the line above?

> +	{
> +		struct ha_msg * msg;
> +		char keepalive[64];
> +
> +		snprintf( keepalive, sizeof( keepalive ), "%lx", config->heartbeat_ms );
> +		msg = ha_msg_new( 4 );
> +		ha_msg_add( msg, F_TYPE, "new keepalive" );
> +		ha_msg_add( msg, F_ORIG, curnode->nodename );
> +		ha_msg_add( msg, F_STATUS, "new keepalive" );
> +		ha_msg_add( msg, F_KEEPALIVE, keepalive );
> +
> +		heartbeat_monitor( msg, KEEPIT, NULL );
> +	}
> +#undef GTIMEOUT
> +}
> +
> +void
> +modify_deadtime_value( long new_deadtime_ms, char *new_deadtime_string )

I haven't investigated this, but how is the deadtime set on startup?
Would it make sense to abstract it a bit so that the same code can be
used for startup and modify? Perhaps a modified modify_deadtime_value()
could be used in both cases.  Ditto for modify_keepalive_value()

> +{
> +	int i;
> +
> +	cl_log( LOG_INFO, "%s: deadtime %ld time is over-written to new value %ld.\n", __FUNCTION__, config->deadtime_ms, new_deadtime_ms );
> +	SetParameterValue( KEY_DEADTIME, new_deadtime_string );
> +
> +/* this part is taken from init_config() */
> +		for( i = 0; i < config->nodecount; i++ ) {
> +		// config->nodes[i].has_resources = DoManageResources;

If the line above isn't needed, please just remove it.

> +		if( config->nodes[i].nodetype != PINGNODE_I && strcmp( config->nodes[i].nodename, curnode->nodename ) == 0 ){
> +			config->nodes[i].dead_ticks = msto_longclock( new_deadtime_ms );
> +		}
> +	}
> +/* To HERE */
> +
> +/* for heartbeat.c hb_init_watchdog_interval() */
> +	if( watchdog_timeout_ms == 0L || watchdog_timeout_ms == ( config->deadtime_ms + 10 ) ) watchdog_timeout_ms = new_deadtime_ms + 10;
> +/* To HERE */
> +	config->deadtime_ms = new_deadtime_ms;
> +
> +	modify_channel_value( 1 );
> +
> +/* for heartbeat.c hb_reregister_with_apphbd() and hb_init_watchdog() */
> +	if ( UseApphbd == TRUE ) {
> +		hb_init_register_with_apphbd_dummy();
> +	}
> +/* To HERE */
> +}
> +

Are the "To HERE" commends just debugging/development stuff? If so,
could you remove them. Also, could you please indent the other comments
in this function.

>  /*
>   *	Dump the configuration file - as a configuration file :-)
>   *
> diff -uNr heartbeat-2.0.8/heartbeat/hb_api.c heartbeat-2.0.8.orig/heartbeat/hb_api.c
> --- heartbeat-2.0.8/heartbeat/hb_api.c	2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_api.c	2007-06-04 15:16:07.000000000 +0900
> @@ -198,7 +198,7 @@
>  	seqno_t		last_seq;
>  };
>  
> -
> +GPtrArray *maxdispatchdelay_array;
>  
>  static int
>  should_msg_sendto_client(client_proc_t* client, struct ha_msg* msg)
> @@ -1290,7 +1290,10 @@
>  	,	APIclients_input_dispatch
>  	,	client, G_remove_client);
>  	G_main_setdescription((GSource*)client->gsource, "API client");
>  	G_main_setmaxdispatchdelay((GSource*)client->gsource, config->heartbeat_ms);
> +
> +	maxdispatchdelay_array_add( ( void * )client->gsource, '*', 1, 0, 0 );
> +
>  	G_main_setmaxdispatchtime((GSource*)client->gsource, 100);
>  	if (ANYDEBUG) {
>  		cl_log(LOG_DEBUG
> diff -uNr heartbeat-2.0.8/heartbeat/hb_config.h heartbeat-2.0.8.orig/heartbeat/hb_config.h
> --- heartbeat-2.0.8/heartbeat/hb_config.h	2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_config.h	2007-06-15 15:44:23.000000000 +0900
> @@ -29,7 +29,13 @@
>  int		add_node(const char * value, int nodetype);
>  int   		parse_authfile(void);
>  int		init_config(const char * cfgfile);
> +int		light_parse_config( const char * cfgfile );
> +void		modify_keepalive_value( long, char * );
> +void		modify_deadtime_value( long, char * );
>  int		StringToBaud(const char * baudstr);
>  const char *	GetParameterValue(const char * name);
> +void		hb_init_register_with_apphbd_dummy( void );
> +void		maxdispatchdelay_array_add( void *addr, char mode, int value, pid_t pid, int type );
> +void		setall_array_add( long id, int type, char * calc );
>  
>  #endif /* _HB_CONFIG_H */
> diff -uNr heartbeat-2.0.8/heartbeat/hb_rexmit.c heartbeat-2.0.8.orig/heartbeat/hb_rexmit.c
> --- heartbeat-2.0.8/heartbeat/hb_rexmit.c	2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_rexmit.c	2007-05-31 11:29:24.000000000 +0900
> @@ -35,6 +35,9 @@
>  #include <clplumbing/GSource.h>
>  #include <clplumbing/cl_random.h>
>  
> +#include <hb_config.h>
> +
> +extern GPtrArray *setall_array;
>  
>  static void	schedule_rexmit_request(struct node_info* node, seqno_t seq, int delay);
>  
> @@ -235,7 +240,9 @@
>  	
>  	sourceid = Gmain_timeout_add_full(G_PRIORITY_HIGH - 1, delay, 
>  					  send_rexmit_request, ri, NULL);
>  	G_main_setall_id(sourceid, "retransmit request", config->heartbeat_ms/2, 10);
> +
> +	setall_array_add( sourceid, 0, strdup( "/ 2" ) );
>  	
>  	if (sourceid == 0){
>  		cl_log(LOG_ERR, "%s: scheduling a timeout event failed", 
> diff -uNr heartbeat-2.0.8/heartbeat/hb_signal.c heartbeat-2.0.8.orig/heartbeat/hb_signal.c
> --- heartbeat-2.0.8/heartbeat/hb_signal.c	2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_signal.c	2007-05-31 10:52:29.000000000 +0900
> @@ -60,7 +60,18 @@
>  #define HB_SIG_PARENT_DEBUG_USR2_SIG           0x0020UL
>  #define HB_SIG_REREAD_CONFIG_SIG               0x0040UL
>  #define HB_SIG_FALSE_ALARM_SIG                 0x0080UL
> +#define HB_SIG_LIGHT_REREAD_CONFIG_SIG         0x0100UL
>  
> +/* Maybe no need
> +typedef struct MaxDispatchDelay_Array {
> +        GCHSource *addr;
> +        char mode;
> +        int value;
> +        int pid;
> +	int type;
> +} MaxDispatchDelay_Array_t;

Please use tabs not spaces for indentation :)

> +extern GPtrArray *maxdispatchdelay_array;
> +*/
>  
>  /*
>   * This function does NOT have the same semantics as setting SIG_IGN.
> @@ -298,6 +310,9 @@
>  void
>  hb_signal_reread_config_action(void)
>  {
> +/* Maybe no need
> +	int	i;
> +*/
>  	int	j;
>  	int	signal_children = 0;
>  
> @@ -347,6 +363,19 @@
>  		return_to_orig_privs();
>  		for (j=0; j < procinfo->nprocs; ++j) {
>  			if (procinfo->info+j != curproc) {
> +/* Maybe no need
> +				for( i = 0; ; i++ ){
> +					MaxDispatchDelay_Array_t *carray = g_ptr_array_index( maxdispatchdelay_array, i );
> +					if( carray == NULL ) break;
> +					if( carray->pid == procinfo->info[j].pid ){
> +						ha_log( LOG_INFO, "Remove infomation from maxdispatchdelay_array mem = %x, pid = %d\n", ( int )carray, carray->pid );
> +						g_ptr_array_remove( maxdispatchdelay_array, carray );
> +						free( carray );
> +						i--;
> +					}
> +				}
> +*/

It looks like it is needed to me. But if not, can you just remove it?

> +				ha_log( LOG_INFO, "SP: CL_KILL procinfo->info[%d].pid = %d, getpid = %d by SIGHUP", j, ( int )procinfo->info[j].pid, ( int )getpid() );
>  				CL_KILL(procinfo->info[j].pid, SIGHUP);
>  			}
>  		}
> @@ -354,6 +383,18 @@
>  	}
>  }
>  
> +void
> +hb_signal_light_reread_config_handler( int sig )
> +{
> +	__hb_signal_pending |= HB_SIG_LIGHT_REREAD_CONFIG_SIG;
> +}
> +
> +void
> +hb_signal_light_reread_config_action( void )
> +{
> +	light_parse_config( CONFIG_NAME );
> +	return;
> +}
>  
>  void
>  hb_signal_false_alarm_handler(int sig)
> @@ -444,6 +485,10 @@
>  			hb_signal_reread_config_action();
>  		}
>  
> +		if (handlers&HB_SIG_LIGHT_REREAD_CONFIG_SIG) {
> +			hb_signal_light_reread_config_action();
> +		}
> +
>  		if (handlers&HB_SIG_FALSE_ALARM_SIG) {
>  			hb_signal_false_alarm_action();
>  		}
> @@ -478,6 +523,7 @@
>  	,	{SIGALRM,	hb_signal_false_alarm_handler,	1}
>  	,	{SIGUSR1,	hb_signal_debug_usr1_handler,	1}
>  	,	{SIGUSR2,	hb_signal_debug_usr2_handler,	1}
> +	,	{SIGRTMAX,	hb_signal_light_reread_config_handler,	1}

Is using SIGRTMAX in this way safe? Is SIGHUP already used for something else?

>  	,	{0,		0,				0}
>  	};
>  
> @@ -634,6 +680,7 @@
>  	{	{SIGTERM,	hb_signal_term_handler,	1}
>  	,	{SIGUSR1,	parent_hb_signal_debug_usr1_handler,	1}
>  	,	{SIGUSR2,	parent_hb_signal_debug_usr2_handler,	1}
> +	,	{SIGRTMAX,	hb_signal_light_reread_config_handler,	1}
>  	,	{SIGALRM,       hb_signal_false_alarm_handler,        	1}
>  	,	{0,		0,					0}
>  	};
> diff -uNr heartbeat-2.0.8/heartbeat/hb_signal.h heartbeat-2.0.8.orig/heartbeat/hb_signal.h
> --- heartbeat-2.0.8/heartbeat/hb_signal.h	2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_signal.h	2007-05-31 10:52:29.000000000 +0900
> @@ -57,6 +57,10 @@
>  
>  void hb_signal_reread_config_action(void);
>  
> +void hb_signal_light_reread_config_handler(int sig);
> +
> +void hb_signal_light_reread_config_action(void);
> +
>  void hb_signal_false_alarm_handler(int sig);
>  
>  void hb_signal_false_alarm_action(void);
> diff -uNr heartbeat-2.0.8/heartbeat/heartbeat.c heartbeat-2.0.8.orig/heartbeat/heartbeat.c
> --- heartbeat-2.0.8/heartbeat/heartbeat.c	2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/heartbeat.c	2007-06-18 10:46:14.000000000 +0900
> @@ -316,7 +317,8 @@
>  
>  char *				watchdogdev = NULL;
>  static int			watchdogfd = -1;
> -static int			watchdog_timeout_ms = 0L;
> +// static int			watchdog_timeout_ms = 0L;
> +int			watchdog_timeout_ms = 0L;
>  
>  int				shutdown_in_progress = FALSE;
>  int				startup_complete = FALSE;
> @@ -342,6 +344,9 @@
>  GTRIGSource*			write_delcachefile = NULL;
>  extern GSList*			del_node_list;
>  
> +GPtrArray *maxdispatchdelay_array;
> +GPtrArray *setall_array;
> +GArray * timeout_add_full_array;
>  
>  #undef DO_AUDITXMITHIST
>  #ifdef DO_AUDITXMITHIST
> @@ -490,6 +495,33 @@
>  	NULL,
>  };
>  
> +void
> +maxdispatchdelay_array_add( void *addr, char mode, int value, pid_t pid, int type )
> +{
> +	MaxDispatchDelay_Array_t *carray;
> +
> +        carray = malloc( sizeof( MaxDispatchDelay_Array_t ) );

Should this (and others) be cl_malloc()?

> +        carray->addr = addr;
> +        carray->mode = mode;
> +        carray->value = value;
> +
> +        carray->pid = pid;
> +        carray->type = type;
> +        g_ptr_array_add( maxdispatchdelay_array, ( gpointer )carray );
> +}
> +
> +void
> +setall_array_add( long id, int type, char * calc )
> +{
> +	SetAll_Array_t *sarray;
> +
> +	sarray = malloc( sizeof( SetAll_Array_t ) );
> +	sarray->id = id;
> +	sarray->type = type;
> +	sarray->calc = calc;
> +	g_ptr_array_add( setall_array, ( gpointer )sarray );
> +}
> +
>  static void
>  init_procinfo()
>  {
> @@ -747,10 +779,13 @@
>  	FifoChildSource = G_main_add_IPC_Channel(PRI_FIFOMSG
>  	,	fifochildipc[P_READFD]
>  	,	FALSE, FIFO_child_msg_dispatch, NULL, NULL);
> +	cl_log( LOG_INFO, "%s use config->heartbeat_ms: %ld\n", __FUNCTION__, config->heartbeat_ms );
>  	G_main_setmaxdispatchdelay((GSource*)FifoChildSource, config->heartbeat_ms);
>  	G_main_setmaxdispatchtime((GSource*)FifoChildSource, 50);
>  	G_main_setdescription((GSource*)FifoChildSource, "FIFO");
> -	
> +
> +	maxdispatchdelay_array_add( ( void * )FifoChildSource, '*', 1, pid, 0 );
> +
>  	return HA_OK;
>  }
>  
> @@ -1462,6 +1503,7 @@
>  		G_main_setmaxdispatchtime((GSource*)s, 50);
>  		G_main_setdescription((GSource*)s, "write child");
>  
> +		maxdispatchdelay_array_add( ( void * )s, '/', 4, sysmedia[j]->wchan[P_WRITEFD]->farside_pid, 0 );
>  		
>  		/* Connect up the read child IPC channel... */
>  		s = G_main_add_IPC_Channel(PRI_READPKT
> @@ -1474,8 +1518,8 @@
>  		G_main_setmaxdispatchtime((GSource*)s, 50);
>  		G_main_setdescription((GSource*)s, "read child");
>  
> -}	
> -	
> +		maxdispatchdelay_array_add( ( void * )s, '/', 4, sysmedia[j]->rchan[P_WRITEFD]->farside_pid, 0 );
> +	}	
>  
>  	/*
>  	 * Things to do on a periodic basis...
> @@ -1486,6 +1533,9 @@
>  	,	hb_send_local_status, NULL, NULL);
>  	G_main_setall_id(id, "send local status", 10+config->heartbeat_ms/2, 50);
>  
> +	g_array_append_val( timeout_add_full_array, id );
> +	setall_array_add( id, 0, strdup( "/ 2 + 10" ) );
> +
>  	id=Gmain_timeout_add_full(PRI_AUDITCLIENT
>  	,	config->initial_deadtime_ms
>  	,	set_init_deadtime_passed_flag
> @@ -1493,6 +1544,8 @@
>  	,	NULL);
>  	G_main_setall_id(id, "init deadtime passed", config->warntime_ms, 50);
>  
> +	setall_array_add( id, 2, strdup( "" ) );
> +
>  	/* Dump out memory stats periodically... */
>  	memstatsinterval = (debug_level ? 10*60*1000 : ONEDAY*1000);
>  	id=Gmain_timeout_add_full(PRI_DUMPSTATS, memstatsinterval
> @@ -1515,11 +1568,17 @@
>  	id=Gmain_timeout_add_full(PRI_CHECKSIGS, config->heartbeat_ms
>  	,       Gmain_hb_signal_process_pending, NULL, NULL);
>  	G_main_setall_id(id, "check for signals", 10+config->heartbeat_ms/2, 50);
> 	
> +	cl_log( LOG_INFO, "heartbeat.c: G_main_timeout_add_full %d\n", id );
> +	g_array_append_val( timeout_add_full_array, id );
> +	setall_array_add( id, 0, strdup( "/ 2 + 10" ) );
> +
>  	id=Gmain_timeout_add_full(PRI_FREEMSG, 500
>  	,	Gmain_update_msgfree_count, NULL, NULL);
>  	G_main_setall_id(id, "update msgfree count", config->deadtime_ms, 50);
> 	
> +	setall_array_add( id, 1, strdup( "" ) );
> +
>  	if (UseApphbd) {
>  		Gmain_timeout_add_full(PRI_DUMPSTATS
>  		,	60*(1000-10) /* Not quite on a minute boundary */
> @@ -1817,7 +1875,8 @@
>  	G_main_setmaxdispatchdelay((GSource*)regsource, config->deadtime_ms);
>  	G_main_setmaxdispatchtime((GSource*)regsource, 20);
>  	G_main_setdescription((GSource*)regsource, "client registration");
> 	
> +	maxdispatchdelay_array_add( ( void * )regsource, '*', 1, 0, 1 );
>  
>  	if (regsource == NULL) {
>  		cl_log(LOG_DEBUG
> @@ -1908,6 +1967,8 @@
>  void
>  hb_initiate_shutdown(int quickshutdown)
>  {
> +	int i;
> +
>  	if (ANYDEBUG) {
>  		cl_log(LOG_DEBUG, "hb_initiate_shutdown() called.");
>  	}
> @@ -1930,6 +1991,24 @@
>  		,	"Shutdown delayed until Communication is up.");
>  		return;
>  	}
> +
> +/*
> +	for( ; ; ){
> +		MaxDispatchDelay_Array_t *carray = g_ptr_array_index( maxdispatchdelay_array, 0 );
> +		if( carray == NULL ) break;
> +		g_ptr_array_remove( maxdispatchdelay_array, carray );
> +		free( carray );
> +	}
> +*/

If the code above isn't needed, please remove it.

> +	g_ptr_array_free( maxdispatchdelay_array, TRUE );
> +
> +	for( i = 0; i < setall_array->len; i++ ){
> +		SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i );
> +		if( sarray != NULL ) free( sarray->calc );
> +	}
> +	g_ptr_array_free( setall_array, TRUE );
> +	g_array_free( timeout_add_full_array, TRUE );
> +
>  	send_local_status();
>  	if (!quickshutdown && DoManageResources) {
>  		/* THIS IS RESOURCE WORK!  FIXME */
> @@ -4079,7 +4166,10 @@
>  	send_cluster_msg(msg);
>  	
>  	id = Gmain_timeout_add(1000, send_reqnodes_msg, (gpointer)i);
>  	G_main_setall_id(id, "send_reqnodes_msg", config->heartbeat_ms, 100);
> +
> +	setall_array_add( id, 0, strdup( "" ) );
> +
>  	return FALSE;
>  }
>  
> @@ -4694,6 +4787,9 @@
>  	/*init table for nodename/uuid lookup*/
>  	inittable();
>  
> +	maxdispatchdelay_array = g_ptr_array_new();
> +	setall_array = g_ptr_array_new();
> +	timeout_add_full_array = g_array_new( FALSE, FALSE, sizeof( guint ) );
>  
>  	/*
>  	 *	We think we just performed an "exec" of ourselves to restart.
> @@ -5024,6 +5121,11 @@
>  	}
>  }
>  
> +void hb_init_register_with_apphbd_dummy( void )
> +{
> +	hb_init_register_with_apphbd();
> +}
> +
>  static gboolean
>  hb_reregister_with_apphbd(gpointer dummy)
>  {
> diff -uNr heartbeat-2.0.8/include/heartbeat.h heartbeat-2.0.8.orig/include/heartbeat.h
> --- heartbeat-2.0.8/include/heartbeat.h	2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/include/heartbeat.h	2007-05-31 10:50:25.000000000 +0900
> @@ -370,6 +370,20 @@
>  	char*		path;		/* Path (argv[0])? */
>  };
>  
> +typedef struct MaxDispatchDelay_Array {
> +	void *addr;
> +	char mode;
> +	int value;
> +	int pid;
> +	int type;
> +} MaxDispatchDelay_Array_t;
> +
> +typedef struct SetAll_Array {
> +	unsigned long id;
> +	int type;
> +	char * calc;
> +} SetAll_Array_t;
> +
>  int api_remove_client_pid(pid_t c_pid, const char * reason);
>  
>  
> diff -uNr heartbeat-2.0.8/lib/clplumbing/GSource.c heartbeat-2.0.8.orig/lib/clplumbing/GSource.c
> --- heartbeat-2.0.8/lib/clplumbing/GSource.c	2007-01-12 11:57:07.000000000 +0900
> +++ heartbeat-2.0.8.orig/lib/clplumbing/GSource.c	2007-05-31 10:46:30.000000000 +0900
> @@ -1599,7 +1599,8 @@
>  {
>  	G_main_setdescription_id(id, description);
>  	G_main_setmaxdispatchdelay_id(id, delay);
> -	G_main_setmaxdispatchtime_id(id, delay);
> +	// G_main_setmaxdispatchtime_id(id, delay);
> +	G_main_setmaxdispatchtime_id(id, elapsed);
>  }

The GSource.c fragment above is already in the dev tree :)

>  static void		TempProcessRegistered(ProcTrack* p);
> diff -uNr heartbeat-2.0.8/membership/ccm/ccm.c heartbeat-2.0.8.orig/membership/ccm/ccm.c
> --- heartbeat-2.0.8/membership/ccm/ccm.c	2007-01-12 11:57:08.000000000 +0900
> +++ heartbeat-2.0.8.orig/membership/ccm/ccm.c	2007-06-13 11:07:52.000000000 +0900
> @@ -113,6 +113,9 @@
>  	return HA_OK;
>  }
>  
> +extern ccm_info_t* ccm_info_saved;
> +extern ll_cluster_t* hb_fd_saved;
> +
>  int
>  ccm_control_process(ccm_info_t *info, ll_cluster_t * hb)
>  {
> @@ -149,6 +152,13 @@
>  			}
>  			ha_msg_del(msg);
>  			msg = newmsg;
> +		} else if(strcasecmp(type, "new keepalive") == 0){
> +			hb_fd_saved->llc_ops->signoff( hb_fd_saved, FALSE );
> +			hb_fd_saved->llc_ops->signon( hb_fd_saved, "ccm" );
> +			sleep( 3 );

What is the purpose of sleep(3). It seems strange that it is hard-coded.

> +
> +			ccm_configure_timeout( hb_fd_saved, ccm_info_saved );
> +			return TRUE;
>  		} else if(strcasecmp(type, T_STATUS) == 0){
>  			const char* nodetype;
>  			const char* site;
> diff -uNr heartbeat-2.0.8/membership/ccm/ccm.h heartbeat-2.0.8.orig/membership/ccm/ccm.h
> --- heartbeat-2.0.8/membership/ccm/ccm.h	2007-01-12 11:57:08.000000000 +0900
> +++ heartbeat-2.0.8.orig/membership/ccm/ccm.h	2007-06-08 14:20:24.000000000 +0900
> @@ -469,6 +469,8 @@
>  	int		has_quorum;	/* -1, not set, 0, no quorum, 1, has quorum */
>  } ccm_info_t;
>  
> +void ccm_configure_timeout(ll_cluster_t *, ccm_info_t *);
> +
>  /*
>   * datastructure passed to the event loop.
>   * This acts a handle, and should not be interpreted
> diff -uNr heartbeat-2.0.8/membership/ccm/ccm_statemachine.c heartbeat-2.0.8.orig/membership/ccm/ccm_statemachine.c
> --- heartbeat-2.0.8/membership/ccm/ccm_statemachine.c	2007-01-12 11:57:08.000000000 +0900
> +++ heartbeat-2.0.8.orig/membership/ccm/ccm_statemachine.c	2007-06-18 11:36:08.000000000 +0900
> @@ -201,7 +201,7 @@
>  /* */
>  /* timeout configuration function */
>  /* */
> -static void
> +void
>  ccm_configure_timeout(ll_cluster_t *hb, ccm_info_t *info)
>  {
>  	long keepalive = hb->llc_ops->get_keepalive(hb);

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev at lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/



More information about the Linux-HA-Dev mailing list