[Linux-ha-dev] Dynamic Modify the timeout values
Horms
horms at verge.net.au
Wed Jun 20 21:36:02 MDT 2007
On Mon, Jun 18, 2007 at 01:55:12PM +0900, DAIKI MATSUDA wrote:
> Hi, All
>
> I add the new function for heartbeat-2.0.8 and attached its patch file.
>
> The function is to apply the new timeout parameters ( keepalive, deadtime,
> deadping, warntime ) without stopping the heartbeat services. Currently
> heartbeat boot scripts supply the 'reload' or 'forcereload' function, but
> it, they are same, does stop the services and the HA services are moved to
> standby node, because its process kills the forked heartbeat processes and
> clients ( crmd etc. ).
> So, we think to without suspending the services make the changing parameters
> to apply to driving nodes. Current feature is following.
> 1. changing ha.cf file for 4 parameters
> 2. send working parent heartbeat process signal SIGRTMAX (e.g. kill -s
> SIGRTMAX `cat /var/run/heartbeat.pid` (Why do I choose SIGRTMAX? I do not
> find the unused good signal.)
>
> As we research the heatbeat, it may be safety. And I want to listen to your
> issues for patch and functions.
>
> Best Regards
> MATSUDA, Daiki
Hi Matsuda-san,
I've taken a look over your patch. Its very long, so I am not sure that
I understand it all. One thing that I am particularly puzzled over is
the purpose of maxdispatchdelay_array. Is this used to notify child
processes when an update occurs?
I've put a number of comments inline, mainly minor things relating
to formating of the code. Other than that my only queries are:
* Has it been tested?
* If so, can you re-test it against the dev tree?
* If at all possible could you split the patch up into smaller bits.
Perhaps if you adopt some of my refactoring of existing code ideas
they could be separate patches?
* Do you propose to add a target to this to the init script?
> diff -uNr heartbeat-2.0.8/heartbeat/config.c heartbeat-2.0.8.orig/heartbeat/config.c
> --- heartbeat-2.0.8/heartbeat/config.c 2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/config.c 2007-06-18 10:38:26.000000000 +0900
> @@ -116,6 +116,33 @@
> static int set_normalpoll(const char *);
> #endif
>
> +void modify_setmaxdispatchdelay_value( int );
> +void modify_setall_id_value( int );
> +void modify_channel_value( int );
> +
> +/* from include/clplumbing/GSource_internal.h */
> +#define COMMON_STRUCTSTART \
> +GSource source; /* Common glib struct - must be 1st */ \
> +unsigned magno; /* Magic number */ \
> +long maxdispatchms; /* Time limit for dispatch function */ \
> +long maxdispatchdelayms; /* Max delay before processing */ \
> +char detecttime[sizeof(longclock_t)]; \
> + /* Time last input detected */ \
> +void* udata; /* User-defined data */ \
> +guint gsourceid; /* Source id of this source */ \
> +const char * description; /* Description of this source */ \
> +GDestroyNotify dnotify
> +
> +/* from lib/clplumbing/GSource.c */
> +struct GTimeoutAppend {
> + COMMON_STRUCTSTART;
> + longclock_t nexttime;
> + guint interval;
> +};
Should GTimeoutAppend be moved into a header file rather than duplicated?
> +
> +extern GPtrArray *maxdispatchdelay_array;
> +extern GPtrArray *setall_array;
> +extern GArray *timeout_add_full_array;
>
> void hb_set_max_rexmit_delay(int);
> /*
> @@ -212,6 +239,7 @@
> int netstring_format = FALSE;
> extern int UseApphbd;
> GSList* del_node_list;
> +extern int watchdog_timeout_ms;
>
>
> static int islegaldirective(const char *directive);
> @@ -781,6 +809,384 @@
> return(errcount ? HA_FAIL : HA_OK);
> }
>
> +/* care for G_main_setmaxdispatchdelay() function */
> +void
> +modify_setmaxdispatchdelay_value( int timeout_type )
> +{
> + int i;
> + long value;
> +
> + switch( timeout_type ){
> + case 0:
> + value = config->heartbeat_ms;
> + break;
> + case 1:
> + value = config->deadtime_ms;
> + break;
> + case 2:
> + value = config->warntime_ms;
> + break;
> + default:
> + return;
> + }
> +
> + for( i = 0; i < maxdispatchdelay_array->len; i++ ){
> + MaxDispatchDelay_Array_t *carray = g_ptr_array_index( maxdispatchdelay_array, i );
> +
> + if( carray->type == timeout_type ){
> + if( carray->mode == '*' ){
> + G_main_setmaxdispatchdelay( ( GSource * )carray->addr, value * carray->value );
> + } else if( carray->mode == '/' ){
> + G_main_setmaxdispatchdelay( ( GSource * )carray->addr, value / carray->value );
> + }
> + }
> + }
> +}
Could you reformat the code above a little bit so that it is <= 80col wide?
> +
> +/* care for Gmain_setall_id() function */
> +void
> +modify_setall_id_value( int timeout_type )
> +{
> + int i;
> + long value;
> +
> + switch( timeout_type ){
> + case 0:
> + value = config->heartbeat_ms;
> + break;
> + case 1:
> + value = config->deadtime_ms;
> + break;
> + case 2:
> + value = config->warntime_ms;
> + break;
> + default:
> + return;
> + }
The logic above seems to be a duplicate of the first half of
modify_setmaxdispatchdelay_value(). Is there any value in
breaking it out into its own (static?) function?
> +
> + for( i = 0; i < setall_array->len; i++ ){
> + SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i );
> +
> + if( sarray->type == timeout_type ){
> + if( sarray->calc[0] == '\0' ){
> + G_main_setmaxdispatchdelay_id( sarray->id, value );
> + } else {
> + char *ptr1 = sarray->calc;
> + char *ptr2;
> + unsigned long val = value;
> +
> + for( ; ; ){
> + char buf[2];
> + int num;
> +
> + ptr2 = strchr( ptr1, ' ' );
> + if( ptr2 != NULL ) ptr2 = strchr( ++ptr2, ' ' );
> +
> + if( ptr2 != NULL ){
> + *ptr2 = '\0';
> + }
> + sscanf( ptr1, "%s %d", buf, &num );
> + switch( buf[0] ){
> + case '+':
> + val += num;
> + break;
> + case '-':
> + val -= num;
> + break;
> + case '*':
> + val *= num;
> + break;
> + case '/':
> + val /= num;
> + break;
> + default:
> + break;
> + }
> + if( ptr2 == NULL ){
> + break;
> + } else {
> + *ptr2 = ' ';
> + ptr1 = ptr2 + 1;
> + }
> + }
> + G_main_setmaxdispatchdelay_id( sarray->id, val );
> + }
> + }
> + }
> +}
Again, 80col please. Perhaps if you change the logic around a bit
then making it narrower would be easier:
for( i = 0; i < setall_array->len; i++ ){
SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i );
if( sarray->type != timeout_type ) {
continue;
}
if( sarray->calc[0] == '\0' ){
G_main_setmaxdispatchdelay_id( sarray->id, value );
continue;
}
for( ; ; ){
...
Also, I'm not a big fan of declaring variables other inside blocks,
but I'm not sure what the linux-ha guidelines on this are.
> +
> +void
> +modify_channel_value( int timeout_type )
> +{
> + modify_setmaxdispatchdelay_value( timeout_type );
> + modify_setall_id_value( timeout_type );
> +}
> +
> +int
> +light_parse_config(const char * cfgfile )
The top half of light_parse_config() seems to contain a lot of code from
parse_config() Is it possible to restructure or split parse_config() in
order to reduce the volume of code duplication?
> +{
> + FILE * f;
> + char buf[MAXLINE];
> + char * cp;
> + char directive[MAXLINE];
> + size_t dirlength;
> + char option[MAXLINE];
> + size_t optionlength;
> + int i;
> + struct stat sbuf;
> +
> +#define OW_KEEPALIVE 1
> +#define OW_DEADTIME 2
> +#define OW_WARNTIME 4
> +#define OW_DEADPING 8
I'd rather if these defines were just done at the top of the file.
There doesn't seem to be any pressing need to #undef them at any stage.
> + int mode = 0;
> +
> + long new_keepalive_ms = 0;
> + long new_deadtime_ms = 0;
> + long new_warntime_ms = 0;
> + long new_deadping_ms = -1;
> + char *new_keepalive_string = NULL;
> + char *new_deadtime_string = NULL;
> + char *new_warntime_string = NULL;
> + char *new_deadping_string = NULL;
> +
> + if ((f = fopen(cfgfile, "r")) == NULL) {
> + ha_log(LOG_ERR, "Cannot open config file [%s]", cfgfile);
> + ha_log(LOG_INFO
> + , "An annotated sample %s file is provided in"
> + " the documentation."
> + , cfgfile);
> + ha_log(LOG_INFO
> + , "Please copy it to %s, read it, customize it"
> + ", and try again."
> + , cfgfile);
> +
> + return(HA_FAIL);
> + }
> +
> + fstat(fileno(f), &sbuf);
> + if( config->cfg_time == sbuf.st_mtime ){
> + ha_log( LOG_ERR, "The timestamp of %s is not changed!\n", cfgfile );
> + return 1;
> + }
> +
> + /* It's ugly, but effective */
> +
> + while (fgets(buf, MAXLINE, f) != NULL) {
> + char * bp = buf;
The line above contains trailing whitespace.
> + int IsOptionDirective=1;
> +
> + /* Skip over white space */
> + bp += strspn(bp, WHITESPACE);
> +
The line above contains trailing whitespace.
> + /* Zap comments on the line */
> + if ((cp = strchr(bp, COMMENTCHAR)) != NULL) {
> + *cp = EOS;
> + }
> +
The line above contains trailing whitespace.
> + /* Strip '\n' and '\r' chars */
> + if ((cp = strpbrk(bp, CRLF)) != NULL) {
> + *cp = EOS;
> + }
> +
> + /* Ignore blank (and comment) lines */
> + if (*bp == EOS) {
> + continue;
> + }
> +
> + /* Now we expect a directive name */
> +
> + dirlength = strcspn(bp, WHITESPACE);
> + strncpy(directive, bp, dirlength);
> + directive[dirlength] = EOS;
> +
> + bp += dirlength;
> +
> + /* Skip over Delimiters */
> + bp += strspn(bp, DELIMS);
> +
> + /* Now Check for the options-list stuff */
> + while (IsOptionDirective && *bp != EOS) {
> + optionlength = strcspn(bp, DELIMS);
> + strncpy(option, bp, optionlength);
> + option[optionlength] = EOS;
> + bp += optionlength;
> +
> + if( ! strcmp( directive, KEY_KEEPALIVE ) ){
> + new_keepalive_ms = get_msec( option );
> + new_keepalive_string = strdup( option );
> + } else if( ! strcmp( directive, KEY_DEADTIME ) ){
> + new_deadtime_ms = get_msec( option );
> + new_deadtime_string = strdup( option );
> + } else if( ! strcmp( directive, KEY_WARNTIME ) ){
> + new_warntime_ms = get_msec( option );
> + new_warntime_string = strdup( option );
> + } else if( ! strcmp( directive, KEY_DEADPING ) ){
> + new_deadping_ms = get_msec( option );
> + new_deadping_string = strdup( option );
> + }
> + /* Skip over Delimiters */
> + bp += strspn(bp, DELIMS);
> + }
> + }
> + fclose( f );
> +
> + if( new_keepalive_ms == 0 ){
> + ha_log( LOG_ERR, "keepalive value is not set.\n" );
> + return 2;
> + }
> +
> +
> + if( new_deadtime_ms == 0 ){
> + ha_log( LOG_ERR, "deadtime value is not set.\n" );
> + return 2;
> + }
> +
> + if( new_deadtime_ms != config->deadtime_ms ) mode |= OW_DEADTIME;
> +
> + if( new_keepalive_ms != config->heartbeat_ms ) mode |= OW_KEEPALIVE;
> + if( new_warntime_ms != config->warntime_ms ) mode |= OW_WARNTIME;
> + if( new_deadping_ms != config->deadping_ms ) mode |= OW_DEADPING;
> +
> + if( new_deadtime_ms < new_keepalive_ms * 2 ){
> + ha_log( LOG_ERR, "deadtime value is smaller than keepalive time * 2.\n" );
> + ha_log( LOG_ERR, "keepalive value: %ld\n", new_keepalive_ms );
> + ha_log( LOG_ERR, "deadtime value: %ld\n", new_deadtime_ms );
> + return 3;
> + }
> +
> + if( new_deadping_ms < new_keepalive_ms * 2 ){
> + ha_log( LOG_ERR, "deadping value is smaller than keepalive time * 2.\n" );
> + ha_log( LOG_ERR, "keepalive value: %ld\n", new_keepalive_ms );
> + ha_log( LOG_ERR, "deadping value: %ld\n", new_deadping_ms );
> + return 4;
> + }
> +
> +/* Maybe following is safe */
Could you explain on what might be unsafe about it?
> + if( mode & ( OW_KEEPALIVE | OW_DEADTIME ) ){
> + if( new_keepalive_ms > config->deadtime_ms ){
> + modify_deadtime_value( new_deadtime_ms, new_deadtime_string );
> + free( new_deadtime_string );
> + modify_keepalive_value( new_keepalive_ms, new_keepalive_string );
> + free( new_keepalive_string );
> + } else {
> + modify_keepalive_value( new_keepalive_ms, new_keepalive_string );
> + free( new_keepalive_string );
> + modify_deadtime_value( new_deadtime_ms, new_deadtime_string );
> + free( new_deadtime_string );
> + }
> + } else {
> + if( mode & OW_KEEPALIVE ){
> + modify_keepalive_value( new_keepalive_ms, new_keepalive_string );
> + free( new_keepalive_string );
> + }
> +
> + if( mode & OW_DEADTIME ){
> + modify_deadtime_value( new_deadtime_ms, new_deadtime_string );
> + free( new_deadtime_string );
> + }
> + }
> +
> + if( mode & OW_WARNTIME ){
> + SetParameterValue( KEY_WARNTIME, new_warntime_string );
> + config->warntime_ms = new_warntime_ms;
> + modify_channel_value( 2 );
> + }
> +
> + if( mode & OW_DEADPING ){
> + SetParameterValue( KEY_DEADPING, new_deadping_string );
> + config->deadping_ms = new_deadping_ms;
> +
> + for( i = 0; i < config->nodecount; i++ ) {
> + // config->nodes[i].has_resources = DoManageResources;
> + if (config->nodes[i].nodetype == PINGNODE_I) {
> + config->nodes[i].dead_ticks = msto_longclock( new_deadping_ms );
> + }
> + }
> +
> + modify_channel_value( 3 );
> + }
> +
> + return 0;
> +#undef OW_KEEPALIVE
> +#undef OW_DEADTIME
> +#undef OW_WARNTIME
> +}
> +
> +void
> +modify_keepalive_value( long new_keepalive_ms, char *new_keepalive_string )
> +{
> + int i;
> +
> + cl_log( LOG_INFO, "%s: keepalive %ld time is over-written to new value %ld.\n", __FUNCTION__, config->heartbeat_ms, new_keepalive_ms );
> + config->heartbeat_ms = new_keepalive_ms;
> + SetParameterValue( KEY_KEEPALIVE, new_keepalive_string );
> +
> + modify_channel_value( 0 );
> +#define GTIMEOUT(GS) ((struct GTimeoutAppend*)((void*)(GS)))
> + for( i = 0; i < timeout_add_full_array->len; i++ ){
> +/* This part is cut from Gmain_timeout_add_full() in lib/clplumbing/GSource.c */
> + struct GTimeoutAppend *append;
> + guint id = g_array_index( timeout_add_full_array, guint, i );
> + GSource *source;
> +
> + source = g_main_context_find_source_by_id( NULL, id );
> +
> + if( source != NULL ){
> + append = GTIMEOUT( source );
> + // append->nexttime = add_longclock( time_longclock(), msto_longclock( config->heartbeat_ms ) );
> + append->interval = config->heartbeat_ms;
There seems to be mixed tabs and spaces in the indentation of the line above.
> + }
> + }
> +
> +/* For ccm */
Could you indent the comment on the line above?
> + {
> + struct ha_msg * msg;
> + char keepalive[64];
> +
> + snprintf( keepalive, sizeof( keepalive ), "%lx", config->heartbeat_ms );
> + msg = ha_msg_new( 4 );
> + ha_msg_add( msg, F_TYPE, "new keepalive" );
> + ha_msg_add( msg, F_ORIG, curnode->nodename );
> + ha_msg_add( msg, F_STATUS, "new keepalive" );
> + ha_msg_add( msg, F_KEEPALIVE, keepalive );
> +
> + heartbeat_monitor( msg, KEEPIT, NULL );
> + }
> +#undef GTIMEOUT
> +}
> +
> +void
> +modify_deadtime_value( long new_deadtime_ms, char *new_deadtime_string )
I haven't investigated this, but how is the deadtime set on startup?
Would it make sense to abstract it a bit so that the same code can be
used for startup and modify? Perhaps a modified modify_deadtime_value()
could be used in both cases. Ditto for modify_keepalive_value()
> +{
> + int i;
> +
> + cl_log( LOG_INFO, "%s: deadtime %ld time is over-written to new value %ld.\n", __FUNCTION__, config->deadtime_ms, new_deadtime_ms );
> + SetParameterValue( KEY_DEADTIME, new_deadtime_string );
> +
> +/* this part is taken from init_config() */
> + for( i = 0; i < config->nodecount; i++ ) {
> + // config->nodes[i].has_resources = DoManageResources;
If the line above isn't needed, please just remove it.
> + if( config->nodes[i].nodetype != PINGNODE_I && strcmp( config->nodes[i].nodename, curnode->nodename ) == 0 ){
> + config->nodes[i].dead_ticks = msto_longclock( new_deadtime_ms );
> + }
> + }
> +/* To HERE */
> +
> +/* for heartbeat.c hb_init_watchdog_interval() */
> + if( watchdog_timeout_ms == 0L || watchdog_timeout_ms == ( config->deadtime_ms + 10 ) ) watchdog_timeout_ms = new_deadtime_ms + 10;
> +/* To HERE */
> + config->deadtime_ms = new_deadtime_ms;
> +
> + modify_channel_value( 1 );
> +
> +/* for heartbeat.c hb_reregister_with_apphbd() and hb_init_watchdog() */
> + if ( UseApphbd == TRUE ) {
> + hb_init_register_with_apphbd_dummy();
> + }
> +/* To HERE */
> +}
> +
Are the "To HERE" commends just debugging/development stuff? If so,
could you remove them. Also, could you please indent the other comments
in this function.
> /*
> * Dump the configuration file - as a configuration file :-)
> *
> diff -uNr heartbeat-2.0.8/heartbeat/hb_api.c heartbeat-2.0.8.orig/heartbeat/hb_api.c
> --- heartbeat-2.0.8/heartbeat/hb_api.c 2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_api.c 2007-06-04 15:16:07.000000000 +0900
> @@ -198,7 +198,7 @@
> seqno_t last_seq;
> };
>
> -
> +GPtrArray *maxdispatchdelay_array;
>
> static int
> should_msg_sendto_client(client_proc_t* client, struct ha_msg* msg)
> @@ -1290,7 +1290,10 @@
> , APIclients_input_dispatch
> , client, G_remove_client);
> G_main_setdescription((GSource*)client->gsource, "API client");
> G_main_setmaxdispatchdelay((GSource*)client->gsource, config->heartbeat_ms);
> +
> + maxdispatchdelay_array_add( ( void * )client->gsource, '*', 1, 0, 0 );
> +
> G_main_setmaxdispatchtime((GSource*)client->gsource, 100);
> if (ANYDEBUG) {
> cl_log(LOG_DEBUG
> diff -uNr heartbeat-2.0.8/heartbeat/hb_config.h heartbeat-2.0.8.orig/heartbeat/hb_config.h
> --- heartbeat-2.0.8/heartbeat/hb_config.h 2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_config.h 2007-06-15 15:44:23.000000000 +0900
> @@ -29,7 +29,13 @@
> int add_node(const char * value, int nodetype);
> int parse_authfile(void);
> int init_config(const char * cfgfile);
> +int light_parse_config( const char * cfgfile );
> +void modify_keepalive_value( long, char * );
> +void modify_deadtime_value( long, char * );
> int StringToBaud(const char * baudstr);
> const char * GetParameterValue(const char * name);
> +void hb_init_register_with_apphbd_dummy( void );
> +void maxdispatchdelay_array_add( void *addr, char mode, int value, pid_t pid, int type );
> +void setall_array_add( long id, int type, char * calc );
>
> #endif /* _HB_CONFIG_H */
> diff -uNr heartbeat-2.0.8/heartbeat/hb_rexmit.c heartbeat-2.0.8.orig/heartbeat/hb_rexmit.c
> --- heartbeat-2.0.8/heartbeat/hb_rexmit.c 2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_rexmit.c 2007-05-31 11:29:24.000000000 +0900
> @@ -35,6 +35,9 @@
> #include <clplumbing/GSource.h>
> #include <clplumbing/cl_random.h>
>
> +#include <hb_config.h>
> +
> +extern GPtrArray *setall_array;
>
> static void schedule_rexmit_request(struct node_info* node, seqno_t seq, int delay);
>
> @@ -235,7 +240,9 @@
>
> sourceid = Gmain_timeout_add_full(G_PRIORITY_HIGH - 1, delay,
> send_rexmit_request, ri, NULL);
> G_main_setall_id(sourceid, "retransmit request", config->heartbeat_ms/2, 10);
> +
> + setall_array_add( sourceid, 0, strdup( "/ 2" ) );
>
> if (sourceid == 0){
> cl_log(LOG_ERR, "%s: scheduling a timeout event failed",
> diff -uNr heartbeat-2.0.8/heartbeat/hb_signal.c heartbeat-2.0.8.orig/heartbeat/hb_signal.c
> --- heartbeat-2.0.8/heartbeat/hb_signal.c 2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_signal.c 2007-05-31 10:52:29.000000000 +0900
> @@ -60,7 +60,18 @@
> #define HB_SIG_PARENT_DEBUG_USR2_SIG 0x0020UL
> #define HB_SIG_REREAD_CONFIG_SIG 0x0040UL
> #define HB_SIG_FALSE_ALARM_SIG 0x0080UL
> +#define HB_SIG_LIGHT_REREAD_CONFIG_SIG 0x0100UL
>
> +/* Maybe no need
> +typedef struct MaxDispatchDelay_Array {
> + GCHSource *addr;
> + char mode;
> + int value;
> + int pid;
> + int type;
> +} MaxDispatchDelay_Array_t;
Please use tabs not spaces for indentation :)
> +extern GPtrArray *maxdispatchdelay_array;
> +*/
>
> /*
> * This function does NOT have the same semantics as setting SIG_IGN.
> @@ -298,6 +310,9 @@
> void
> hb_signal_reread_config_action(void)
> {
> +/* Maybe no need
> + int i;
> +*/
> int j;
> int signal_children = 0;
>
> @@ -347,6 +363,19 @@
> return_to_orig_privs();
> for (j=0; j < procinfo->nprocs; ++j) {
> if (procinfo->info+j != curproc) {
> +/* Maybe no need
> + for( i = 0; ; i++ ){
> + MaxDispatchDelay_Array_t *carray = g_ptr_array_index( maxdispatchdelay_array, i );
> + if( carray == NULL ) break;
> + if( carray->pid == procinfo->info[j].pid ){
> + ha_log( LOG_INFO, "Remove infomation from maxdispatchdelay_array mem = %x, pid = %d\n", ( int )carray, carray->pid );
> + g_ptr_array_remove( maxdispatchdelay_array, carray );
> + free( carray );
> + i--;
> + }
> + }
> +*/
It looks like it is needed to me. But if not, can you just remove it?
> + ha_log( LOG_INFO, "SP: CL_KILL procinfo->info[%d].pid = %d, getpid = %d by SIGHUP", j, ( int )procinfo->info[j].pid, ( int )getpid() );
> CL_KILL(procinfo->info[j].pid, SIGHUP);
> }
> }
> @@ -354,6 +383,18 @@
> }
> }
>
> +void
> +hb_signal_light_reread_config_handler( int sig )
> +{
> + __hb_signal_pending |= HB_SIG_LIGHT_REREAD_CONFIG_SIG;
> +}
> +
> +void
> +hb_signal_light_reread_config_action( void )
> +{
> + light_parse_config( CONFIG_NAME );
> + return;
> +}
>
> void
> hb_signal_false_alarm_handler(int sig)
> @@ -444,6 +485,10 @@
> hb_signal_reread_config_action();
> }
>
> + if (handlers&HB_SIG_LIGHT_REREAD_CONFIG_SIG) {
> + hb_signal_light_reread_config_action();
> + }
> +
> if (handlers&HB_SIG_FALSE_ALARM_SIG) {
> hb_signal_false_alarm_action();
> }
> @@ -478,6 +523,7 @@
> , {SIGALRM, hb_signal_false_alarm_handler, 1}
> , {SIGUSR1, hb_signal_debug_usr1_handler, 1}
> , {SIGUSR2, hb_signal_debug_usr2_handler, 1}
> + , {SIGRTMAX, hb_signal_light_reread_config_handler, 1}
Is using SIGRTMAX in this way safe? Is SIGHUP already used for something else?
> , {0, 0, 0}
> };
>
> @@ -634,6 +680,7 @@
> { {SIGTERM, hb_signal_term_handler, 1}
> , {SIGUSR1, parent_hb_signal_debug_usr1_handler, 1}
> , {SIGUSR2, parent_hb_signal_debug_usr2_handler, 1}
> + , {SIGRTMAX, hb_signal_light_reread_config_handler, 1}
> , {SIGALRM, hb_signal_false_alarm_handler, 1}
> , {0, 0, 0}
> };
> diff -uNr heartbeat-2.0.8/heartbeat/hb_signal.h heartbeat-2.0.8.orig/heartbeat/hb_signal.h
> --- heartbeat-2.0.8/heartbeat/hb_signal.h 2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/hb_signal.h 2007-05-31 10:52:29.000000000 +0900
> @@ -57,6 +57,10 @@
>
> void hb_signal_reread_config_action(void);
>
> +void hb_signal_light_reread_config_handler(int sig);
> +
> +void hb_signal_light_reread_config_action(void);
> +
> void hb_signal_false_alarm_handler(int sig);
>
> void hb_signal_false_alarm_action(void);
> diff -uNr heartbeat-2.0.8/heartbeat/heartbeat.c heartbeat-2.0.8.orig/heartbeat/heartbeat.c
> --- heartbeat-2.0.8/heartbeat/heartbeat.c 2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/heartbeat/heartbeat.c 2007-06-18 10:46:14.000000000 +0900
> @@ -316,7 +317,8 @@
>
> char * watchdogdev = NULL;
> static int watchdogfd = -1;
> -static int watchdog_timeout_ms = 0L;
> +// static int watchdog_timeout_ms = 0L;
> +int watchdog_timeout_ms = 0L;
>
> int shutdown_in_progress = FALSE;
> int startup_complete = FALSE;
> @@ -342,6 +344,9 @@
> GTRIGSource* write_delcachefile = NULL;
> extern GSList* del_node_list;
>
> +GPtrArray *maxdispatchdelay_array;
> +GPtrArray *setall_array;
> +GArray * timeout_add_full_array;
>
> #undef DO_AUDITXMITHIST
> #ifdef DO_AUDITXMITHIST
> @@ -490,6 +495,33 @@
> NULL,
> };
>
> +void
> +maxdispatchdelay_array_add( void *addr, char mode, int value, pid_t pid, int type )
> +{
> + MaxDispatchDelay_Array_t *carray;
> +
> + carray = malloc( sizeof( MaxDispatchDelay_Array_t ) );
Should this (and others) be cl_malloc()?
> + carray->addr = addr;
> + carray->mode = mode;
> + carray->value = value;
> +
> + carray->pid = pid;
> + carray->type = type;
> + g_ptr_array_add( maxdispatchdelay_array, ( gpointer )carray );
> +}
> +
> +void
> +setall_array_add( long id, int type, char * calc )
> +{
> + SetAll_Array_t *sarray;
> +
> + sarray = malloc( sizeof( SetAll_Array_t ) );
> + sarray->id = id;
> + sarray->type = type;
> + sarray->calc = calc;
> + g_ptr_array_add( setall_array, ( gpointer )sarray );
> +}
> +
> static void
> init_procinfo()
> {
> @@ -747,10 +779,13 @@
> FifoChildSource = G_main_add_IPC_Channel(PRI_FIFOMSG
> , fifochildipc[P_READFD]
> , FALSE, FIFO_child_msg_dispatch, NULL, NULL);
> + cl_log( LOG_INFO, "%s use config->heartbeat_ms: %ld\n", __FUNCTION__, config->heartbeat_ms );
> G_main_setmaxdispatchdelay((GSource*)FifoChildSource, config->heartbeat_ms);
> G_main_setmaxdispatchtime((GSource*)FifoChildSource, 50);
> G_main_setdescription((GSource*)FifoChildSource, "FIFO");
> -
> +
> + maxdispatchdelay_array_add( ( void * )FifoChildSource, '*', 1, pid, 0 );
> +
> return HA_OK;
> }
>
> @@ -1462,6 +1503,7 @@
> G_main_setmaxdispatchtime((GSource*)s, 50);
> G_main_setdescription((GSource*)s, "write child");
>
> + maxdispatchdelay_array_add( ( void * )s, '/', 4, sysmedia[j]->wchan[P_WRITEFD]->farside_pid, 0 );
>
> /* Connect up the read child IPC channel... */
> s = G_main_add_IPC_Channel(PRI_READPKT
> @@ -1474,8 +1518,8 @@
> G_main_setmaxdispatchtime((GSource*)s, 50);
> G_main_setdescription((GSource*)s, "read child");
>
> -}
> -
> + maxdispatchdelay_array_add( ( void * )s, '/', 4, sysmedia[j]->rchan[P_WRITEFD]->farside_pid, 0 );
> + }
>
> /*
> * Things to do on a periodic basis...
> @@ -1486,6 +1533,9 @@
> , hb_send_local_status, NULL, NULL);
> G_main_setall_id(id, "send local status", 10+config->heartbeat_ms/2, 50);
>
> + g_array_append_val( timeout_add_full_array, id );
> + setall_array_add( id, 0, strdup( "/ 2 + 10" ) );
> +
> id=Gmain_timeout_add_full(PRI_AUDITCLIENT
> , config->initial_deadtime_ms
> , set_init_deadtime_passed_flag
> @@ -1493,6 +1544,8 @@
> , NULL);
> G_main_setall_id(id, "init deadtime passed", config->warntime_ms, 50);
>
> + setall_array_add( id, 2, strdup( "" ) );
> +
> /* Dump out memory stats periodically... */
> memstatsinterval = (debug_level ? 10*60*1000 : ONEDAY*1000);
> id=Gmain_timeout_add_full(PRI_DUMPSTATS, memstatsinterval
> @@ -1515,11 +1568,17 @@
> id=Gmain_timeout_add_full(PRI_CHECKSIGS, config->heartbeat_ms
> , Gmain_hb_signal_process_pending, NULL, NULL);
> G_main_setall_id(id, "check for signals", 10+config->heartbeat_ms/2, 50);
>
> + cl_log( LOG_INFO, "heartbeat.c: G_main_timeout_add_full %d\n", id );
> + g_array_append_val( timeout_add_full_array, id );
> + setall_array_add( id, 0, strdup( "/ 2 + 10" ) );
> +
> id=Gmain_timeout_add_full(PRI_FREEMSG, 500
> , Gmain_update_msgfree_count, NULL, NULL);
> G_main_setall_id(id, "update msgfree count", config->deadtime_ms, 50);
>
> + setall_array_add( id, 1, strdup( "" ) );
> +
> if (UseApphbd) {
> Gmain_timeout_add_full(PRI_DUMPSTATS
> , 60*(1000-10) /* Not quite on a minute boundary */
> @@ -1817,7 +1875,8 @@
> G_main_setmaxdispatchdelay((GSource*)regsource, config->deadtime_ms);
> G_main_setmaxdispatchtime((GSource*)regsource, 20);
> G_main_setdescription((GSource*)regsource, "client registration");
>
> + maxdispatchdelay_array_add( ( void * )regsource, '*', 1, 0, 1 );
>
> if (regsource == NULL) {
> cl_log(LOG_DEBUG
> @@ -1908,6 +1967,8 @@
> void
> hb_initiate_shutdown(int quickshutdown)
> {
> + int i;
> +
> if (ANYDEBUG) {
> cl_log(LOG_DEBUG, "hb_initiate_shutdown() called.");
> }
> @@ -1930,6 +1991,24 @@
> , "Shutdown delayed until Communication is up.");
> return;
> }
> +
> +/*
> + for( ; ; ){
> + MaxDispatchDelay_Array_t *carray = g_ptr_array_index( maxdispatchdelay_array, 0 );
> + if( carray == NULL ) break;
> + g_ptr_array_remove( maxdispatchdelay_array, carray );
> + free( carray );
> + }
> +*/
If the code above isn't needed, please remove it.
> + g_ptr_array_free( maxdispatchdelay_array, TRUE );
> +
> + for( i = 0; i < setall_array->len; i++ ){
> + SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i );
> + if( sarray != NULL ) free( sarray->calc );
> + }
> + g_ptr_array_free( setall_array, TRUE );
> + g_array_free( timeout_add_full_array, TRUE );
> +
> send_local_status();
> if (!quickshutdown && DoManageResources) {
> /* THIS IS RESOURCE WORK! FIXME */
> @@ -4079,7 +4166,10 @@
> send_cluster_msg(msg);
>
> id = Gmain_timeout_add(1000, send_reqnodes_msg, (gpointer)i);
> G_main_setall_id(id, "send_reqnodes_msg", config->heartbeat_ms, 100);
> +
> + setall_array_add( id, 0, strdup( "" ) );
> +
> return FALSE;
> }
>
> @@ -4694,6 +4787,9 @@
> /*init table for nodename/uuid lookup*/
> inittable();
>
> + maxdispatchdelay_array = g_ptr_array_new();
> + setall_array = g_ptr_array_new();
> + timeout_add_full_array = g_array_new( FALSE, FALSE, sizeof( guint ) );
>
> /*
> * We think we just performed an "exec" of ourselves to restart.
> @@ -5024,6 +5121,11 @@
> }
> }
>
> +void hb_init_register_with_apphbd_dummy( void )
> +{
> + hb_init_register_with_apphbd();
> +}
> +
> static gboolean
> hb_reregister_with_apphbd(gpointer dummy)
> {
> diff -uNr heartbeat-2.0.8/include/heartbeat.h heartbeat-2.0.8.orig/include/heartbeat.h
> --- heartbeat-2.0.8/include/heartbeat.h 2007-01-12 11:57:05.000000000 +0900
> +++ heartbeat-2.0.8.orig/include/heartbeat.h 2007-05-31 10:50:25.000000000 +0900
> @@ -370,6 +370,20 @@
> char* path; /* Path (argv[0])? */
> };
>
> +typedef struct MaxDispatchDelay_Array {
> + void *addr;
> + char mode;
> + int value;
> + int pid;
> + int type;
> +} MaxDispatchDelay_Array_t;
> +
> +typedef struct SetAll_Array {
> + unsigned long id;
> + int type;
> + char * calc;
> +} SetAll_Array_t;
> +
> int api_remove_client_pid(pid_t c_pid, const char * reason);
>
>
> diff -uNr heartbeat-2.0.8/lib/clplumbing/GSource.c heartbeat-2.0.8.orig/lib/clplumbing/GSource.c
> --- heartbeat-2.0.8/lib/clplumbing/GSource.c 2007-01-12 11:57:07.000000000 +0900
> +++ heartbeat-2.0.8.orig/lib/clplumbing/GSource.c 2007-05-31 10:46:30.000000000 +0900
> @@ -1599,7 +1599,8 @@
> {
> G_main_setdescription_id(id, description);
> G_main_setmaxdispatchdelay_id(id, delay);
> - G_main_setmaxdispatchtime_id(id, delay);
> + // G_main_setmaxdispatchtime_id(id, delay);
> + G_main_setmaxdispatchtime_id(id, elapsed);
> }
The GSource.c fragment above is already in the dev tree :)
> static void TempProcessRegistered(ProcTrack* p);
> diff -uNr heartbeat-2.0.8/membership/ccm/ccm.c heartbeat-2.0.8.orig/membership/ccm/ccm.c
> --- heartbeat-2.0.8/membership/ccm/ccm.c 2007-01-12 11:57:08.000000000 +0900
> +++ heartbeat-2.0.8.orig/membership/ccm/ccm.c 2007-06-13 11:07:52.000000000 +0900
> @@ -113,6 +113,9 @@
> return HA_OK;
> }
>
> +extern ccm_info_t* ccm_info_saved;
> +extern ll_cluster_t* hb_fd_saved;
> +
> int
> ccm_control_process(ccm_info_t *info, ll_cluster_t * hb)
> {
> @@ -149,6 +152,13 @@
> }
> ha_msg_del(msg);
> msg = newmsg;
> + } else if(strcasecmp(type, "new keepalive") == 0){
> + hb_fd_saved->llc_ops->signoff( hb_fd_saved, FALSE );
> + hb_fd_saved->llc_ops->signon( hb_fd_saved, "ccm" );
> + sleep( 3 );
What is the purpose of sleep(3). It seems strange that it is hard-coded.
> +
> + ccm_configure_timeout( hb_fd_saved, ccm_info_saved );
> + return TRUE;
> } else if(strcasecmp(type, T_STATUS) == 0){
> const char* nodetype;
> const char* site;
> diff -uNr heartbeat-2.0.8/membership/ccm/ccm.h heartbeat-2.0.8.orig/membership/ccm/ccm.h
> --- heartbeat-2.0.8/membership/ccm/ccm.h 2007-01-12 11:57:08.000000000 +0900
> +++ heartbeat-2.0.8.orig/membership/ccm/ccm.h 2007-06-08 14:20:24.000000000 +0900
> @@ -469,6 +469,8 @@
> int has_quorum; /* -1, not set, 0, no quorum, 1, has quorum */
> } ccm_info_t;
>
> +void ccm_configure_timeout(ll_cluster_t *, ccm_info_t *);
> +
> /*
> * datastructure passed to the event loop.
> * This acts a handle, and should not be interpreted
> diff -uNr heartbeat-2.0.8/membership/ccm/ccm_statemachine.c heartbeat-2.0.8.orig/membership/ccm/ccm_statemachine.c
> --- heartbeat-2.0.8/membership/ccm/ccm_statemachine.c 2007-01-12 11:57:08.000000000 +0900
> +++ heartbeat-2.0.8.orig/membership/ccm/ccm_statemachine.c 2007-06-18 11:36:08.000000000 +0900
> @@ -201,7 +201,7 @@
> /* */
> /* timeout configuration function */
> /* */
> -static void
> +void
> ccm_configure_timeout(ll_cluster_t *hb, ccm_info_t *info)
> {
> long keepalive = hb->llc_ops->get_keepalive(hb);
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev at lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
More information about the Linux-HA-Dev
mailing list