[Linux-ha-dev] dumping core
Alan Robertson
alanr at unix.sh
Fri Oct 15 14:47:03 MDT 2004
Alan Robertson wrote:
> Lars Marowsky-Bree wrote:
>
>> On 2004-10-14T20:41:39, Alan Robertson <alanr at unix.sh> wrote:
>>
>>
>>> It would be nice to capture every possible core dump - but I'm not
>>> fanatically worried about that - because core dumps are (and should
>>> be!) rare. From this perspective, I thought LMB's proposal was
>>> overkill and messy (perhaps a bit hard to keep cleaned up too).
>>
>>
>>
>> I had a proposal? Woah, I don't even remember ;-)
>>
>>
>>> So, I have what I think is a minimal solution which deals with the
>>> security issues as best as they can be, while allowing each process to
>>> dump its dump as needed...
>>>
>>> The core dump directory hierarchy is rooted at
>>> /var/run/heartbeat/cores, and it is owned by root and mode 755 (or 511
>>> or whatever suits you)
>>>
>>> It would be structured like this:
>>>
>>> /var/run/heartbeat/cores 755 owned by root
>>> /var/run/heartbeat/cores/hacluster 700 owned by hacluster
>>> /var/run/heartbeat/cores/root 700 owned by root
>>> /var/run/heartbeat/cores/nobody 700 owned by nobody
>>>
>>> This would permit us to write one core process per user id we run as.
>>> I think this is enough, and it keeps us from filling up the filesystem
>>> if something somehow goes nuts and our wonderful restart capability
>>> keeps restarting us again and again and again ;-)
>
> /var/run/heartbeat/cores isn't a good choice. /var/run isn't
> readable/executable by world, so non-root processes can't chdir there...
> -- Sigh :-(
>
> How about /var/lib/heartbeat/cores ?
OK...
A patch implementing this feature is attached, along with two new files that
actually do the work.
There are two new ha.cf options:
coredumps (true|false) -- defaults to TRUE
coreroot (pathname) -- where to root the core directory hierarchy
(defaults to /var/lib/heartbeat/cores)
{this may be good for cd-booting firewalls}
--
Alan Robertson <alanr at unix.sh>
"Openness is the foundation and preservative of friendship... Let me claim
from you at all times your undisguised opinions." - William Wilberforce
-------------- next part --------------
? foo
? lgpl
? stonith.out
? crm/common/BEAM-complaints
? crm/crmd/utils.i
? heartbeat/gen_authkeys
? lib/checkpointd/BEAM-complaints
? lib/clplumbing/BEAM-complaints
? lib/clplumbing/foo
? lib/cmsclient/BEAM-complaints
? lib/eventd/BEAM-complaints
? lib/pils/stonith.out
? lib/plugins/stonith/boilerplate
? lib/plugins/stonith/cscope.out
? lib/plugins/stonith/foo.c
Index: Makefile.am
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/Makefile.am,v
retrieving revision 1.49
diff -u -r1.49 Makefile.am
--- Makefile.am 23 Aug 2004 14:44:12 -0000 1.49
+++ Makefile.am 15 Oct 2004 20:39:54 -0000
@@ -47,7 +47,8 @@
MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure DRF/config-h.in \
DRF/stamp-h.in libtool.m4 ltdl.m4
-
+coredir = @HA_COREDIR@
+hauser = @HA_CCMUSER@
## proc-ha is left out from SUBDIRS (completely obsolete)
@@ -80,6 +81,20 @@
tar -zxf $(TARFILE)
cd $(distdir) ; dpkg-buildpackage -rfakeroot -us -uc
rm -rf $(distdir)
+
+install-exec-local:
+ -test -p $(DESTDIR)$(coredir) || mkdir -p $(DESTDIR)$(coredir)
+ -chown root $(DESTDIR)$(coredir)
+ -chmod 755 $(DESTDIR)$(coredir)
+ -test -p $(DESTDIR)$(coredir)/root || mkdir -p $(DESTDIR)$(coredir)/root
+ -chown root $(DESTDIR)$(coredir)/root
+ -chmod 700 $(DESTDIR)$(coredir)/root
+ -test -p $(DESTDIR)$(coredir)/nobody || mkdir -p $(DESTDIR)$(coredir)/nobody
+ -chown nobody $(DESTDIR)$(coredir)/nobody
+ -chmod 700 $(DESTDIR)$(coredir)/nobody
+ -test -p $(DESTDIR)$(coredir)/$(hauser) || mkdir -p $(DESTDIR)$(coredir)/$(hauser)
+ -chown $(hauser) $(DESTDIR)$(coredir)/$(hauser)
+ -chmod 700 $(DESTDIR)$(coredir)/$(hauser)
# "pkg" format for Solaris etc.
pkg:
Index: configure.in
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/configure.in,v
retrieving revision 1.342
diff -u -r1.342 configure.in
--- configure.in 11 Oct 2004 07:40:43 -0000 1.342
+++ configure.in 15 Oct 2004 20:39:59 -0000
@@ -130,6 +130,10 @@
AC_SUBST(HA_VARLIBDIR)
AC_DEFINE_UNQUOTED(HA_VARLIBDIR,"$HA_VARLIBDIR", var lib directory)
+HA_COREDIR="${HA_VARLIBDIR}/${HB_PKG}/cores"
+AC_SUBST(HA_COREDIR)
+AC_DEFINE_UNQUOTED(HA_COREDIR,"$HA_COREDIR", root directory to drop core files in)
+
HA_LOGDAEMON_IPC="${HA_VARLIBDIR}/log_daemon"
AC_SUBST(HA_LOGDAEMON_IPC)
AC_DEFINE_UNQUOTED(HA_LOGDAEMON_IPC, "$HA_LOGDAEMON_IPC", Logging Daemon IPC)
Index: contrib/ipfail/ipfail.c
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/contrib/ipfail/ipfail.c,v
retrieving revision 1.38
diff -u -r1.38 ipfail.c
--- contrib/ipfail/ipfail.c 1 Oct 2004 13:01:42 -0000 1.38
+++ contrib/ipfail/ipfail.c 15 Oct 2004 20:40:00 -0000
@@ -60,6 +60,7 @@
#include <clplumbing/cl_malloc.h>
#include <clplumbing/GSource.h>
#include <clplumbing/Gmain_timeout.h>
+#include <clplumbing/coredumps.h>
#include "ipfail.h"
/* ICK! global vars. */
@@ -125,16 +126,24 @@
exit(100);
}
- if (!strcmp(parameter, "on"))
+ if (!strcmp(parameter, "on")) {
auto_failback = 1;
- else
+ }else{
auto_failback = 0;
-
+ }
cl_log(LOG_DEBUG, "auto_failback -> %i (%s)", auto_failback,
parameter);
cl_free(parameter);
- } else
+ }else{
cl_log(LOG_ERR, "Couldn't get auto_failback setting.");
+ }
+ /* See if we should drop cores somewhere odd... */
+ parameter = hb->llc_ops->get_parameter(hb, KEY_COREROOTDIR);
+ if (parameter) {
+ cl_set_corerootdir(parameter);
+ cl_cdtocoredir();
+ }
+ cl_cdtocoredir();
set_callbacks(hb);
Index: heartbeat/config.c
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/heartbeat/config.c,v
retrieving revision 1.130
diff -u -r1.130 config.c
--- heartbeat/config.c 8 Oct 2004 18:37:06 -0000 1.130
+++ heartbeat/config.c 15 Oct 2004 20:40:06 -0000
@@ -54,6 +54,7 @@
#include <pils/plugin.h>
#include <clplumbing/realtime.h>
#include <clplumbing/netstring.h>
+#include <clplumbing/coredumps.h>
#include <HBcomm.h>
#include <hb_module.h>
#include <hb_api.h>
@@ -90,6 +91,8 @@
static int set_msgfmt(const char*);
static int set_register_to_apphbd(const char *);
static int set_badpack_warn(const char*);
+static int set_coredump(const char*);
+static int set_corerootdir(const char*);
/*
* Each of these parameters is is automatically recorded by
@@ -127,6 +130,8 @@
, {KEY_MSGFMT, set_msgfmt, TRUE, "classic", "message format in the wire"}
, {KEY_REGAPPHBD, set_register_to_apphbd, FALSE, NULL, "register with apphbd"}
, {KEY_BADPACK, set_badpack_warn, TRUE, "true", "warn about bad packets"}
+, {KEY_COREDUMP, set_coredump, TRUE, "true", "enable Linux-HA core dumps"}
+, {KEY_COREROOTDIR,set_corerootdir, TRUE, NULL, "set root directory of core dump area"}
};
static const struct WholeLineDirective {
@@ -2015,6 +2020,28 @@
}
+static int
+set_coredump(const char* value)
+{
+ gboolean docore;
+ int rc;
+ if ((rc = str_to_boolean(value, &docore)) == HA_OK) {
+ if (cl_enable_coredumps(docore) < 0 ) {
+ rc = HA_FAIL;
+ }
+ }
+ return rc;
+}
+
+static int
+set_corerootdir(const char* value)
+{
+ if (cl_set_corerootdir(value) < 0) {
+ cl_perror("Invalid core directory [%s]", value);
+ return HA_FAIL;
+ }
+ return HA_OK;
+}
Index: heartbeat/heartbeat.c
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/heartbeat/heartbeat.c,v
retrieving revision 1.326
diff -u -r1.326 heartbeat.c
--- heartbeat/heartbeat.c 8 Oct 2004 21:31:48 -0000 1.326
+++ heartbeat/heartbeat.c 15 Oct 2004 20:40:18 -0000
@@ -250,6 +250,7 @@
#include <clplumbing/cl_signal.h>
#include <clplumbing/cpulimits.h>
#include <clplumbing/netstring.h>
+#include <clplumbing/coredumps.h>
#include <heartbeat.h>
#include <ha_msg.h>
#include <hb_api.h>
@@ -874,6 +875,7 @@
cl_make_realtime(-1, hb_realtime_prio, 16, 8);
set_proc_title("%s: read: %s %s", cmdname, mp->type, mp->name);
drop_privs(0, 0); /* Become nobody */
+ cl_cdtocoredir();
hb_signal_process_pending();
curproc->pstat = RUNNING;
@@ -931,6 +933,7 @@
set_proc_title("%s: write: %s %s", cmdname, mp->type, mp->name);
cl_make_realtime(-1, hb_realtime_prio, 16, 8);
drop_privs(0, 0); /* Become nobody */
+ cl_cdtocoredir();
curproc->pstat = RUNNING;
if (ANYDEBUG) {
@@ -1001,6 +1004,7 @@
cl_make_realtime(-1, hb_realtime_prio, 16, 32);
drop_privs(0, 0); /* Become nobody */
+ cl_cdtocoredir();
curproc->pstat = RUNNING;
if (ANYDEBUG) {
@@ -3449,7 +3453,7 @@
for (j=FD_STDERR+1; j < oflimits.rlim_cur; ++j) {
close(j);
}
- chdir(HA_D);
+ cl_cdtocoredir();
/* We need to at least ignore SIGINTs early on */
hb_signal_set_common(NULL);
if (getsid(0) != pid) {
Index: include/hb_api.h
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/include/hb_api.h,v
retrieving revision 1.24
diff -u -r1.24 hb_api.h
--- include/hb_api.h 31 Aug 2004 18:39:02 -0000 1.24
+++ include/hb_api.h 15 Oct 2004 20:40:20 -0000
@@ -390,6 +390,8 @@
#define KEY_MSGFMT "msgfmt"
#define KEY_BADPACK "log_badpack"
#define KEY_REGAPPHBD "register_to_apphbd"
+#define KEY_COREDUMP "coredumps"
+#define KEY_COREROOTDIR "coreroot"
ll_cluster_t* ll_cluster_new(const char * llctype);
#endif /* __HB_API_H */
Index: lib/clplumbing/Makefile.am
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/lib/clplumbing/Makefile.am,v
retrieving revision 1.27
diff -u -r1.27 Makefile.am
--- lib/clplumbing/Makefile.am 30 Sep 2004 06:02:23 -0000 1.27
+++ lib/clplumbing/Makefile.am 15 Oct 2004 20:40:21 -0000
@@ -42,6 +42,7 @@
cl_netstring.c \
cl_poll.c \
cl_signal.c \
+ coredumps.c \
cpulimits.c \
longclock.c \
mkstemp_mode.c \
Index: membership/ccm/ccm.c
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/membership/ccm/ccm.c,v
retrieving revision 1.53
diff -u -r1.53 ccm.c
--- membership/ccm/ccm.c 8 Oct 2004 22:02:08 -0000 1.53
+++ membership/ccm/ccm.c 15 Oct 2004 20:40:28 -0000
@@ -27,6 +27,7 @@
#include <stdint.h>
#endif
#include <clplumbing/cl_signal.h>
+#include <clplumbing/coredumps.h>
extern int global_verbose;
@@ -4159,6 +4160,7 @@
ll_cluster_t* hb_fd;
ccm_t *ccmret;
int facility;
+ const char * parameter;
if(global_debug) {
cl_log(LOG_DEBUG, "========================== Starting CCM ===="
@@ -4176,6 +4178,13 @@
cl_log(LOG_ERR, "REASON: %s", hb_fd->llc_ops->errmsg(hb_fd));
return NULL;
}
+
+ /* See if we should drop cores somewhere odd... */
+ parameter = hb_fd->llc_ops->get_parameter(hb_fd, KEY_COREROOTDIR);
+ if (parameter) {
+ cl_set_corerootdir(parameter);
+ }
+ cl_cdtocoredir();
/* change the logging facility to the one used by heartbeat daemon
* the signon MUST BE FIRST! */
Index: snmp_subagent/hbagent.c
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/snmp_subagent/hbagent.c,v
retrieving revision 1.15
diff -u -r1.15 hbagent.c
--- snmp_subagent/hbagent.c 14 Oct 2004 14:17:53 -0000 1.15
+++ snmp_subagent/hbagent.c 15 Oct 2004 20:40:30 -0000
@@ -32,6 +32,7 @@
#include "hb_api.h"
#include "heartbeat.h"
#include "clplumbing/cl_log.h"
+#include "clplumbing/coredumps.h"
#include <net-snmp/net-snmp-config.h>
#include <net-snmp/net-snmp-includes.h>
@@ -359,6 +360,7 @@
int
init_heartbeat(void)
{
+ const char * parameter;
hb = NULL;
cl_log_set_entity("lha-snmpagent");
@@ -374,6 +376,13 @@
cl_log(LOG_ERR, "REASON: %s\n", hb->llc_ops->errmsg(hb));
return HA_FAIL;
}
+
+ /* See if we should drop cores somewhere odd... */
+ parameter = hb->llc_ops->get_parameter(hb, KEY_COREROOTDIR);
+ if (parameter) {
+ cl_set_corerootdir(parameter);
+ }
+ cl_cdtocoredir();
if (NULL == (myid = hb->llc_ops->get_mynodeid(hb))) {
cl_log(LOG_ERR, "Cannot get mynodeid\n");
Index: telecom/apphbd/apphbd.c
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/telecom/apphbd/apphbd.c,v
retrieving revision 1.52
diff -u -r1.52 apphbd.c
--- telecom/apphbd/apphbd.c 17 Feb 2004 22:12:01 -0000 1.52
+++ telecom/apphbd/apphbd.c 15 Oct 2004 20:40:32 -0000
@@ -90,6 +90,7 @@
#include <clplumbing/cl_signal.h>
#include <clplumbing/lsb_exitcodes.h>
+#include <clplumbing/coredumps.h>
#ifndef PIDFILE
@@ -866,13 +867,14 @@
int
main(int argc, char ** argv)
{
- int flag;
- int req_restart = FALSE;
- int req_status = FALSE;
- int req_stop = FALSE;
- int argerr = 0;
- const char* cfgfile = CONFIG_FILE;
+ int flag;
+ int req_restart = FALSE;
+ int req_status = FALSE;
+ int req_stop = FALSE;
+ int argerr = 0;
+ const char* cfgfile = CONFIG_FILE;
+ cl_cdtocoredir();
cl_log_set_entity(cmdname);
cl_log_enable_stderr(TRUE);
cl_log_set_facility(LOG_USER);
Index: telecom/recoverymgrd/recoverymgrd.c
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/telecom/recoverymgrd/recoverymgrd.c,v
retrieving revision 1.9
diff -u -r1.9 recoverymgrd.c
--- telecom/recoverymgrd/recoverymgrd.c 17 Feb 2004 22:12:02 -0000 1.9
+++ telecom/recoverymgrd/recoverymgrd.c 15 Oct 2004 20:40:34 -0000
@@ -51,6 +51,7 @@
#include <clplumbing/uids.h>
#include <clplumbing/recoverymgr_cs.h>
#include <clplumbing/lsb_exitcodes.h>
+#include <clplumbing/coredumps.h>
#include <apphb_notify.h>
#include <recoverymgr.h>
@@ -58,8 +59,8 @@
#include "configfile.h"
-/* indicates how many microseconds between heartbeats */
-#define HBINTERVAL_USEC 2000
+/* indicates how many milliseconds between heartbeats */
+#define HBINTERVAL_MSEC 2000
#define CONFIG_FILE "./recoverymgrd.conf"
@@ -123,11 +124,13 @@
}
-int main(int argc, char *argv[])
+int
+main(int argc, char *argv[])
{
int rc;
int retval = 0;
const char* conf_file = CONFIG_FILE;
+ cl_cdtocoredir();
if(argc == 2){
conf_file = argv[1];
@@ -191,7 +194,7 @@
return 1;
}
- rc = apphb_setinterval(HBINTERVAL_USEC);
+ rc = apphb_setinterval(HBINTERVAL_MSEC);
if (rc < 0)
{
cl_perror("setinterval failure");
@@ -217,9 +220,9 @@
signal(SIGALRM, (void (*) (int)) (apphb_hb));
itimerValue.it_interval.tv_sec = 0;
- itimerValue.it_interval.tv_usec = HBINTERVAL_USEC;
+ itimerValue.it_interval.tv_usec = HBINTERVAL_MSEC;
itimerValue.it_value.tv_sec = 0;
- itimerValue.it_value.tv_usec = HBINTERVAL_USEC;
+ itimerValue.it_value.tv_usec = HBINTERVAL_MSEC;
if (setitimer(ITIMER_REAL, &itimerValue, &itimerOldValue)!=0)
{
-------------- next part --------------
/*
* Basic Core dump control functions.
*
* Copyright (C) 2004 IBM Corporation
*
* This software licensed under the GNU LGPL.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#ifndef _CLPLUMBING_COREFILES_H
# define _CLPLUMBING_COREFILES_H 1
/*
 * Set the root directory of our core directory hierarchy.
 * dir must be an absolute pathname (it has to begin with '/'); the
 * library keeps its own copy of the string.
 * Returns 0 on success, -1 on failure.
 */
int cl_set_corerootdir(const char * dir);
/*
 * Change directory to the directory our core file needs to go in:
 * the core root directory (the compiled-in default if none was set),
 * then the subdirectory named after the effective user's login name.
 * Call after you establish the userid you're running under.
 * Returns 0 on success, a negative value (with errno set) on failure.
 */
int cl_cdtocoredir(void);
/*
 * Enable/disable core dumps for ourselves and our child processes.
 * A nonzero argument asks for core dumps to be enabled, zero asks for
 * them to be disabled.
 * Returns 0 on success, a negative value (with errno set) on failure.
 */
int cl_enable_coredumps(int truefalse);
#endif /* _CLPLUMBING_COREFILES_H */
-------------- next part --------------
/*
* Basic Core dump control functions.
*
* Author: Alan Robertson
*
* Copyright (C) 2004 IBM Corporation
*
* This software licensed under the GNU LGPL.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <portability.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include <pwd.h>
#include <clplumbing/cl_malloc.h>
#include <clplumbing/coredumps.h>
#include <clplumbing/cl_log.h>
static char * coreroot = NULL;
/*
 * Set the root directory of our core directory hierarchy.
 *
 * dir must be an absolute pathname (it has to begin with '/').  A private
 * copy of the string is kept, so the caller retains ownership of dir.
 *
 * Returns 0 on success.  On failure returns -1 with errno set (EINVAL for
 * a NULL or relative pathname, ENOMEM if the copy cannot be allocated) and
 * leaves any previously configured root directory in effect.
 */
int
cl_set_corerootdir(const char * dir)
{
	char *	newroot;

	if (dir == NULL || *dir != '/') {
		/*
		 * Set errno before logging so the reported reason matches
		 * the failure (cl_perror() presumably appends the text for
		 * the current errno, like perror(3)), and again afterwards
		 * in case logging disturbed it.
		 */
		errno = EINVAL;
		cl_perror("Invalid dir in cl_set_corerootdir() [%s]"
		,	dir ? dir : "<NULL>");
		errno = EINVAL;
		return -1;
	}

	/*
	 * Duplicate first, release the old value second: an allocation
	 * failure must not throw away a root that was already configured,
	 * and this ordering stays correct even if dir aliases coreroot.
	 */
	newroot = cl_strdup(dir);
	if (newroot == NULL) {
		errno = ENOMEM;
		return -1;
	}
	if (coreroot != NULL) {
		cl_free(coreroot);
	}
	coreroot = newroot;
	return 0;
}
/*
 * Change directory to the directory our core file needs to go in.
 * Call after you establish the userid you're running under.
 *
 * The target is <core root>/<login name of the effective uid>, where the
 * core root is whatever cl_set_corerootdir() stored, or HA_COREDIR if
 * nothing was stored.  The move is done in two steps, so if the per-user
 * directory cannot be entered the process is left in the core root.
 *
 * Returns 0 on success.  On failure returns a negative value with errno
 * describing the step that failed.
 */
int
cl_cdtocoredir(void)
{
	const char *	dir = coreroot;
	int		rc;
	int		errsave;
	struct passwd*	pwent;

	if (dir == NULL) {
		dir = HA_COREDIR;
	}
	if ((rc=chdir(dir)) < 0) {
		errsave = errno;
		cl_perror("Cannot chdir to [%s]", dir);
		errno = errsave;
		return rc;
	}

	/*
	 * getpwuid() returns NULL both on a real error and when the uid
	 * simply has no passwd entry; in the latter case it leaves errno
	 * untouched.  Clear errno first so a stale value is never reported,
	 * and map "no such user" to ENOENT.
	 */
	errno = 0;
	pwent = getpwuid(geteuid());
	if (pwent == NULL) {
		errsave = (errno != 0 ? errno : ENOENT);
		errno = errsave;
		/* uid_t is not guaranteed to be int: cast to match %d */
		cl_perror("Cannot get name for uid [%d]", (int)geteuid());
		errno = errsave;
		return -1;
	}
	if ((rc=chdir(pwent->pw_name)) < 0) {
		errsave = errno;
		cl_perror("Cannot chdir to [%s/%s]", dir, pwent->pw_name);
		errno = errsave;
	}
	return rc;
}
/*
 * Enable/disable core dumps for ourselves and our child processes.
 *
 * doenable nonzero: raise the soft RLIMIT_CORE limit to the hard limit.
 * doenable zero:    drop the soft RLIMIT_CORE limit to 0.
 *
 * If the hard limit is 0 and we are running with effective uid 0, the
 * hard limit is raised to RLIM_INFINITY first.  If the hard limit is 0
 * and cannot be raised, a warning is logged, since core dumps will stay
 * off whatever we do.
 *
 * Returns 0 on success.  On failure returns the negative result of the
 * failing getrlimit()/setrlimit() call with errno preserved.
 */
int
cl_enable_coredumps(int doenable)
{
	int		rc;
	int		errsave;
	struct rlimit	rlim;

	if ((rc = getrlimit(RLIMIT_CORE, &rlim)) < 0) {
		errsave = errno;
		cl_perror("Cannot get current core limit value.");
		errno = errsave;
		return rc;
	}

	/* Only a privileged process is allowed to raise its hard limit */
	if (rlim.rlim_max == 0 && geteuid() == 0) {
		rlim.rlim_max = RLIM_INFINITY;
	}
	rlim.rlim_cur = (doenable ? rlim.rlim_max : 0);

	if (doenable && rlim.rlim_max == 0) {
		cl_log(LOG_WARNING
		,	"Not possible to enable core dumps (rlim_max is 0)");
	}

	/*
	 * Apply the new limits.  This has to be setrlimit(): calling
	 * getrlimit() here would just overwrite rlim with the current
	 * values and leave the process limits unchanged.
	 */
	if ((rc = setrlimit(RLIMIT_CORE, &rlim)) < 0) {
		errsave = errno;
		cl_perror("Unable to %s core dumps"
		,	doenable ? "enable" : "disable");
		errno = errsave;
		return rc;
	}
	return 0;
}
More information about the Linux-HA-Dev
mailing list