From 01b40beef426d5af20f7058c049b4413dfe3e244 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 23 Apr 2018 17:50:38 +0100 Subject: [PATCH 1/6] atopacctd: close netlink socket consistently on failure path --- netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/netlink.c b/netlink.c index 6f84978e..072b5b3d 100644 --- a/netlink.c +++ b/netlink.c @@ -68,6 +68,7 @@ netlink_open(void) &cpudef, strlen(cpudef)+1) == -1) { fprintf(stderr, "register cpumask failed\n"); + close(nlsock); return -1; } @@ -152,6 +153,7 @@ nlsock_open(void) == -1) { perror("set length receive buffer"); + close(nlsock); exit(1); } From 88537719634f8557e3fb50625be6c5839fd21a07 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 24 Apr 2018 10:00:17 +0100 Subject: [PATCH 2/6] atopacctd: handle error from fork I've seen my machine fail fork() sometimes :). I don't need this, it just makes me more comfortable working on this code. --- atopacctd.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/atopacctd.c b/atopacctd.c index a0c7f441..92b46bb9 100644 --- a/atopacctd.c +++ b/atopacctd.c @@ -267,7 +267,6 @@ main(int argc, char *argv[]) memset(&sigcleanup, 0, sizeof sigcleanup); sigcleanup.sa_handler = cleanup; sigemptyset(&sigcleanup.sa_mask); - /* ** daemonize this process ** i.e. be sure that the daemon is no session leader (any more) @@ -277,10 +276,18 @@ main(int argc, char *argv[]) parentpid = getpid(); // to be killed when initialized (void) sigaction(SIGTERM, &sigcleanup, (struct sigaction *)0); - if ( fork() ) // implicitly switch to background - { + rv = fork(); + + if (rv == -1) + { + perror("cannot create process"); + exit(4); + } + + if (rv > 0) + { /* - ** parent after forking first child: + ** parent after forking first child: ** wait for signal 15 from child before terminating ** because systemd expects parent to terminate whenever ** service is up and running @@ -291,7 +298,15 @@ main(int argc, char *argv[]) setsid(); // become session leader to lose ctty - if ( fork() ) // finish parent; continue in child + rv = fork(); // finish parent; continue in child + + if (rv == -1) + { + perror("cannot create process"); + exit(4); + } + + if (rv > 0) exit(0); // --> no session leader, no ctty getrlimit(RLIMIT_NOFILE, &rlim); @@ -313,7 +328,7 @@ main(int argc, char *argv[]) { perror("cannot increment private semaphore"); kill(parentpid, SIGTERM); - exit(4); + exit(5); } /* @@ -328,7 +343,7 @@ main(int argc, char *argv[]) { perror(accountpath); kill(parentpid, SIGTERM); - exit(5); + exit(6); } (void) close(afd); @@ -340,7 +355,7 @@ main(int argc, char *argv[]) { perror(accountpath); kill(parentpid, SIGTERM); - exit(5); + exit(6); } /* @@ -352,7 +367,7 @@ main(int argc, char *argv[]) { perror(shadowdir); kill(parentpid, SIGTERM); - exit(5); + exit(6); } sfd = createshadow(curshadow); @@ -378,7 +393,7 @@ main(int argc, char *argv[]) { (void) unlink(accountpath); kill(parentpid, SIGTERM); - exit(5); + exit(6); } /* @@ -388,7 +403,7 @@ main(int argc, char *argv[]) { (void) unlink(accountpath); kill(parentpid, SIGTERM); - exit(6); + exit(7); } syslog(LOG_INFO, "accounting to %s", accountpath); From 552c05cc3a042c21f29d57a8f76fc833f0e3a44b Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 24 Apr 2018 14:48:12 +0100 Subject: [PATCH 3/6] atopacctd: fix service readiness, avoid hang The parent process waited to be killed. The problem is that unexpected termination of the child will cause the parent to hang. This fixes a hang when running atopacctd in a systemd-nspawn container. It now exits immediately with an error. The problem was that netlink_open() calls exit() on failure, without killing the parent process. We also now avoid any possibility of killing an unexpected process, in case the *parent* process is terminated unexpectedly and its PID is re-used. These are two examples of why message-passing with a pipe is preferable to working with kill(). A systemd-specific comment is modified. AFAICT, a service readiness protocol is always necessary when you have another service depending on you (and you do not use socket activation). You might think atopacctd would be a candidate for such... In which case, systemd checking for a pidfile ASAP just provides a sanity check that was missing from historical implementations, highlighting that many daemons were not strictly implementing a service readiness protocol, and hence many boot "sequences" were actually a pile of race conditions. Official systemd propaganda for this is currently available[1]; it explicitly suggests the pipe approach. [1] https://www.freedesktop.org/software/systemd/man/daemon.html I'm not really clear from my reading, as to what extent this applied to atopacctd. For all I know, atop doesn't mind if atopacctd is started after it and atop manages to connect immediately when atopacctd is ready. However IMO this commit is a well-defined approach, and it solves a genuine annoyance. --- atopacctd.c | 54 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/atopacctd.c b/atopacctd.c index 92b46bb9..e8b5f096 100644 --- a/atopacctd.c +++ b/atopacctd.c @@ -119,11 +119,12 @@ int netlink_recv(int, int); // from netlink.c int main(int argc, char *argv[]) { - int i, nfd, afd, sfd; - int parentpid; + int i, rv, nfd, afd, sfd; struct stat dirstat; struct rlimit rlim; FILE *pidf; + int readypipe[2]; + int8_t ready; struct sembuf semincr = {0, +1, SEM_UNDO}; @@ -261,20 +262,17 @@ main(int argc, char *argv[]) } } - /* - ** prepare cleanup signal handler - */ - memset(&sigcleanup, 0, sizeof sigcleanup); - sigcleanup.sa_handler = cleanup; - sigemptyset(&sigcleanup.sa_mask); /* ** daemonize this process ** i.e. be sure that the daemon is no session leader (any more) ** and get rid of a possible bad context that might have been ** inherited from ancestors */ - parentpid = getpid(); // to be killed when initialized - (void) sigaction(SIGTERM, &sigcleanup, (struct sigaction *)0); + if ( pipe(readypipe) == -1) + { + perror("cannot create readiness pipe"); + exit(4); + } rv = fork(); @@ -288,14 +286,24 @@ main(int argc, char *argv[]) { /* ** parent after forking first child: - ** wait for signal 15 from child before terminating - ** because systemd expects parent to terminate whenever - ** service is up and running + ** wait for message from child before terminating + ** because dependent services need parent to terminate + ** whenever service is up and running */ - pause(); // wait for signal from child - exit(0); // finish parent + + (void) close(readypipe[1]); // close write end of pipe + + rv = read(readypipe[0], &ready, sizeof(ready)); + + if (rv == sizeof(ready)) + exit(0); // service is ready now + else + exit(4); // service exited } + (void) close(readypipe[0]); // close read end of pipe + readypipe[0] = -1; + setsid(); // become session leader to lose ctty rv = fork(); // finish parent; continue in child @@ -327,7 +335,6 @@ main(int argc, char *argv[]) if ( semop(semprv, &semincr, 1) == -1) { perror("cannot increment private semaphore"); - kill(parentpid, SIGTERM); exit(5); } @@ -342,7 +349,6 @@ main(int argc, char *argv[]) if ( (afd = creat(accountpath, 0600)) == -1) { perror(accountpath); - kill(parentpid, SIGTERM); exit(6); } @@ -354,7 +360,6 @@ main(int argc, char *argv[]) if ( (afd = open(accountpath, O_RDONLY)) == -1) { perror(accountpath); - kill(parentpid, SIGTERM); exit(6); } @@ -366,7 +371,6 @@ main(int argc, char *argv[]) if ( mkdir(shadowdir, 0755) == -1) { perror(shadowdir); - kill(parentpid, SIGTERM); exit(6); } @@ -392,7 +396,6 @@ main(int argc, char *argv[]) if ( (nfd = netlink_open()) == -1) { (void) unlink(accountpath); - kill(parentpid, SIGTERM); exit(6); } @@ -402,7 +405,6 @@ main(int argc, char *argv[]) if ( swonpacct(afd, accountpath) == -1) { (void) unlink(accountpath); - kill(parentpid, SIGTERM); exit(7); } @@ -413,6 +415,10 @@ main(int argc, char *argv[]) */ (void) signal(SIGHUP, SIG_IGN); + memset(&sigcleanup, 0, sizeof sigcleanup); + sigcleanup.sa_handler = cleanup; + sigemptyset(&sigcleanup.sa_mask); + (void) sigaction(SIGINT, &sigcleanup, (struct sigaction *)0); (void) sigaction(SIGQUIT, &sigcleanup, (struct sigaction *)0); (void) sigaction(SIGTERM, &sigcleanup, (struct sigaction *)0); @@ -427,9 +433,11 @@ main(int argc, char *argv[]) } /* - ** terminate parent: service initialized + ** notify parent: service initialized */ - kill(parentpid, SIGTERM); + ready = 1; + (void) write(readypipe[1], &ready, sizeof(ready)); + (void) close(readypipe[1]); /* ** main loop From 6f9b5c2b0766a8b483e9daabf23fbe35c2d3a4be Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 24 Apr 2018 11:57:47 +0100 Subject: [PATCH 4/6] atopacctd: provide strerror for "receive NETLINK family" I tried running atopacctd in my systemd-nspawn container. Let's provide a textual error description, which is easier to look up (google). Before: receive NETLINK family, errno -2 After: get NETLINK family for taskstats: No such file or directory Also clarifies where the failure is: we do have NETLINK, but we don't have the NETLINK family for taskstats. Google won't find this commit message. But for reference, the kernel code which generates this error in ctrl_getfamily() is: if (!res->netnsok && !net_eq(genl_info_net(info), &init_net)) { /* family doesn't exist here */ return -ENOENT; } Amusingly, it is indeed permitted to send TASKSTATS_CMD_ATTR_REGISTER_CPUMASK inside a container which shares the init netns (which of course does not share the pid namespace). Dunno if this would considered an info leak, but at least acct() fails in this environment. --- netlink.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/netlink.c b/netlink.c index 072b5b3d..55477b22 100644 --- a/netlink.c +++ b/netlink.c @@ -109,7 +109,7 @@ nlsock_getfam(int nlsock) if ( (len = recv(nlsock, &msg, sizeof msg, 0)) == -1) { - perror("receive NETLINK family"); + perror("receive NETLINK family for taskstats"); exit(1); } @@ -117,9 +117,10 @@ nlsock_getfam(int nlsock) { struct nlmsgerr *err = NLMSG_DATA(&msg); - fprintf(stderr, "receive NETLINK family, errno %d\n", - err->error); + /* nlmsgerr holds a negative errno */ + errno = -err->error; + perror("get NETLINK family for taskstats"); exit(1); } From 6fb86082b6ab01bdcc3490151067ea7dc6df0956 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 24 Apr 2018 15:28:00 +0100 Subject: [PATCH 5/6] atopacctd: light hack around missing cleanup in error path Imaging installing atop in a systemd-nspawn container, enabling atopacctd, and rebooting. atopacctd will fail to start in the container. However, if you try to run it manually, in order to see the error message or to troubleshoot it with `strace`, it will then start failing with a *different* error. /var/run/pacct_shadow.d: File exists This is an unfortunate obstacle to troubleshooting. Let's fix it... Currently we don't have code to clean `pacct_shadow.d/` in the event of atopacctd crashing. And the failure during startup behaves similar to a crash. This commit provides a workaround. If `pacct_shadow.d` already exists, then ignore the mkdir() failure. If it's not a directory like we need, we'll notice a failure later on anyway. --- atopacctd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/atopacctd.c b/atopacctd.c index e8b5f096..63bce347 100644 --- a/atopacctd.c +++ b/atopacctd.c @@ -364,11 +364,11 @@ main(int argc, char *argv[]) } /* - ** create directory to store the shadow files + ** create directory to store the shadow files */ snprintf(shadowdir, sizeof shadowdir, "%s/%s", pacctdir, PACCTSHADOWD); - if ( mkdir(shadowdir, 0755) == -1) + if ( mkdir(shadowdir, 0755) == -1 && errno != EEXIST) { perror(shadowdir); exit(6); From 310a3723415ba53743e5e92f8688533986f1fba0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 24 Apr 2018 17:16:47 +0100 Subject: [PATCH 6/6] atopacct.service: don't run on containers Let's skip on containers here, at least for now. Currently, Linux Containers don't support process accounting (and failed services are painful for Debian-style packages which start the service at install time). Quietly ignoring a service like this, is a big part of how systemd manages to run standard distributions inside a container (e.g. skipping systemd-udev). (The earlier approach with LXC was for the container to configure^Wpatch the distribution into submission... it makes me not inclined to feel very guilty for not having an idea for the _sysvinit_ script). I'm not sure how useful atop is in containers in practice... however it can be used as a cheaper way to inspect atop scripts etc. used by a specific distribution, or developing configuration management to install atop. I believe there's a non-zero population who have used systemd-nspawn as "chroot on steroids." --- atopacct.service | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/atopacct.service b/atopacct.service index 1f51ec41..9065ada0 100644 --- a/atopacct.service +++ b/atopacct.service @@ -5,6 +5,11 @@ Conflicts=psacct.service After=syslog.target Before=atop.service +# Let's skip on containers here, at least for now. +# Currently, Linux Containers don't support process accounting +# (and failed services are painful for Debian-style packages). +ConditionVirtualization=!container + [Service] Type=forking PIDFile=/var/run/atopacctd.pid