shared: add generic IPC barrier

The "Barrier" object is a simple inter-process barrier implementation. It allows placing synchronization points and waiting for the other side to reach them. Additionally, it has an abortion-mechanism as a second-layer synchronization to send abortion-events asynchronously to the other side. The API is usually used to synchronize processes during fork(). However, it can be extended to pass state through execve() so you could synchronize beyond execve(). Usually, it's used like this (error-handling replaced by assert() for simplicity): Barrier b; r = barrier_init(&b); assert_se(r >= 0); pid = fork(); assert_se(pid >= 0); if (pid == 0) { barrier_set_role(&b, BARRIER_CHILD); ...do child post-setup... if (CHILD_SETUP_FAILED) exit(1); ...child setup done... barrier_place(&b); if (!barrier_sync(&b)) { /* parent setup failed */ exit(1); } barrier_destroy(&b); /* redundant as execve() and exit() imply this */ /* parent & child setup successful */ execve(...); } barrier_set_role(&b, BARRIER_PARENT); ...do parent post-setup... if (PARENT_SETUP_FAILED) { barrier_abort(&b); /* send abortion event */ barrier_wait_abortion(&b); /* wait for child to abort (exit() implies abortion) */ barrier_destroy(&b); ...bail out... } ...parent setup done... barrier_place(&b); if (!barrier_sync(&b)) { ...child setup failed... ; barrier_destroy(&b); ...bail out... } barrier_destroy(&b); ...child setup successful... This is the most basic API: it uses barrier_place() to place barriers and barrier_sync() to perform a full synchronization between both processes. barrier_abort() places an abortion barrier which supersedes any other barriers; exit() (or barrier_destroy()) places an abortion-barrier that queues behind existing barriers (thus *not* replacing existing barriers unlike barrier_abort()). This example uses hard-synchronization with wait_abortion(), sync() and friends. These are all optional.
Barriers are highly dynamic and can be used for one-way synchronization or even no synchronization at all (postponing it for later). The sync() call performs a full two-way synchronization. The API is documented and should be fairly self-explanatory. A test-suite shows some special semantics regarding abortion, wait_next() and exit(). Internally, barriers use two eventfds and a pipe. The pipe is used to detect exit()s of the remote side as eventfds do not allow that. The eventfds are used to place barriers, one for each side. Barriers themselves are numbered, but the numbers are reused once both sides reached the same barrier, thus you cannot address barriers by index. Moreover, the numbering is implicit and we only store a counter. This makes the implementation itself very lightweight, which is probably negligible considering that we need 3 FDs for a barrier. Last but not least: This barrier implementation is quite heavy. It's definitely not meant for fast IPC synchronization. However, it's very easy to use. And given the *HUGE* overhead of fork(), the barrier-overhead should be negligible.
author: David Herrmann <dh.herrmann@gmail.com> 2014-07-10 15:25:47 +0200
committer: David Herrmann <dh.herrmann@gmail.com> 2014-07-17 11:34:00 +0200
commit: 279da1e3f99b9c767a69849b5445e3cfd8d83376 (patch)
tree: 23aa7a51fd77d1408fa21be1d1a7d78e3ca5366b /src/test/test-barrier.c
parent: 18abe7bd3e13525b257da69ac49ff7841c289567 (diff)
1 files changed, 460 insertions, 0 deletions
diff --git a/src/test/test-barrier.c b/src/test/test-barrier.c
new file mode 100644
index 0000000000..640e508679
--- /dev/null
+++ b/src/test/test-barrier.c
@@ -0,0 +1,460 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/*
+ * IPC barrier tests
+ * These tests verify the correct behavior of the IPC Barrier implementation.
+ * Note that the tests use alarm-timers to verify dead-locks and timeouts. These
+ * might not work on slow machines where 20ms is too short to perform specific
+ * operations (though that is very unlikely). If that turns out to be the case,
+ * we have to increase it at the slight cost of longer test runs everywhere.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "def.h"
+#include "util.h"
+
+/* 20ms to test deadlocks; All timings use multiples of this constant as
+ * alarm/sleep timers. If this timeout is too small for slow machines to perform
+ * the requested operations, we have to increase it. On an i7 this works fine
+ * with 1ms base-time, so 20ms should be just fine for everyone. */
+#define BASE_TIME 20
+
+static void malarm(unsigned long msecs) {
+        struct itimerval timer = { }; /* it_interval stays zero: one-shot */
+
+        timeval_store(&timer.it_value, msecs * USEC_PER_MSEC);
+        assert_se(setitimer(ITIMER_REAL, &timer, NULL) >= 0);
+}
+
+static void msleep(unsigned long msecs) { /* sleep for msecs milliseconds */
+        assert_se(msecs < MSEC_PER_SEC); /* POSIX lets usleep() reject >= 1s */
+        usleep(msecs * USEC_PER_MSEC); /* result ignored: best-effort delay */
+}
+
+#define TEST_BARRIER(_FUNCTION, _CHILD_CODE, _WAIT_CHILD, _PARENT_CODE, _WAIT_PARENT)  \
+        static void _FUNCTION(void) { /* fork both roles, reap both */  \
+                Barrier b;                                              \
+                pid_t pid1, pid2;                                       \
+                                                                        \
+                assert_se(barrier_init(&b) >= 0);                       \
+                                                                        \
+                pid1 = fork(); /* runs _CHILD_CODE */                   \
+                assert_se(pid1 >= 0);                                   \
+                if (pid1 == 0) {                                        \
+                        barrier_set_role(&b, BARRIER_CHILD);            \
+                        { _CHILD_CODE; }                                \
+                        exit(42); /* status checked by the waits */     \
+                }                                                       \
+                                                                        \
+                pid2 = fork(); /* runs _PARENT_CODE */                  \
+                assert_se(pid2 >= 0);                                   \
+                if (pid2 == 0) {                                        \
+                        barrier_set_role(&b, BARRIER_PARENT);           \
+                        { _PARENT_CODE; }                               \
+                        exit(42);                                       \
+                }                                                       \
+                                                                        \
+                barrier_destroy(&b); /* runner drops its own copy */    \
+                malarm(999); /* watchdog for both waits */              \
+                { _WAIT_CHILD; }                                        \
+                { _WAIT_PARENT; }                                       \
+                malarm(0); /* disarm the watchdog */                    \
+        }
+
+#define TEST_BARRIER_WAIT_SUCCESS(_pid) \
+                ({ /* reap _pid: must have exited with 42 */            \
+                        pid_t pidr; int status;                         \
+                        pidr = waitpid((_pid), &status, 0);             \
+                        assert_se(pidr == (_pid));                      \
+                        assert_se(WIFEXITED(status));                   \
+                        assert_se(WEXITSTATUS(status) == 42);           \
+                })
+
+#define TEST_BARRIER_WAIT_ALARM(_pid) \
+                ({ /* reap _pid: must have died from SIGALRM */         \
+                        pid_t pidr; int status;                         \
+                        pidr = waitpid((_pid), &status, 0);             \
+                        assert_se(pidr == (_pid));                      \
+                        assert_se(WIFSIGNALED(status));                 \
+                        assert_se(WTERMSIG(status) == SIGALRM);         \
+                })
+
+/*
+ * Test basic sync points
+ * This places a barrier in both processes and waits synchronously for them.
+ * The timeout makes sure the sync works as expected. The msleep() in the child
+ * makes sure the exit of the parent does not overwrite previous barriers. Due
+ * to the msleep(), we know that the parent already exited, thus there's a
+ * pending HUP on the pipe. However, the barrier_sync() prefers reads on the
+ * eventfd, thus we can safely wait on the barrier.
+ */
+TEST_BARRIER(test_barrier_sync,
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_place(&b));
+                msleep(BASE_TIME * 2); /* let the parent sync and exit first */
+                assert_se(barrier_sync(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_place(&b));
+                assert_se(barrier_sync(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next()
+ * This places a barrier in the parent and syncs on it. The child sleeps while
+ * the parent places the barrier and then waits for a barrier. The wait will
+ * succeed as the child hasn't read the parent's barrier, yet. The following
+ * barrier and sync synchronize the exit.
+ */
+TEST_BARRIER(test_barrier_wait_next,
+        ({
+                msleep(BASE_TIME * 5); /* 100ms at the default BASE_TIME */
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_wait_next(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_sync(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                malarm(BASE_TIME * 20); /* must outlast the child's sleep */
+                assert_se(barrier_place(&b));
+                assert_se(barrier_sync(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() multiple times
+ * This places two barriers in the parent and waits for the child to exit. The
+ * child sleeps 20ms so both barriers _should_ be in place. It then waits for
+ * the parent to place the next barrier twice. The first call will fetch both
+ * barriers and return. However, the second call will stall as the parent does
+ * not place a 3rd barrier (the sleep caught two barriers). wait_next() does
+ * not look at barrier-links so this stall is expected. Thus this test times
+ * out.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice,
+        ({
+                msleep(BASE_TIME);
+                malarm(BASE_TIME);
+                assert_se(barrier_wait_next(&b));
+                assert_se(barrier_wait_next(&b)); /* expected to stall */
+                assert_se(0); /* only reached if the stall did not happen */
+        }),
+        TEST_BARRIER_WAIT_ALARM(pid1),
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_place(&b));
+                assert_se(barrier_place(&b));
+                msleep(BASE_TIME * 2);
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with local barriers
+ * This is the same as test_barrier_wait_next_twice, but places local barriers
+ * between both waits. This does not have any effect on the wait so it times out
+ * like the other test.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_local,
+        ({
+                msleep(BASE_TIME);
+                malarm(BASE_TIME);
+                assert_se(barrier_wait_next(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_wait_next(&b)); /* expected to stall */
+                assert_se(0); /* only reached if the stall did not happen */
+        }),
+        TEST_BARRIER_WAIT_ALARM(pid1),
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_place(&b));
+                assert_se(barrier_place(&b));
+                msleep(BASE_TIME * 2);
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with sync_next()
+ * This is again the same as test_barrier_wait_next_twice but uses a
+ * synced wait as the second wait. This works just fine because the local state
+ * has no barriers placed, therefore, the remote is always in sync.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_sync,
+        ({
+                msleep(BASE_TIME);
+                malarm(BASE_TIME);
+                assert_se(barrier_wait_next(&b));
+                assert_se(barrier_sync_next(&b)); /* must return before SIGALRM */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_place(&b));
+                assert_se(barrier_place(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with sync_next() and local barriers
+ * This is again the same as test_barrier_wait_next_twice_local but uses a
+ * synced wait as the second wait. This works just fine because the local state
+ * is in sync with the remote.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_local_sync,
+        ({
+                msleep(BASE_TIME);
+                malarm(BASE_TIME);
+                assert_se(barrier_wait_next(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_sync_next(&b)); /* must return before SIGALRM */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_place(&b));
+                assert_se(barrier_place(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync()
+ * This tests sync_*() synchronizations and makes sure they work fine if the
+ * local state is behind the remote state.
+ */
+TEST_BARRIER(test_barrier_sync_next,
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_sync_next(&b)); /* nothing placed locally yet */
+                assert_se(barrier_sync(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_sync_next(&b));
+                assert_se(barrier_sync_next(&b));
+                assert_se(barrier_sync(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                malarm(BASE_TIME * 10);
+                msleep(BASE_TIME); /* child starts syncing before we place */
+                assert_se(barrier_place(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_sync(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync() with local barriers
+ * This tests that sync_*() times out if local barriers are placed but the
+ * remote didn't place any.
+ */
+TEST_BARRIER(test_barrier_sync_next_local,
+        ({
+                malarm(BASE_TIME);
+                assert_se(barrier_place(&b));
+                assert_se(barrier_sync_next(&b)); /* expected to stall */
+                assert_se(0); /* only reached if the stall did not happen */
+        }),
+        TEST_BARRIER_WAIT_ALARM(pid1),
+        ({
+                msleep(BASE_TIME * 2); /* stay alive past the child's alarm */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync() with local barriers and abortion
+ * This is the same as test_barrier_sync_next_local but aborts the sync in the
+ * parent. Therefore, the sync_next() returns false instead of timing out.
+ */
+TEST_BARRIER(test_barrier_sync_next_local_abort,
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_place(&b));
+                assert_se(!barrier_sync_next(&b)); /* false: remote aborted */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                assert_se(barrier_abort(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test matched wait_abortion()
+ * This runs wait_abortion() in the child with remote abortion by the parent.
+ */
+TEST_BARRIER(test_barrier_wait_abortion,
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_wait_abortion(&b)); /* must not time out */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                assert_se(barrier_abort(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test unmatched wait_abortion()
+ * This runs wait_abortion() without any remote abortion going on. It thus must
+ * time out.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_unmatched,
+        ({
+                malarm(BASE_TIME);
+                assert_se(barrier_wait_abortion(&b)); /* expected to stall */
+                assert_se(0); /* only reached if the stall did not happen */
+        }),
+        TEST_BARRIER_WAIT_ALARM(pid1),
+        ({
+                msleep(BASE_TIME * 2); /* stay alive past the child's alarm */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test matched wait_abortion() with local abortion
+ * This runs wait_abortion() with local and remote abortion.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_local,
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_abort(&b));
+                assert_se(!barrier_wait_abortion(&b)); /* false after local abort */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                assert_se(barrier_abort(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test unmatched wait_abortion() with local abortion
+ * This runs wait_abortion() with only local abortion. This must time out.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_local_unmatched,
+        ({
+                malarm(BASE_TIME);
+                assert_se(barrier_abort(&b));
+                assert_se(!barrier_wait_abortion(&b)); /* expected to stall */
+                assert_se(0); /* only reached if the stall did not happen */
+        }),
+        TEST_BARRIER_WAIT_ALARM(pid1),
+        ({
+                msleep(BASE_TIME * 2); /* stay alive past the child's alarm */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test child exit
+ * Place barrier and sync with the child. The child only exit()s, which should
+ * cause an implicit abortion and wake the parent.
+ */
+TEST_BARRIER(test_barrier_exit,
+        ({
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                malarm(BASE_TIME * 10);
+                assert_se(barrier_place(&b));
+                assert_se(!barrier_sync(&b)); /* false: the child is gone */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test child exit with sleep
+ * Same as test_barrier_exit but verifies the test really works due to the
+ * child-exit. We add an msleep() in the child that outlasts the parent's
+ * alarm, which causes the parent to time out.
+ */
+TEST_BARRIER(test_barrier_no_exit,
+        ({
+                msleep(BASE_TIME * 2); /* delay the exit past the parent's alarm */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                malarm(BASE_TIME);
+                assert_se(barrier_place(&b));
+                assert_se(!barrier_sync(&b)); /* expected to stall */
+        }),
+        TEST_BARRIER_WAIT_ALARM(pid2));
+
+/*
+ * Test pending exit against sync
+ * The parent places a barrier *and* exits. The 40ms wait in the child
+ * guarantees both are pending. However, our logic prefers pending barriers over
+ * pending exit-abortions (unlike normal abortions), thus the wait_next() must
+ * succeed, same for the sync_next() as our local barrier-count is smaller than
+ * the remote. Once we place a barrier our count is equal, so the sync still
+ * succeeds. Only if we place one more barrier, we're ahead of the remote, thus
+ * we will fail due to HUP on the pipe.
+ */
+TEST_BARRIER(test_barrier_pending_exit,
+        ({
+                malarm(BASE_TIME * 4);
+                msleep(BASE_TIME * 2); /* parent has placed and exited by now */
+                assert_se(barrier_wait_next(&b));
+                assert_se(barrier_sync_next(&b));
+                assert_se(barrier_place(&b));
+                assert_se(barrier_sync_next(&b));
+                assert_se(barrier_place(&b));
+                assert_se(!barrier_sync_next(&b)); /* ahead of the remote now */
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid1),
+        ({
+                assert_se(barrier_place(&b));
+        }),
+        TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+int main(int argc, char *argv[]) {
+        log_parse_environment(); /* logging setup; argc/argv are not used */
+        log_open();
+
+        test_barrier_sync(); /* every test forks two processes and reaps both */
+        test_barrier_wait_next();
+        test_barrier_wait_next_twice();
+        test_barrier_wait_next_twice_sync();
+        test_barrier_wait_next_twice_local();
+        test_barrier_wait_next_twice_local_sync();
+        test_barrier_sync_next();
+        test_barrier_sync_next_local();
+        test_barrier_sync_next_local_abort();
+        test_barrier_wait_abortion();
+        test_barrier_wait_abortion_unmatched();
+        test_barrier_wait_abortion_local();
+        test_barrier_wait_abortion_local_unmatched();
+        test_barrier_exit();
+        test_barrier_no_exit();
+        test_barrier_pending_exit();
+
+        return 0; /* reached only if no assert_se() in a test aborted the run */
+}
author	David Herrmann <dh.herrmann@gmail.com>	2014-07-10 15:25:47 +0200
committer	David Herrmann <dh.herrmann@gmail.com>	2014-07-17 11:34:00 +0200
commit	279da1e3f99b9c767a69849b5445e3cfd8d83376 (patch)
tree	23aa7a51fd77d1408fa21be1d1a7d78e3ca5366b /src/test/test-barrier.c
parent	18abe7bd3e13525b257da69ac49ff7841c289567 (diff)