core: GC redundant device jobs from the run queue

In contrast to all other unit types device units when queued just track external state, they cannot effect state changes on their own. Hence unless a client or other job waits for them there's no reason to keep them in the job queue. This adds a concept of GC'ing jobs of this type as soon as no client or other job waits for them anymore. To ensure this works correctly we need to track which clients actually reference a job (i.e. which ones enqueued it). Unfortunately that's pretty nasty to do for direct connections, as sd_bus_track doesn't work for them. For now, work around this, by simply remembering in a boolean that a job was requested by a direct connection, and reset it when we notice the direct connection is gone. This means the GC logic works fine, except that jobs are not immediately removed when direct connections disconnect. In the longer term, a rework of the bus logic should fix this properly. For now this should be good enough, as GC works for fine all cases except this one, and thus is a clear improvement over the previous behaviour. Fixes: #1921
author: Lennart Poettering <lennart@poettering.net> 2016-11-15 19:32:50 +0100
committer: Lennart Poettering <lennart@poettering.net> 2016-11-16 15:03:26 +0100
commit: c5a97ed132b400ad82f7939d55fe1027a2b13f6e (patch)
tree: 924df71be2f04a66a30faccc54354d34c94f42ff /src/core/job.c
parent: 1a465207ab0a0b6756ab0d9305102d9159955a14 (diff)
1 files changed, 110 insertions, 4 deletions
diff --git a/src/core/job.c b/src/core/job.c
index 3a20da6d06..d6e71d68ef 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -90,6 +90,9 @@ void job_free(Job *j) {
         if (j->in_dbus_queue)
                 LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
 
+        if (j->in_gc_queue)
+                LIST_REMOVE(gc_queue, j->manager->gc_job_queue, j);
+
         sd_event_source_unref(j->timer_event_source);
 
         sd_bus_track_unref(j->bus_track);
@@ -226,6 +229,9 @@ Job* job_install(Job *j) {
         log_unit_debug(j->unit,
                        "Installed new job %s/%s as %u",
                        j->unit->id, job_type_to_string(j->type), (unsigned) j->id);
+
+        job_add_to_gc_queue(j);
+
         return j;
 }
 
@@ -639,6 +645,7 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
                 [JOB_DEPENDENCY]  = "Dependency failed for %s.",
                 [JOB_ASSERT]      = "Assertion failed for %s.",
                 [JOB_UNSUPPORTED] = "Starting of %s not supported.",
+                [JOB_COLLECTED]   = "Unecessary job for %s was removed.",
         };
         static const char *const generic_finished_stop_job[_JOB_RESULT_MAX] = {
                 [JOB_DONE]        = "Stopped %s.",
@@ -698,6 +705,7 @@ static void job_print_status_message(Unit *u, JobType t, JobResult result) {
                 [JOB_SKIPPED]     = { ANSI_HIGHLIGHT,        " INFO " },
                 [JOB_ASSERT]      = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
                 [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
+                [JOB_COLLECTED]   = { ANSI_HIGHLIGHT,        " INFO " },
         };
 
         const char *format;
@@ -749,6 +757,7 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
                 [JOB_INVALID]     = LOG_INFO,
                 [JOB_ASSERT]      = LOG_WARNING,
                 [JOB_UNSUPPORTED] = LOG_WARNING,
+                [JOB_COLLECTED]   = LOG_INFO,
         };
 
         assert(u);
@@ -860,6 +869,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
                 job_set_state(j, JOB_WAITING);
 
                 job_add_to_run_queue(j);
+                job_add_to_gc_queue(j);
 
                 goto finish;
         }
@@ -903,11 +913,15 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
 finish:
         /* Try to start the next jobs that can be started */
         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
-                if (other->job)
+                if (other->job) {
                         job_add_to_run_queue(other->job);
+                        job_add_to_gc_queue(other->job);
+                }
         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
-                if (other->job)
+                if (other->job) {
                         job_add_to_run_queue(other->job);
+                        job_add_to_gc_queue(other->job);
+                }
 
         manager_check_finished(u->manager);
 
@@ -1121,12 +1135,14 @@ int job_coldplug(Job *j) {
 
         /* After deserialization is complete and the bus connection
          * set up again, let's start watching our subscribers again */
-        (void) bus_track_coldplug(j->manager, &j->bus_track, false, j->deserialized_clients);
-        j->deserialized_clients = strv_free(j->deserialized_clients);
+        (void) bus_job_coldplug_bus_track(j);
 
         if (j->state == JOB_WAITING)
                 job_add_to_run_queue(j);
 
+        /* Maybe due to new dependencies we don't actually need this job anymore? */
+        job_add_to_gc_queue(j);
+
         if (j->begin_usec == 0 || j->unit->job_timeout == USEC_INFINITY)
                 return 0;
 
@@ -1201,6 +1217,95 @@ int job_get_timeout(Job *j, usec_t *timeout) {
         return 1;
 }
 
+bool job_check_gc(Job *j) {
+        Unit *other;
+        Iterator i;
+
+        assert(j);
+
+        /* Checks whether this job should be GC'ed away. We only do this for jobs of units that have no effect on their
+         * own and just track external state. For now the only unit type that qualifies for this are .device units. */
+
+        if (!UNIT_VTABLE(j->unit)->gc_jobs)
+                return true;
+
+        if (sd_bus_track_count(j->bus_track) > 0)
+                return true;
+
+        /* FIXME: So this is a bit ugly: for now we don't properly track references made via private bus connections
+         * (because it's nasty, as sd_bus_track doesn't apply to it). We simply remember that the job was once
+         * referenced by one, and reset this whenever we notice that no private bus connections are around. This means
+         * the GC is a bit too conservative when it comes to jobs created by private bus connections. */
+        if (j->ref_by_private_bus) {
+                if (set_isempty(j->unit->manager->private_buses))
+                        j->ref_by_private_bus = false;
+                else
+                        return true;
+        }
+
+        if (j->type == JOB_NOP)
+                return true;
+
+        /* If a job is ordered after ours, and is to be started, then it needs to wait for us, regardless if we stop or
+         * start, hence let's not GC in that case. */
+        SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i) {
+                if (!other->job)
+                        continue;
+
+                if (other->job->ignore_order)
+                        continue;
+
+                if (IN_SET(other->job->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD))
+                        return true;
+        }
+
+        /* If we are going down, but something else is orederd After= us, then it needs to wait for us */
+        if (IN_SET(j->type, JOB_STOP, JOB_RESTART)) {
+
+                SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i) {
+                        if (!other->job)
+                                continue;
+
+                        if (other->job->ignore_order)
+                                continue;
+
+                        return true;
+                }
+        }
+
+        /* The logic above is kinda the inverse of the job_is_runnable() logic. Specifically, if the job "we" is
+         * ordered before the job "other":
+         *
+         *  we start + other start → stay
+         *  we start + other stop  → gc
+         *  we stop  + other start → stay
+         *  we stop  + other stop  → gc
+         *
+         * "we" are ordered after "other":
+         *
+         *  we start + other start → gc
+         *  we start + other stop  → gc
+         *  we stop  + other start → stay
+         *  we stop  + other stop  → stay
+         *
+         */
+
+        return false;
+}
+
+void job_add_to_gc_queue(Job *j) {
+        assert(j);
+
+        if (j->in_gc_queue)
+                return;
+
+        if (job_check_gc(j))
+                return;
+
+        LIST_PREPEND(gc_queue, j->unit->manager->gc_job_queue, j);
+        j->in_gc_queue = true;
+}
+
 static const char* const job_state_table[_JOB_STATE_MAX] = {
         [JOB_WAITING] = "waiting",
         [JOB_RUNNING] = "running",
@@ -1244,6 +1349,7 @@ static const char* const job_result_table[_JOB_RESULT_MAX] = {
         [JOB_INVALID] = "invalid",
         [JOB_ASSERT] = "assert",
         [JOB_UNSUPPORTED] = "unsupported",
+        [JOB_COLLECTED] = "collected",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);
author	Lennart Poettering <lennart@poettering.net>	2016-11-15 19:32:50 +0100
committer	Lennart Poettering <lennart@poettering.net>	2016-11-16 15:03:26 +0100
commit	c5a97ed132b400ad82f7939d55fe1027a2b13f6e (patch)
tree	924df71be2f04a66a30faccc54354d34c94f42ff /src/core/job.c
parent	1a465207ab0a0b6756ab0d9305102d9159955a14 (diff)