summaryrefslogtreecommitdiff
path: root/mm/oom_kill.c
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-09-11 04:34:46 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-09-11 04:34:46 -0300
commit863981e96738983919de841ec669e157e6bdaeb0 (patch)
treed6d89a12e7eb8017837c057935a2271290907f76 /mm/oom_kill.c
parent8dec7c70575785729a6a9e6719a955e9c545bcab (diff)
Linux-libre 4.7.1-gnupck-4.7.1-gnu
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r--mm/oom_kill.c172
1 files changed, 123 insertions, 49 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 86349586e..ddf74487f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -174,8 +174,13 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
if (!p)
return 0;
+ /*
+ * Do not even consider tasks which are explicitly marked oom
+ * unkillable or have been already oom reaped.
+ */
adj = (long)p->signal->oom_score_adj;
- if (adj == OOM_SCORE_ADJ_MIN) {
+ if (adj == OOM_SCORE_ADJ_MIN ||
+ test_bit(MMF_OOM_REAPED, &p->mm->flags)) {
task_unlock(p);
return 0;
}
@@ -278,12 +283,8 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
* This task already has access to memory reserves and is being killed.
* Don't allow any other task to have access to the reserves.
*/
- if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
- if (!is_sysrq_oom(oc))
- return OOM_SCAN_ABORT;
- }
- if (!task->mm)
- return OOM_SCAN_CONTINUE;
+ if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims))
+ return OOM_SCAN_ABORT;
/*
* If task is allocating a lot of memory and has been marked to be
@@ -302,12 +303,12 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
static struct task_struct *select_bad_process(struct oom_control *oc,
unsigned int *ppoints, unsigned long totalpages)
{
- struct task_struct *g, *p;
+ struct task_struct *p;
struct task_struct *chosen = NULL;
unsigned long chosen_points = 0;
rcu_read_lock();
- for_each_process_thread(g, p) {
+ for_each_process(p) {
unsigned int points;
switch (oom_scan_process_thread(oc, p, totalpages)) {
@@ -326,9 +327,6 @@ static struct task_struct *select_bad_process(struct oom_control *oc,
points = oom_badness(p, NULL, oc->nodemask, totalpages);
if (!points || points < chosen_points)
continue;
- /* Prefer thread group leaders for display purposes */
- if (points == chosen_points && thread_group_leader(chosen))
- continue;
chosen = p;
chosen_points = points;
@@ -412,6 +410,25 @@ bool oom_killer_disabled __read_mostly;
#define K(x) ((x) << (PAGE_SHIFT-10))
+/*
+ * task->mm can be NULL if the task is the exited group leader. So to
+ * determine whether the task is using a particular mm, we examine all the
+ * task's threads: if one of those is using this mm then this task was also
+ * using it.
+ */
+static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
+{
+ struct task_struct *t;
+
+ for_each_thread(p, t) {
+ struct mm_struct *t_mm = READ_ONCE(t->mm);
+ if (t_mm)
+ return t_mm == mm;
+ }
+ return false;
+}
+
+
#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -422,18 +439,33 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
-
static bool __oom_reap_task(struct task_struct *tsk)
{
struct mmu_gather tlb;
struct vm_area_struct *vma;
- struct mm_struct *mm;
+ struct mm_struct *mm = NULL;
struct task_struct *p;
struct zap_details details = {.check_swap_entries = true,
.ignore_dirty = true};
bool ret = true;
/*
+ * We have to make sure to not race with the victim exit path
+ * and cause premature new oom victim selection:
+ * __oom_reap_task exit_mm
+ * atomic_inc_not_zero
+ * mmput
+ * atomic_dec_and_test
+ * exit_oom_victim
+ * [...]
+ * out_of_memory
+ * select_bad_process
+ * # no TIF_MEMDIE task selects new victim
+ * unmap_page_range # frees some memory
+ */
+ mutex_lock(&oom_lock);
+
+ /*
* Make sure we find the associated mm_struct even when the particular
* thread has already terminated and cleared its mm.
* We might have race with exit path so consider our work done if there
@@ -441,19 +473,14 @@ static bool __oom_reap_task(struct task_struct *tsk)
*/
p = find_lock_task_mm(tsk);
if (!p)
- return true;
-
+ goto unlock_oom;
mm = p->mm;
- if (!atomic_inc_not_zero(&mm->mm_users)) {
- task_unlock(p);
- return true;
- }
-
+ atomic_inc(&mm->mm_users);
task_unlock(p);
if (!down_read_trylock(&mm->mmap_sem)) {
ret = false;
- goto out;
+ goto unlock_oom;
}
tlb_gather_mmu(&tlb, mm, 0, -1);
@@ -491,16 +518,19 @@ static bool __oom_reap_task(struct task_struct *tsk)
up_read(&mm->mmap_sem);
/*
- * Clear TIF_MEMDIE because the task shouldn't be sitting on a
- * reasonably reclaimable memory anymore. OOM killer can continue
- * by selecting other victim if unmapping hasn't led to any
- * improvements. This also means that selecting this task doesn't
- * make any sense.
+ * This task can be safely ignored because we cannot do much more
+ * to release its memory.
*/
- tsk->signal->oom_score_adj = OOM_SCORE_ADJ_MIN;
- exit_oom_victim(tsk);
-out:
- mmput(mm);
+ set_bit(MMF_OOM_REAPED, &mm->flags);
+unlock_oom:
+ mutex_unlock(&oom_lock);
+ /*
+ * Drop our reference but make sure the mmput slow path is called from a
+ * different context because we shouldn't risk we get stuck there and
+ * put the oom_reaper out of the way.
+ */
+ if (mm)
+ mmput_async(mm);
return ret;
}
@@ -519,6 +549,15 @@ static void oom_reap_task(struct task_struct *tsk)
debug_show_all_locks();
}
+ /*
+ * Clear TIF_MEMDIE because the task shouldn't be sitting on a
+ * reasonably reclaimable memory anymore or it is not a good candidate
+ * for the oom victim right now because it cannot release its memory
+ * itself nor by the oom reaper.
+ */
+ tsk->oom_reaper_list = NULL;
+ exit_oom_victim(tsk);
+
/* Drop a reference taken by wake_oom_reaper */
put_task_struct(tsk);
}
@@ -563,6 +602,46 @@ static void wake_oom_reaper(struct task_struct *tsk)
wake_up(&oom_reaper_wait);
}
+/* Check if we can reap the given task. This has to be called with stable
+ * tsk->mm
+ */
+void try_oom_reaper(struct task_struct *tsk)
+{
+ struct mm_struct *mm = tsk->mm;
+ struct task_struct *p;
+
+ if (!mm)
+ return;
+
+ /*
+ * There might be other threads/processes which are either not
+ * dying or even not killable.
+ */
+ if (atomic_read(&mm->mm_users) > 1) {
+ rcu_read_lock();
+ for_each_process(p) {
+ if (!process_shares_mm(p, mm))
+ continue;
+ if (fatal_signal_pending(p))
+ continue;
+
+ /*
+ * If the task is exiting make sure the whole thread group
+ * is exiting and cannot acces mm anymore.
+ */
+ if (signal_group_exit(p->signal))
+ continue;
+
+ /* Give up */
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+ }
+
+ wake_oom_reaper(tsk);
+}
+
static int __init oom_init(void)
{
oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
@@ -593,6 +672,7 @@ void mark_oom_victim(struct task_struct *tsk)
/* OOM killer might race with memcg OOM */
if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
return;
+ atomic_inc(&tsk->signal->oom_victims);
/*
* Make sure that the task is woken up from uninterruptible sleep
* if it is frozen because OOM killer wouldn't be able to free
@@ -610,6 +690,7 @@ void exit_oom_victim(struct task_struct *tsk)
{
if (!test_and_clear_tsk_thread_flag(tsk, TIF_MEMDIE))
return;
+ atomic_dec(&tsk->signal->oom_victims);
if (!atomic_dec_return(&oom_victims))
wake_up_all(&oom_victims_wait);
@@ -653,24 +734,6 @@ void oom_killer_enable(void)
}
/*
- * task->mm can be NULL if the task is the exited group leader. So to
- * determine whether the task is using a particular mm, we examine all the
- * task's threads: if one of those is using this mm then this task was also
- * using it.
- */
-static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
-{
- struct task_struct *t;
-
- for_each_thread(p, t) {
- struct mm_struct *t_mm = READ_ONCE(t->mm);
- if (t_mm)
- return t_mm == mm;
- }
- return false;
-}
-
-/*
* Must be called while holding a reference to p, which will be released upon
* returning.
*/
@@ -694,6 +757,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
task_lock(p);
if (p->mm && task_will_free_mem(p)) {
mark_oom_victim(p);
+ try_oom_reaper(p);
task_unlock(p);
put_task_struct(p);
return;
@@ -873,10 +937,20 @@ bool out_of_memory(struct oom_control *oc)
if (current->mm &&
(fatal_signal_pending(current) || task_will_free_mem(current))) {
mark_oom_victim(current);
+ try_oom_reaper(current);
return true;
}
/*
+ * The OOM killer does not compensate for IO-less reclaim.
+ * pagefault_out_of_memory lost its gfp context so we have to
+ * make sure exclude 0 mask - all other users should have at least
+ * ___GFP_DIRECT_RECLAIM to get here.
+ */
+ if (oc->gfp_mask && !(oc->gfp_mask & (__GFP_FS|__GFP_NOFAIL)))
+ return true;
+
+ /*
* Check if there were limitations on the allocation (only relevant for
* NUMA) that may require different handling.
*/