diff options
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 219 |
1 files changed, 197 insertions, 22 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dc490c069..86349586e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -35,6 +35,11 @@ #include <linux/freezer.h> #include <linux/ftrace.h> #include <linux/ratelimit.h> +#include <linux/kthread.h> +#include <linux/init.h> + +#include <asm/tlb.h> +#include "internal.h" #define CREATE_TRACE_POINTS #include <trace/events/oom.h> @@ -287,9 +292,6 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, if (oom_task_origin(task)) return OOM_SCAN_SELECT; - if (task_will_free_mem(task) && !is_sysrq_oom(oc)) - return OOM_SCAN_ABORT; - return OOM_SCAN_OK; } @@ -386,10 +388,10 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) static void dump_header(struct oom_control *oc, struct task_struct *p, struct mem_cgroup *memcg) { - pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " - "oom_score_adj=%hd\n", - current->comm, oc->gfp_mask, oc->order, + pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n", + current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, current->signal->oom_score_adj); + cpuset_print_current_mems_allowed(); dump_stack(); if (memcg) @@ -408,6 +410,176 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); bool oom_killer_disabled __read_mostly; +#define K(x) ((x) << (PAGE_SHIFT-10)) + +#ifdef CONFIG_MMU +/* + * OOM Reaper kernel thread which tries to reap the memory used by the OOM + * victim (if that is possible) to help the OOM killer to move on. + */ +static struct task_struct *oom_reaper_th; +static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); +static struct task_struct *oom_reaper_list; +static DEFINE_SPINLOCK(oom_reaper_lock); + + +static bool __oom_reap_task(struct task_struct *tsk) +{ + struct mmu_gather tlb; + struct vm_area_struct *vma; + struct mm_struct *mm; + struct task_struct *p; + struct zap_details details = {.check_swap_entries = true, + .ignore_dirty = true}; + bool ret = true; + + /* + * Make sure we find the associated mm_struct even when the particular + * thread has already terminated and cleared its mm. + * We might have race with exit path so consider our work done if there + * is no mm. + */ + p = find_lock_task_mm(tsk); + if (!p) + return true; + + mm = p->mm; + if (!atomic_inc_not_zero(&mm->mm_users)) { + task_unlock(p); + return true; + } + + task_unlock(p); + + if (!down_read_trylock(&mm->mmap_sem)) { + ret = false; + goto out; + } + + tlb_gather_mmu(&tlb, mm, 0, -1); + for (vma = mm->mmap ; vma; vma = vma->vm_next) { + if (is_vm_hugetlb_page(vma)) + continue; + + /* + * mlocked VMAs require explicit munlocking before unmap. + * Let's keep it simple here and skip such VMAs. + */ + if (vma->vm_flags & VM_LOCKED) + continue; + + /* + * Only anonymous pages have a good chance to be dropped + * without additional steps which we cannot afford as we + * are OOM already. + * + * We do not even care about fs backed pages because all + * which are reclaimable have already been reclaimed and + * we do not want to block exit_mmap by keeping mm ref + * count elevated without a good reason. + */ + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) + unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, + &details); + } + tlb_finish_mmu(&tlb, 0, -1); + pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", + task_pid_nr(tsk), tsk->comm, + K(get_mm_counter(mm, MM_ANONPAGES)), + K(get_mm_counter(mm, MM_FILEPAGES)), + K(get_mm_counter(mm, MM_SHMEMPAGES))); + up_read(&mm->mmap_sem); + + /* + * Clear TIF_MEMDIE because the task shouldn't be sitting on a + * reasonably reclaimable memory anymore. OOM killer can continue + * by selecting other victim if unmapping hasn't led to any + * improvements. This also means that selecting this task doesn't + * make any sense. + */ + tsk->signal->oom_score_adj = OOM_SCORE_ADJ_MIN; + exit_oom_victim(tsk); +out: + mmput(mm); + return ret; +} + +#define MAX_OOM_REAP_RETRIES 10 +static void oom_reap_task(struct task_struct *tsk) +{ + int attempts = 0; + + /* Retry the down_read_trylock(mmap_sem) a few times */ + while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task(tsk)) + schedule_timeout_idle(HZ/10); + + if (attempts > MAX_OOM_REAP_RETRIES) { + pr_info("oom_reaper: unable to reap pid:%d (%s)\n", + task_pid_nr(tsk), tsk->comm); + debug_show_all_locks(); + } + + /* Drop a reference taken by wake_oom_reaper */ + put_task_struct(tsk); +} + +static int oom_reaper(void *unused) +{ + set_freezable(); + + while (true) { + struct task_struct *tsk = NULL; + + wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL); + spin_lock(&oom_reaper_lock); + if (oom_reaper_list != NULL) { + tsk = oom_reaper_list; + oom_reaper_list = tsk->oom_reaper_list; + } + spin_unlock(&oom_reaper_lock); + + if (tsk) + oom_reap_task(tsk); + } + + return 0; +} + +static void wake_oom_reaper(struct task_struct *tsk) +{ + if (!oom_reaper_th) + return; + + /* tsk is already queued? */ + if (tsk == oom_reaper_list || tsk->oom_reaper_list) + return; + + get_task_struct(tsk); + + spin_lock(&oom_reaper_lock); + tsk->oom_reaper_list = oom_reaper_list; + oom_reaper_list = tsk; + spin_unlock(&oom_reaper_lock); + wake_up(&oom_reaper_wait); +} + +static int __init oom_init(void) +{ + oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); + if (IS_ERR(oom_reaper_th)) { + pr_err("Unable to start OOM reaper %ld. Continuing regardless\n", + PTR_ERR(oom_reaper_th)); + oom_reaper_th = NULL; + } + return 0; +} +subsys_initcall(oom_init) +#else +static void wake_oom_reaper(struct task_struct *tsk) +{ +} +#endif + /** * mark_oom_victim - mark the given task as OOM victim * @tsk: task to mark @@ -434,9 +606,10 @@ void mark_oom_victim(struct task_struct *tsk) /** * exit_oom_victim - note the exit of an OOM victim */ -void exit_oom_victim(void) +void exit_oom_victim(struct task_struct *tsk) { - clear_thread_flag(TIF_MEMDIE); + if (!test_and_clear_tsk_thread_flag(tsk, TIF_MEMDIE)) + return; if (!atomic_dec_return(&oom_victims)) wake_up_all(&oom_victims_wait); @@ -458,15 +631,11 @@ void exit_oom_victim(void) bool oom_killer_disable(void) { /* - * Make sure to not race with an ongoing OOM killer - * and that the current is not the victim. + * Make sure to not race with an ongoing OOM killer. Check that the + * current is not killed (possibly due to sharing the victim's memory). */ - mutex_lock(&oom_lock); - if (test_thread_flag(TIF_MEMDIE)) { - mutex_unlock(&oom_lock); + if (mutex_lock_killable(&oom_lock)) return false; - } - oom_killer_disabled = true; mutex_unlock(&oom_lock); @@ -501,7 +670,6 @@ static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) return false; } -#define K(x) ((x) << (PAGE_SHIFT-10)) /* * Must be called while holding a reference to p, which will be released upon * returning. @@ -517,6 +685,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int victim_points = 0; static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); + bool can_oom_reap = true; /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -607,17 +776,23 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, continue; if (same_thread_group(p, victim)) continue; - if (unlikely(p->flags & PF_KTHREAD)) - continue; - if (is_global_init(p)) - continue; - if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) + if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p) || + p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { + /* + * We cannot use oom_reaper for the mm shared by this + * process because it wouldn't get killed and so the + * memory might be still used. + */ + can_oom_reap = false; continue; - + } do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); } rcu_read_unlock(); + if (can_oom_reap) + wake_oom_reaper(victim); + mmdrop(mm); put_task_struct(victim); } |