diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-06-10 05:30:17 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-06-10 05:30:17 -0300 |
commit | d635711daa98be86d4c7fd01499c34f566b54ccb (patch) | |
tree | aa5cc3760a27c3d57146498cb82fa549547de06c /arch/x86/mm/fault.c | |
parent | c91265cd0efb83778f015b4d4b1129bd2cfd075e (diff) |
Linux-libre 4.6.2-gnu
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r-- | arch/x86/mm/fault.c | 154 |
1 files changed, 130 insertions, 24 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8d2472be4..9d995ea4f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -16,12 +16,14 @@ #include <linux/tuxonice.h> /* incremental image support */ #include <linux/uaccess.h> /* faulthandler_disabled() */ +#include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_ADDR */ #include <asm/vsyscall.h> /* emulate_vsyscall */ #include <asm/vm86.h> /* struct vm86 */ +#include <asm/mmu_context.h> /* vma_pkey() */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> @@ -34,6 +36,7 @@ * bit 2 == 0: kernel-mode access 1: user-mode access * bit 3 == 1: use of reserved bit detected * bit 4 == 1: fault was an instruction fetch + * bit 5 == 1: protection keys block access */ enum x86_pf_error_code { @@ -42,6 +45,7 @@ enum x86_pf_error_code { PF_USER = 1 << 2, PF_RSVD = 1 << 3, PF_INSTR = 1 << 4, + PF_PK = 1 << 5, }; /* @@ -168,9 +172,60 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) return prefetch; } +/* + * A protection key fault means that the PKRU value did not allow + * access to some PTE. Userspace can figure out what PKRU was + * from the XSAVE state, and this function fills out a field in + * siginfo so userspace can discover which protection key was set + * on the PTE. + * + * If we get here, we know that the hardware signaled a PF_PK + * fault and that there was a VMA once we got in the fault + * handler. It does *not* guarantee that the VMA we find here + * was the one that we faulted on. + * + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); + * 2. T1 : set PKRU to deny access to pkey=4, touches page + * 3. T1 : faults... + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); + * 5. T1 : enters fault handler, takes mmap_sem, etc... + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really + * faulted on a pte with its pkey=4. + */ +static void fill_sig_info_pkey(int si_code, siginfo_t *info, + struct vm_area_struct *vma) +{ + /* This is effectively an #ifdef */ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return; + + /* Fault not from Protection Keys: nothing to do */ + if (si_code != SEGV_PKUERR) + return; + /* + * force_sig_info_fault() is called from a number of + * contexts, some of which have a VMA and some of which + * do not. The PF_PK handing happens after we have a + * valid VMA, so we should never reach this without a + * valid VMA. + */ + if (!vma) { + WARN_ONCE(1, "PKU fault with no VMA passed in"); + info->si_pkey = 0; + return; + } + /* + * si_pkey should be thought of as a strong hint, but not + * absolutely guranteed to be 100% accurate because of + * the race explained above. + */ + info->si_pkey = vma_pkey(vma); +} + static void force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk, int fault) + struct task_struct *tsk, struct vm_area_struct *vma, + int fault) { unsigned lsb = 0; siginfo_t info; @@ -185,6 +240,8 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; + fill_sig_info_pkey(si_code, &info, vma); + force_sig_info(si_signo, &info, tsk); } @@ -662,13 +719,15 @@ no_context(struct pt_regs *regs, unsigned long error_code, struct task_struct *tsk = current; unsigned long flags; int sig; + /* No context means no VMA to pass down */ + struct vm_area_struct *vma = NULL; if (toi_make_writable(init_mm.pgd, address)) { return; } /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) { + if (fixup_exception(regs, X86_TRAP_PF)) { /* * Any interrupt that takes a fault gets the fixup. This makes * the below recursive fault logic only apply to a faults from @@ -689,7 +748,8 @@ no_context(struct pt_regs *regs, unsigned long error_code, tsk->thread.cr2 = address; /* XXX: hwpoison faults will set the wrong code. */ - force_sig_info_fault(signal, si_code, address, tsk, 0); + force_sig_info_fault(signal, si_code, address, + tsk, vma, 0); } /* @@ -766,7 +826,8 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int si_code) + unsigned long address, struct vm_area_struct *vma, + int si_code) { struct task_struct *tsk = current; @@ -809,7 +870,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; - force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); + force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0); return; } @@ -822,14 +883,14 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, static noinline void bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address) + unsigned long address, struct vm_area_struct *vma) { - __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); + __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR); } static void __bad_area(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int si_code) + unsigned long address, struct vm_area_struct *vma, int si_code) { struct mm_struct *mm = current->mm; @@ -839,25 +900,50 @@ __bad_area(struct pt_regs *regs, unsigned long error_code, */ up_read(&mm->mmap_sem); - __bad_area_nosemaphore(regs, error_code, address, si_code); + __bad_area_nosemaphore(regs, error_code, address, vma, si_code); } static noinline void bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - __bad_area(regs, error_code, address, SEGV_MAPERR); + __bad_area(regs, error_code, address, NULL, SEGV_MAPERR); +} + +static inline bool bad_area_access_from_pkeys(unsigned long error_code, + struct vm_area_struct *vma) +{ + /* This code is always called on the current mm */ + bool foreign = false; + + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return false; + if (error_code & PF_PK) + return true; + /* this checks permission keys on the VMA: */ + if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), + (error_code & PF_INSTR), foreign)) + return true; + return false; } static noinline void bad_area_access_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address) + unsigned long address, struct vm_area_struct *vma) { - __bad_area(regs, error_code, address, SEGV_ACCERR); + /* + * This OSPKE check is not strictly necessary at runtime. + * But, doing it this way allows compiler optimizations + * if pkeys are compiled out. + */ + if (bad_area_access_from_pkeys(error_code, vma)) + __bad_area(regs, error_code, address, vma, SEGV_PKUERR); + else + __bad_area(regs, error_code, address, vma, SEGV_ACCERR); } static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, - unsigned int fault) + struct vm_area_struct *vma, unsigned int fault) { struct task_struct *tsk = current; int code = BUS_ADRERR; @@ -884,12 +970,13 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, code = BUS_MCEERR_AR; } #endif - force_sig_info_fault(SIGBUS, code, address, tsk, fault); + force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault); } static noinline void mm_fault_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, unsigned int fault) + unsigned long address, struct vm_area_struct *vma, + unsigned int fault) { if (fatal_signal_pending(current) && !(error_code & PF_USER)) { no_context(regs, error_code, address, 0, 0); @@ -913,9 +1000,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) - do_sigbus(regs, error_code, address, fault); + do_sigbus(regs, error_code, address, vma, fault); else if (fault & VM_FAULT_SIGSEGV) - bad_area_nosemaphore(regs, error_code, address); + bad_area_nosemaphore(regs, error_code, address, vma); else BUG(); } @@ -1015,10 +1102,16 @@ int toi_make_writable(pgd_t *pgd, unsigned long address) static int spurious_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & PF_WRITE) && !pte_write(*pte)) - return 0; + return 0; if ((error_code & PF_INSTR) && !pte_exec(*pte)) return 0; + /* + * Note: We do not do lazy flushing on protection key + * changes, so no spurious fault will ever set PF_PK. + */ + if ((error_code & PF_PK)) + return 1; return 1; } @@ -1108,6 +1201,17 @@ int show_unhandled_signals = 1; static inline int access_error(unsigned long error_code, struct vm_area_struct *vma) { + /* This is only called for the current mm, so: */ + bool foreign = false; + /* + * Make sure to check the VMA so that we do not perform + * faults just to hit a PF_PK as soon as we fill in a + * page. + */ + if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), + (error_code & PF_INSTR), foreign)) + return 1; + if (error_code & PF_WRITE) { /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) @@ -1223,7 +1327,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock: */ - bad_area_nosemaphore(regs, error_code, address); + bad_area_nosemaphore(regs, error_code, address, NULL); return; } @@ -1236,7 +1340,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, pgtable_bad(regs, error_code, address); if (unlikely(smap_violation(error_code, regs))) { - bad_area_nosemaphore(regs, error_code, address); + bad_area_nosemaphore(regs, error_code, address, NULL); return; } @@ -1245,7 +1349,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * in a region with pagefaults disabled then we must not take the fault */ if (unlikely(faulthandler_disabled() || !mm)) { - bad_area_nosemaphore(regs, error_code, address); + bad_area_nosemaphore(regs, error_code, address, NULL); return; } @@ -1269,6 +1373,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, if (error_code & PF_WRITE) flags |= FAULT_FLAG_WRITE; + if (error_code & PF_INSTR) + flags |= FAULT_FLAG_INSTRUCTION; /* * When running in the kernel we expect faults to occur only to @@ -1289,7 +1395,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if ((error_code & PF_USER) == 0 && !search_exception_tables(regs->ip)) { - bad_area_nosemaphore(regs, error_code, address); + bad_area_nosemaphore(regs, error_code, address, NULL); return; } retry: @@ -1337,7 +1443,7 @@ retry: */ good_area: if (unlikely(access_error(error_code, vma))) { - bad_area_access_error(regs, error_code, address); + bad_area_access_error(regs, error_code, address, vma); return; } @@ -1375,7 +1481,7 @@ good_area: up_read(&mm->mmap_sem); if (unlikely(fault & VM_FAULT_ERROR)) { - mm_fault_error(regs, error_code, address, fault); + mm_fault_error(regs, error_code, address, vma, fault); return; } |