Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/kernel/espfix_64.c |   3
-rw-r--r--  arch/x86/kernel/tsc.c       |   5
-rw-r--r--  arch/x86/mm/fault.c         | 107
-rw-r--r--  arch/x86/mm/init.c          |   5
4 files changed, 117 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 4d38416e2..a457bce46 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -173,6 +173,7 @@ void init_espfix_ap(int cpu)
struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
pmd_p = (pmd_t *)page_address(page);
+ SetPageTOI_Untracked(virt_to_page(pmd_p));
pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -185,6 +186,7 @@ void init_espfix_ap(int cpu)
struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
pte_p = (pte_t *)page_address(page);
+ SetPageTOI_Untracked(virt_to_page(pte_p));
pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -193,6 +195,7 @@ void init_espfix_ap(int cpu)
pte_p = pte_offset_kernel(&pmd, addr);
stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
+ SetPageTOI_Untracked(virt_to_page(stack_page));
pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
for (n = 0; n < ESPFIX_PTE_CLONES; n++)
set_pte(&pte_p[n*PTE_STRIDE], pte);
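The three hunks above repeat one pattern: allocate a page for the espfix tables or stack, then immediately exempt it from TuxOnIce's incremental-image tracking so it can never be write-protected behind an AP's back. A minimal sketch of that pattern, assuming only the calls already visible in the hunks (the helper name itself is illustrative, not part of the patch):

/* Illustrative only: allocate a page on @node and exempt it from TuxOnIce
 * tracking before anything has a chance to write-protect it. */
static void *espfix_alloc_untracked(int node, gfp_t gfp_flags)
{
        struct page *page = alloc_pages_node(node, gfp_flags, 0);

        if (!page)
                return NULL;

        /* Equivalent to the hunks' SetPageTOI_Untracked(virt_to_page(...)) */
        SetPageTOI_Untracked(page);
        return page_address(page);
}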
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3d743da82..4b20bb75b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -12,6 +12,7 @@
#include <linux/percpu.h>
#include <linux/timex.h>
#include <linux/static_key.h>
+#include <linux/mm.h>
#include <asm/hpet.h>
#include <asm/timer.h>
@@ -195,6 +196,10 @@ static void cyc2ns_init(int cpu)
c2n->head = c2n->data;
c2n->tail = c2n->data;
+
+ /* Don't let TuxOnIce make this data RO - a secondary CPU will triple fault
+  * if it loads microcode, which does a printk, which may end up invoking cycles_2_ns. */
+ SetPageTOI_Untracked(virt_to_page(c2n));
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
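For context, in kernels of this era the per-CPU object the hunk touches looks roughly like the sketch below (paraphrased from arch/x86/kernel/tsc.c of the time, not part of the patch). virt_to_page(c2n) therefore resolves to the per-CPU page holding the cycles-to-nanoseconds state - the page that has to stay writable for the early secondary-CPU path described in the new comment:

struct cyc2ns {
        struct cyc2ns_data data[2];     /*  0 + 2*24 = 48 */
        struct cyc2ns_data *head;       /* 48 + 8    = 56 */
        struct cyc2ns_data *tail;       /* 56 + 8    = 64 */
}; /* exactly fits one cacheline */

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);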
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e830c71a1..8d2472be4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -13,6 +13,7 @@
#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
+#include <linux/tuxonice.h> /* incremental image support */
#include <linux/uaccess.h> /* faulthandler_disabled() */
#include <asm/traps.h> /* dotraplinkage, ... */
@@ -662,6 +663,10 @@ no_context(struct pt_regs *regs, unsigned long error_code,
unsigned long flags;
int sig;
+ if (toi_make_writable(init_mm.pgd, address)) {
+ return;
+ }
+
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs)) {
/*
@@ -916,10 +921,101 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
}
}
+#ifdef CONFIG_TOI_INCREMENTAL
+/**
+ * toi_do_cbw - do a copy-before-write before letting the faulting process continue
+ */
+static void toi_do_cbw(struct page *page)
+{
+ struct toi_cbw_state *state = this_cpu_ptr(&toi_cbw_states);
+
+ state->active = 1;
+ wmb();
+
+ if (state->enabled && state->next && PageTOI_CBW(page)) {
+ struct toi_cbw *this = state->next;
+ memcpy(this->virt, page_address(page), PAGE_SIZE);
+ this->pfn = page_to_pfn(page);
+ state->next = this->next;
+ }
+
+ state->active = 0;
+}
+
+/**
+ * _toi_make_writable - undo TOI's write protection on a page
+ */
+int _toi_make_writable(pte_t *pte)
+{
+ struct page *page = pte_page(*pte);
+ if (PageTOI_RO(page)) {
+ pgd_t *pgd = __va(read_cr3());
+ /*
+ * If this is a TuxOnIce-caused fault, we may not have permission to
+ * write to a page needed to reset the permissions of the original
+ * page. Use swapper_pg_dir to get around this.
+ */
+ load_cr3(swapper_pg_dir);
+
+ set_pte_atomic(pte, pte_mkwrite(*pte));
+ SetPageTOI_Dirty(page);
+ ClearPageTOI_RO(page);
+
+ toi_do_cbw(page);
+
+ load_cr3(pgd);
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * toi_make_writable - Handle a (potential) fault caused by TOI's write protection
+ *
+ * Make a page that TOI protected writable again. This might be because of a
+ * fault, or because we're allocating the page and want it to be untracked.
+ *
+ * Note that in the fault handling case, we don't care about the error code. If
+ * called from the double fault handler, we won't have one. We just check to
+ * see if the page was made RO by TOI, and mark it dirty/release the protection
+ * if it was.
+ */
+int toi_make_writable(pgd_t *pgd, unsigned long address)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd + pgd_index(address);
+ if (!pgd_present(*pgd))
+ return 0;
+
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ return 0;
+
+ if (pud_large(*pud))
+ return _toi_make_writable((pte_t *) pud);
+
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(*pmd))
+ return 0;
+
+ if (pmd_large(*pmd))
+ return _toi_make_writable((pte_t *) pmd);
+
+ pte = pte_offset_kernel(pmd, address);
+ if (!pte_present(*pte))
+ return 0;
+
+ return _toi_make_writable(pte);
+}
+#endif
+
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
if ((error_code & PF_WRITE) && !pte_write(*pte))
- return 0;
+ return 0;
if ((error_code & PF_INSTR) && !pte_exec(*pte))
return 0;
@@ -1082,6 +1178,15 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
kmemcheck_hide(regs);
prefetchw(&mm->mmap_sem);
+ /*
+ * Detect and handle page faults due to TuxOnIce making pages read-only
+ * so that it can create incremental images.
+ *
+ * Do it early to avoid double faults.
+ */
+ if (unlikely(toi_make_writable(init_mm.pgd, address)))
+ return;
+
if (unlikely(kmmio_fault(regs, address)))
return;
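toi_do_cbw() above shows only the consumer side of the copy-before-write machinery. From the fields it dereferences (state->enabled, state->next, this->virt, this->pfn, this->next), the per-CPU state and buffer chain are presumably shaped roughly as below; this is an inference from the hunk, not the patchset's actual declarations, and anything not referenced above is a guess:

/* Inferred sketch of the CBW structures used by toi_do_cbw() above. */
struct toi_cbw {
        unsigned long pfn;      /* page whose pre-write contents are preserved */
        void *virt;             /* PAGE_SIZE buffer that receives the copy */
        struct toi_cbw *next;   /* next unused buffer in a pre-allocated chain */
};

struct toi_cbw_state {
        int active;             /* set (with a write barrier) around the copy */
        int enabled;            /* incremental-image tracking is currently live */
        struct toi_cbw *next;   /* next free buffer, NULL once the chain is used up */
};

DECLARE_PER_CPU(struct toi_cbw_state, toi_cbw_states);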
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 493f54172..695ac7d37 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -150,9 +150,10 @@ static int page_size_mask;
static void __init probe_page_size_mask(void)
{
-#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
+#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) && !defined(CONFIG_TOI_INCREMENTAL)
/*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ * For CONFIG_DEBUG_PAGEALLOC or TuxOnIce's incremental image support,
+ * identity mapping will use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
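The probe_page_size_mask() change matters because incremental-image tracking has to flip write protection on individual 4K pages of the kernel direct mapping; if that mapping were built from 2M/1G pages, every such flip could force a large-page split in cpa(), possibly from interrupt context. A hypothetical sketch of the protection side, mirroring the unprotect path in _toi_make_writable() above - toi_write_protect_page() and its use of SetPageTOI_RO() are illustrative assumptions, not code from the patchset:

/* Hypothetical: write-protect one tracked page in the direct mapping. No
 * page splitting is ever needed because probe_page_size_mask() now forces
 * 4K mappings when CONFIG_TOI_INCREMENTAL is enabled. */
static void toi_write_protect_page(struct page *page)
{
        unsigned long addr = (unsigned long)page_address(page);
        unsigned int level;
        pte_t *pte = lookup_address(addr, &level);

        if (pte && level == PG_LEVEL_4K) {
                set_pte_atomic(pte, pte_wrprotect(*pte));
                SetPageTOI_RO(page);
                __flush_tlb_one(addr);
        }
}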