Linux-libre 4.8.2-gnu pck-4.8.2-gnu

author: André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-10-20 00:10:27 -0300
committer: André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-10-20 00:10:27 -0300
commit: d0b2f91bede3bd5e3d24dd6803e56eee959c1797 (patch)
tree: 7fee4ab0509879c373c4f2cbd5b8a5be5b4041ee /arch/x86/mm
parent: e914f8eb445e8f74b00303c19c2ffceaedd16a05 (diff)
28 files changed, 375 insertions, 411 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 62c0043a5..96d2b847e 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -37,4 +37,5 @@ obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
 obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
 obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
 
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 2ca15b59f..ba47524f5 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -9,7 +9,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/string.h>
-#include <linux/module.h>
 #include <linux/nodemask.h>
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 99bfb1928..ea9c49ada 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -14,7 +14,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
@@ -72,9 +72,9 @@ static struct addr_marker address_markers[] = {
 	{ 0, "User Space" },
 #ifdef CONFIG_X86_64
 	{ 0x8000000000000000UL, "Kernel Space" },
-	{ PAGE_OFFSET,		"Low Kernel Mapping" },
-	{ VMALLOC_START,        "vmalloc() Area" },
-	{ VMEMMAP_START,        "Vmemmap" },
+	{ 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
+	{ 0/* VMALLOC_START */, "vmalloc() Area" },
+	{ 0/* VMEMMAP_START */, "Vmemmap" },
 # ifdef CONFIG_X86_ESPFIX64
 	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
 # endif
@@ -434,8 +434,16 @@ void ptdump_walk_pgd_level_checkwx(void)
 
 static int __init pt_dump_init(void)
 {
+	/*
+	 * Various markers are not compile-time constants, so assign them
+	 * here.
+	 */
+#ifdef CONFIG_X86_64
+	address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
+	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
+	address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+#endif
 #ifdef CONFIG_X86_32
-	/* Not a compile-time constant on x86-32 */
 	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
 	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
 # ifdef CONFIG_HIGHMEM
@@ -446,8 +454,4 @@ static int __init pt_dump_init(void)
 
 	return 0;
 }
-
 __initcall(pt_dump_init);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
-MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 4bb53b89f..832b98f82 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
 #include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/traps.h>
+#include <asm/kdebug.h>
 
 typedef bool (*ex_handler_t)(const struct exception_table_entry *,
 			    struct pt_regs *, int);
@@ -37,7 +38,7 @@ bool ex_handler_ext(const struct exception_table_entry *fixup,
 		   struct pt_regs *regs, int trapnr)
 {
 	/* Special hack for uaccess_err */
-	current_thread_info()->uaccess_err = 1;
+	current->thread.uaccess_err = 1;
 	regs->ip = ex_fixup_addr(fixup);
 	return true;
 }
@@ -46,8 +47,9 @@ EXPORT_SYMBOL(ex_handler_ext);
 bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
 			     struct pt_regs *regs, int trapnr)
 {
-	WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n",
-		  (unsigned int)regs->cx);
+	if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
+			 (unsigned int)regs->cx, regs->ip, (void *)regs->ip))
+		show_stack_regs(regs);
 
 	/* Pretend that the read succeeded and returned 0. */
 	regs->ip = ex_fixup_addr(fixup);
@@ -60,9 +62,10 @@ EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
 bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
 			     struct pt_regs *regs, int trapnr)
 {
-	WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n",
-		  (unsigned int)regs->cx,
-		  (unsigned int)regs->dx, (unsigned int)regs->ax);
+	if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
+			 (unsigned int)regs->cx, (unsigned int)regs->dx,
+			 (unsigned int)regs->ax,  regs->ip, (void *)regs->ip))
+		show_stack_regs(regs);
 
 	/* Pretend that the write succeeded. */
 	regs->ip = ex_fixup_addr(fixup);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 441a68fcd..dc8023060 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -13,7 +13,6 @@
 #include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
 #include <linux/prefetch.h>		/* prefetchw			*/
 #include <linux/context_tracking.h>	/* exception_enter(), ...	*/
-#include <linux/tuxonice.h>             /* incremental image support    */
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
@@ -440,7 +439,7 @@ static noinline int vmalloc_fault(unsigned long address)
 	 * happen within a race in page table update. In the later
 	 * case just flush:
 	 */
-	pgd = pgd_offset(current->active_mm, address);
+	pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address);
 	pgd_ref = pgd_offset_k(address);
 	if (pgd_none(*pgd_ref))
 		return -1;
@@ -722,10 +721,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	/* No context means no VMA to pass down */
 	struct vm_area_struct *vma = NULL;
 
-        if (toi_make_writable(init_mm.pgd, address)) {
-            return;
-        }
-
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs, X86_TRAP_PF)) {
 		/*
@@ -742,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 		 * In this case we need to make sure we're not recursively
 		 * faulting through the emulate_vsyscall() logic.
 		 */
-		if (current_thread_info()->sig_on_uaccess_error && signal) {
+		if (current->thread.sig_on_uaccess_err && signal) {
 			tsk->thread.trap_nr = X86_TRAP_PF;
 			tsk->thread.error_code = error_code | PF_USER;
 			tsk->thread.cr2 = address;
@@ -1013,101 +1008,10 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	}
 }
 
-#ifdef CONFIG_TOI_INCREMENTAL
-/**
- * _toi_do_cbw - Do a copy-before-write before letting the faulting process continue
- */
-static void toi_do_cbw(struct page *page)
-{
-    struct toi_cbw_state *state = this_cpu_ptr(&toi_cbw_states);
-
-    state->active = 1;
-    wmb();
-
-    if (state->enabled && state->next && PageTOI_CBW(page)) {
-        struct toi_cbw *this = state->next;
-        memcpy(this->virt, page_address(page), PAGE_SIZE);
-        this->pfn = page_to_pfn(page);
-        state->next = this->next;
-    }
-
-    state->active = 0;
-}
-
-/**
- * _toi_make_writable - Defuse TOI's write protection
- */
-int _toi_make_writable(pte_t *pte)
-{
-    struct page *page = pte_page(*pte);
-    if (PageTOI_RO(page)) {
-        pgd_t *pgd = __va(read_cr3());
-        /*
-         * If this is a TuxOnIce caused fault, we may not have permission to
-         * write to a page needed to reset the permissions of the original
-         * page. Use swapper_pg_dir to get around this.
-         */
-        load_cr3(swapper_pg_dir);
-
-        set_pte_atomic(pte, pte_mkwrite(*pte));
-        SetPageTOI_Dirty(page);
-        ClearPageTOI_RO(page);
-
-        toi_do_cbw(page);
-
-        load_cr3(pgd);
-        return 1;
-    }
-    return 0;
-}
-
-/**
- * toi_make_writable - Handle a (potential) fault caused by TOI's write protection
- *
- * Make a page writable that was protected. Might be because of a fault, or
- * because we're allocating it and want it to be untracked.
- *
- * Note that in the fault handling case, we don't care about the error code. If
- * called from the double fault handler, we won't have one. We just check to
- * see if the page was made RO by TOI, and mark it dirty/release the protection
- * if it was.
- */
-int toi_make_writable(pgd_t *pgd, unsigned long address)
-{
-    pud_t *pud;
-    pmd_t *pmd;
-    pte_t *pte;
-
-    pgd = pgd + pgd_index(address);
-    if (!pgd_present(*pgd))
-        return 0;
-
-    pud = pud_offset(pgd, address);
-    if (!pud_present(*pud))
-        return 0;
-
-    if (pud_large(*pud))
-        return _toi_make_writable((pte_t *) pud);
-
-    pmd = pmd_offset(pud, address);
-    if (!pmd_present(*pmd))
-        return 0;
-
-    if (pmd_large(*pmd))
-        return _toi_make_writable((pte_t *) pmd);
-
-    pte = pte_offset_kernel(pmd, address);
-    if (!pte_present(*pte))
-        return 0;
-
-    return _toi_make_writable(pte);
-}
-#endif
-
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
 {
 	if ((error_code & PF_WRITE) && !pte_write(*pte))
-               return 0;
+		return 0;
 
 	if ((error_code & PF_INSTR) && !pte_exec(*pte))
 		return 0;
@@ -1287,15 +1191,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 		kmemcheck_hide(regs);
 	prefetchw(&mm->mmap_sem);
 
-        /*
-         * Detect and handle page faults due to TuxOnIce making pages read-only
-         * so that it can create incremental images.
-         *
-         * Do it early to avoid double faults.
-         */
-        if (unlikely(toi_make_writable(init_mm.pgd, address)))
-            return;
-
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
@@ -1458,7 +1353,7 @@ good_area:
 	 * the fault.  Since we never set FAULT_FLAG_RETRY_NOWAIT, if
 	 * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
 	 */
-	fault = handle_mm_fault(mm, vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags);
 	major |= fault & VM_FAULT_MAJOR;
 
 	/*
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index a6d739258..6d18b70ed 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,5 +1,5 @@
 #include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/swap.h> /* for totalram_pages */
 #include <linux/bootmem.h>
 
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index ec21796ac..4473cb4f8 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -3,15 +3,17 @@
  * included by both the compressed kernel and the regular kernel.
  */
 
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
 			   unsigned long addr, unsigned long end)
 {
 	addr &= PMD_MASK;
 	for (; addr < end; addr += PMD_SIZE) {
 		pmd_t *pmd = pmd_page + pmd_index(addr);
 
-		if (!pmd_present(*pmd))
-			set_pmd(pmd, __pmd(addr | pmd_flag));
+		if (pmd_present(*pmd))
+			continue;
+
+		set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
 	}
 }
 
@@ -30,13 +32,13 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 
 		if (pud_present(*pud)) {
 			pmd = pmd_offset(pud, 0);
-			ident_pmd_init(info->pmd_flag, pmd, addr, next);
+			ident_pmd_init(info, pmd, addr, next);
 			continue;
 		}
 		pmd = (pmd_t *)info->alloc_pgt_page(info->context);
 		if (!pmd)
 			return -ENOMEM;
-		ident_pmd_init(info->pmd_flag, pmd, addr, next);
+		ident_pmd_init(info, pmd, addr, next);
 		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
 	}
 
@@ -44,14 +46,15 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 }
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-			      unsigned long addr, unsigned long end)
+			      unsigned long pstart, unsigned long pend)
 {
+	unsigned long addr = pstart + info->offset;
+	unsigned long end = pend + info->offset;
 	unsigned long next;
 	int result;
-	int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
 
 	for (; addr < end; addr = next) {
-		pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+		pgd_t *pgd = pgd_page + pgd_index(addr);
 		pud_t *pud;
 
 		next = (addr & PGDIR_MASK) + PGDIR_SIZE;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4459e4c4b..d28a2d741 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -17,6 +17,7 @@
 #include <asm/proto.h>
 #include <asm/dma.h>		/* for MAX_DMA_PFN */
 #include <asm/microcode.h>
+#include <asm/kaslr.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -121,8 +122,18 @@ __ref void *alloc_low_pages(unsigned int num)
 	return __va(pfn << PAGE_SHIFT);
 }
 
-/* need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS */
-#define INIT_PGT_BUF_SIZE	(6 * PAGE_SIZE)
+/*
+ * By default we need 3 4k pages for the initial PMD_SIZE and 3 4k pages
+ * for 0-ISA_END_ADDRESS. With KASLR memory randomization, depending on
+ * the machine's e820 memory map and the PUD alignment, we may need twice
+ * as many pages.
+ */
+#ifndef CONFIG_RANDOMIZE_MEMORY
+#define INIT_PGD_PAGE_COUNT      6
+#else
+#define INIT_PGD_PAGE_COUNT      12
+#endif
+#define INIT_PGT_BUF_SIZE	(INIT_PGD_PAGE_COUNT * PAGE_SIZE)
 RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
 void  __init early_alloc_pgt_buf(void)
 {
@@ -150,10 +161,10 @@ static int page_size_mask;
 
 static void __init probe_page_size_mask(void)
 {
-#if !defined(CONFIG_KMEMCHECK) && !defined(CONFIG_TOI_INCREMENTAL)
+#if !defined(CONFIG_KMEMCHECK)
 	/*
-         * For CONFIG_KMEMCHECK, TuxOnIce's incremental image support or
-         * pagealloc debugging, identity mapping will use small pages.
+	 * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
+	 * use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
@@ -207,7 +218,7 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
  * adjust the page_size_mask for small range to go with
  *	big page size instead small one if nearby are ram too.
  */
-static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
+static void __ref adjust_range_page_size_mask(struct map_range *mr,
 							 int nr_range)
 {
 	int i;
@@ -395,7 +406,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
  * This runs before bootmem is initialized and gets pages directly from
  * the physical memory. To access them they are temporarily mapped.
  */
-unsigned long __init_refok init_memory_mapping(unsigned long start,
+unsigned long __ref init_memory_mapping(unsigned long start,
 					       unsigned long end)
 {
 	struct map_range mr[NR_RANGE_MR];
@@ -590,6 +601,9 @@ void __init init_mem_mapping(void)
 	/* the ISA range is always mapped regardless of memory holes */
 	init_memory_mapping(0, ISA_END_ADDRESS);
 
+	/* Init the trampoline, possibly with KASLR memory offset */
+	init_trampoline();
+
 	/*
 	 * If the allocation is in bottom-up direction, we setup direct mapping
 	 * in bottom-up, otherwise we setup direct mapping in top-down.
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 84df150ee..cf8059016 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -5,7 +5,6 @@
  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
-#include <linux/module.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bce2e5d9e..14b9dd71d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -27,7 +27,6 @@
 #include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/dma-mapping.h>
-#include <linux/module.h>
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
 #include <linux/memremap.h>
@@ -328,22 +327,30 @@ void __init cleanup_highmap(void)
 	}
 }
 
+/*
+ * Create PTE level page table mapping for physical addresses.
+ * It returns the last physical address mapped.
+ */
 static unsigned long __meminit
-phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
+phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
 	      pgprot_t prot)
 {
-	unsigned long pages = 0, next;
-	unsigned long last_map_addr = end;
+	unsigned long pages = 0, paddr_next;
+	unsigned long paddr_last = paddr_end;
+	pte_t *pte;
 	int i;
 
-	pte_t *pte = pte_page + pte_index(addr);
+	pte = pte_page + pte_index(paddr);
+	i = pte_index(paddr);
 
-	for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
-		next = (addr & PAGE_MASK) + PAGE_SIZE;
-		if (addr >= end) {
+	for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
+		paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
+		if (paddr >= paddr_end) {
 			if (!after_bootmem &&
-			    !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
-			    !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
+			    !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
+					     E820_RAM) &&
+			    !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
+					     E820_RESERVED_KERN))
 				set_pte(pte, __pte(0));
 			continue;
 		}
@@ -354,54 +361,61 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
 		 * pagetable pages as RO. So assume someone who pre-setup
 		 * these mappings are more intelligent.
 		 */
-		if (pte_val(*pte)) {
+		if (!pte_none(*pte)) {
 			if (!after_bootmem)
 				pages++;
 			continue;
 		}
 
 		if (0)
-			printk("   pte=%p addr=%lx pte=%016lx\n",
-			       pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
+			pr_info("   pte=%p addr=%lx pte=%016lx\n", pte, paddr,
+				pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 		pages++;
-		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
-		last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
+		set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+		paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
 	}
 
 	update_page_count(PG_LEVEL_4K, pages);
 
-	return last_map_addr;
+	return paddr_last;
 }
 
+/*
+ * Create PMD level page table mapping for physical addresses. The virtual
+ * and physical address have to be aligned at this level.
+ * It returns the last physical address mapped.
+ */
 static unsigned long __meminit
-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
+phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 	      unsigned long page_size_mask, pgprot_t prot)
 {
-	unsigned long pages = 0, next;
-	unsigned long last_map_addr = end;
+	unsigned long pages = 0, paddr_next;
+	unsigned long paddr_last = paddr_end;
 
-	int i = pmd_index(address);
+	int i = pmd_index(paddr);
 
-	for (; i < PTRS_PER_PMD; i++, address = next) {
-		pmd_t *pmd = pmd_page + pmd_index(address);
+	for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
+		pmd_t *pmd = pmd_page + pmd_index(paddr);
 		pte_t *pte;
 		pgprot_t new_prot = prot;
 
-		next = (address & PMD_MASK) + PMD_SIZE;
-		if (address >= end) {
+		paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
+		if (paddr >= paddr_end) {
 			if (!after_bootmem &&
-			    !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
-			    !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
+			    !e820_any_mapped(paddr & PMD_MASK, paddr_next,
+					     E820_RAM) &&
+			    !e820_any_mapped(paddr & PMD_MASK, paddr_next,
+					     E820_RESERVED_KERN))
 				set_pmd(pmd, __pmd(0));
 			continue;
 		}
 
-		if (pmd_val(*pmd)) {
+		if (!pmd_none(*pmd)) {
 			if (!pmd_large(*pmd)) {
 				spin_lock(&init_mm.page_table_lock);
 				pte = (pte_t *)pmd_page_vaddr(*pmd);
-				last_map_addr = phys_pte_init(pte, address,
-								end, prot);
+				paddr_last = phys_pte_init(pte, paddr,
+							   paddr_end, prot);
 				spin_unlock(&init_mm.page_table_lock);
 				continue;
 			}
@@ -420,7 +434,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 			if (page_size_mask & (1 << PG_LEVEL_2M)) {
 				if (!after_bootmem)
 					pages++;
-				last_map_addr = next;
+				paddr_last = paddr_next;
 				continue;
 			}
 			new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
@@ -430,51 +444,65 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
 			set_pte((pte_t *)pmd,
-				pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
+				pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
 					__pgprot(pgprot_val(prot) | _PAGE_PSE)));
 			spin_unlock(&init_mm.page_table_lock);
-			last_map_addr = next;
+			paddr_last = paddr_next;
 			continue;
 		}
 
 		pte = alloc_low_page();
-		last_map_addr = phys_pte_init(pte, address, end, new_prot);
+		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
 
 		spin_lock(&init_mm.page_table_lock);
 		pmd_populate_kernel(&init_mm, pmd, pte);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	update_page_count(PG_LEVEL_2M, pages);
-	return last_map_addr;
+	return paddr_last;
 }
 
+/*
+ * Create PUD level page table mapping for physical addresses. The virtual
+ * and physical address do not have to be aligned at this level. KASLR can
+ * randomize virtual addresses up to this level.
+ * It returns the last physical address mapped.
+ */
 static unsigned long __meminit
-phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
-			 unsigned long page_size_mask)
+phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
+	      unsigned long page_size_mask)
 {
-	unsigned long pages = 0, next;
-	unsigned long last_map_addr = end;
-	int i = pud_index(addr);
+	unsigned long pages = 0, paddr_next;
+	unsigned long paddr_last = paddr_end;
+	unsigned long vaddr = (unsigned long)__va(paddr);
+	int i = pud_index(vaddr);
 
-	for (; i < PTRS_PER_PUD; i++, addr = next) {
-		pud_t *pud = pud_page + pud_index(addr);
+	for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
+		pud_t *pud;
 		pmd_t *pmd;
 		pgprot_t prot = PAGE_KERNEL;
 
-		next = (addr & PUD_MASK) + PUD_SIZE;
-		if (addr >= end) {
+		vaddr = (unsigned long)__va(paddr);
+		pud = pud_page + pud_index(vaddr);
+		paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
+
+		if (paddr >= paddr_end) {
 			if (!after_bootmem &&
-			    !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
-			    !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
+			    !e820_any_mapped(paddr & PUD_MASK, paddr_next,
+					     E820_RAM) &&
+			    !e820_any_mapped(paddr & PUD_MASK, paddr_next,
+					     E820_RESERVED_KERN))
 				set_pud(pud, __pud(0));
 			continue;
 		}
 
-		if (pud_val(*pud)) {
+		if (!pud_none(*pud)) {
 			if (!pud_large(*pud)) {
 				pmd = pmd_offset(pud, 0);
-				last_map_addr = phys_pmd_init(pmd, addr, end,
-							 page_size_mask, prot);
+				paddr_last = phys_pmd_init(pmd, paddr,
+							   paddr_end,
+							   page_size_mask,
+							   prot);
 				__flush_tlb_all();
 				continue;
 			}
@@ -493,7 +521,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			if (page_size_mask & (1 << PG_LEVEL_1G)) {
 				if (!after_bootmem)
 					pages++;
-				last_map_addr = next;
+				paddr_last = paddr_next;
 				continue;
 			}
 			prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
@@ -503,16 +531,16 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
 			set_pte((pte_t *)pud,
-				pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
+				pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
 					PAGE_KERNEL_LARGE));
 			spin_unlock(&init_mm.page_table_lock);
-			last_map_addr = next;
+			paddr_last = paddr_next;
 			continue;
 		}
 
 		pmd = alloc_low_page();
-		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
-					      prot);
+		paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
+					   page_size_mask, prot);
 
 		spin_lock(&init_mm.page_table_lock);
 		pud_populate(&init_mm, pud, pmd);
@@ -522,38 +550,44 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 
 	update_page_count(PG_LEVEL_1G, pages);
 
-	return last_map_addr;
+	return paddr_last;
 }
 
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
 unsigned long __meminit
-kernel_physical_mapping_init(unsigned long start,
-			     unsigned long end,
+kernel_physical_mapping_init(unsigned long paddr_start,
+			     unsigned long paddr_end,
 			     unsigned long page_size_mask)
 {
 	bool pgd_changed = false;
-	unsigned long next, last_map_addr = end;
-	unsigned long addr;
+	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
 
-	start = (unsigned long)__va(start);
-	end = (unsigned long)__va(end);
-	addr = start;
+	paddr_last = paddr_end;
+	vaddr = (unsigned long)__va(paddr_start);
+	vaddr_end = (unsigned long)__va(paddr_end);
+	vaddr_start = vaddr;
 
-	for (; start < end; start = next) {
-		pgd_t *pgd = pgd_offset_k(start);
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		pgd_t *pgd = pgd_offset_k(vaddr);
 		pud_t *pud;
 
-		next = (start & PGDIR_MASK) + PGDIR_SIZE;
+		vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
 
 		if (pgd_val(*pgd)) {
 			pud = (pud_t *)pgd_page_vaddr(*pgd);
-			last_map_addr = phys_pud_init(pud, __pa(start),
-						 __pa(end), page_size_mask);
+			paddr_last = phys_pud_init(pud, __pa(vaddr),
+						   __pa(vaddr_end),
+						   page_size_mask);
 			continue;
 		}
 
 		pud = alloc_low_page();
-		last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
-						 page_size_mask);
+		paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
+					   page_size_mask);
 
 		spin_lock(&init_mm.page_table_lock);
 		pgd_populate(&init_mm, pgd, pud);
@@ -562,11 +596,11 @@ kernel_physical_mapping_init(unsigned long start,
 	}
 
 	if (pgd_changed)
-		sync_global_pgds(addr, end - 1, 0);
+		sync_global_pgds(vaddr_start, vaddr_end - 1, 0);
 
 	__flush_tlb_all();
 
-	return last_map_addr;
+	return paddr_last;
 }
 
 #ifndef CONFIG_NUMA
@@ -673,7 +707,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
 
 	for (i = 0; i < PTRS_PER_PTE; i++) {
 		pte = pte_start + i;
-		if (pte_val(*pte))
+		if (!pte_none(*pte))
 			return;
 	}
 
@@ -691,7 +725,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		pmd = pmd_start + i;
-		if (pmd_val(*pmd))
+		if (!pmd_none(*pmd))
 			return;
 	}
 
@@ -702,27 +736,6 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 	spin_unlock(&init_mm.page_table_lock);
 }
 
-/* Return true if pgd is changed, otherwise return false. */
-static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
-{
-	pud_t *pud;
-	int i;
-
-	for (i = 0; i < PTRS_PER_PUD; i++) {
-		pud = pud_start + i;
-		if (pud_val(*pud))
-			return false;
-	}
-
-	/* free a pud table */
-	free_pagetable(pgd_page(*pgd), 0);
-	spin_lock(&init_mm.page_table_lock);
-	pgd_clear(pgd);
-	spin_unlock(&init_mm.page_table_lock);
-
-	return true;
-}
-
 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 		 bool direct)
@@ -913,7 +926,6 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 	unsigned long addr;
 	pgd_t *pgd;
 	pud_t *pud;
-	bool pgd_changed = false;
 
 	for (addr = start; addr < end; addr = next) {
 		next = pgd_addr_end(addr, end);
@@ -924,13 +936,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 
 		pud = (pud_t *)pgd_page_vaddr(*pgd);
 		remove_pud_table(pud, addr, next, direct);
-		if (free_pud_table(pud, pgd))
-			pgd_changed = true;
 	}
 
-	if (pgd_changed)
-		sync_global_pgds(start, end - 1, 1);
-
 	flush_tlb_all();
 }
 
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 9c0ff045f..ada98b39b 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -18,7 +18,7 @@
 
 #include <asm/iomap.h>
 #include <asm/pat.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/highmem.h>
 
 static int is_io_mapping_possible(resource_size_t base, unsigned long size)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index f0894910b..7aaa26358 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -9,7 +9,6 @@
 #include <linux/bootmem.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mmiotrace.h>
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
new file mode 100644
index 000000000..bda8d5eef
--- /dev/null
+++ b/arch/x86/mm/kaslr.c
@@ -0,0 +1,172 @@
+/*
+ * This file implements KASLR memory randomization for x86_64. It randomizes
+ * the virtual address space of kernel memory regions (physical memory
+ * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates
+ * exploits relying on predictable kernel addresses.
+ *
+ * Entropy is generated using the KASLR early boot functions now shared in
+ * the lib directory (originally written by Kees Cook). Randomization is
+ * done on PGD & PUD page table levels to increase possible addresses. The
+ * physical memory mapping code was adapted to support PUD level virtual
+ * addresses. In the best configuration, this implementation provides 30,000
+ * possible virtual addresses on average for each memory region. An additional
+ * low memory page is used to ensure each CPU can start with a PGD aligned
+ * virtual address (for realmode).
+ *
+ * The order of each memory region is not changed. The feature looks at
+ * the available space for the regions based on different configuration
+ * options and randomizes the base and space between each. The size of the
+ * physical memory mapping is the available physical memory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/random.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/kaslr.h>
+
+#include "mm_internal.h"
+
+#define TB_SHIFT 40
+
+/*
+ * Virtual address start and end range for randomization. The end changes
+ * based on configuration to have the highest amount of space for randomization.
+ * It increases the possible random position for each randomized region.
+ *
+ * You need to add an if/def entry if you introduce a new memory region
+ * compatible with KASLR. Your entry must be in logical order with memory
+ * layout. For example, ESPFIX is before EFI because its virtual address is
+ * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
+ * ensure that this order is correct and won't be changed.
+ */
+static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
+static const unsigned long vaddr_end = VMEMMAP_START;
+
+/* Default values */
+unsigned long page_offset_base = __PAGE_OFFSET_BASE;
+EXPORT_SYMBOL(page_offset_base);
+unsigned long vmalloc_base = __VMALLOC_BASE;
+EXPORT_SYMBOL(vmalloc_base);
+
+/*
+ * Memory regions randomized by KASLR (except modules that use a separate logic
+ * earlier during boot). The list is ordered based on virtual addresses. This
+ * order is kept after randomization.
+ */
+static __initdata struct kaslr_memory_region {
+	unsigned long *base;
+	unsigned long size_tb;
+} kaslr_regions[] = {
+	{ &page_offset_base, 64/* Maximum */ },
+	{ &vmalloc_base, VMALLOC_SIZE_TB },
+};
+
+/* Get size in bytes used by the memory region */
+static inline unsigned long get_padding(struct kaslr_memory_region *region)
+{
+	return (region->size_tb << TB_SHIFT);
+}
+
+/*
+ * Apply no randomization if KASLR was disabled at boot or if KASAN
+ * is enabled. KASAN shadow mappings rely on regions being PGD aligned.
+ */
+static inline bool kaslr_memory_enabled(void)
+{
+	return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
+}
+
+/* Initialize base and padding for each memory region randomized with KASLR */
+void __init kernel_randomize_memory(void)
+{
+	size_t i;
+	unsigned long vaddr = vaddr_start;
+	unsigned long rand, memory_tb;
+	struct rnd_state rand_state;
+	unsigned long remain_entropy;
+
+	if (!kaslr_memory_enabled())
+		return;
+
+	/*
+	 * Update the physical memory mapping size to the available memory
+	 * and add padding if needed (especially for memory hotplug support).
+	 */
+	BUG_ON(kaslr_regions[0].base != &page_offset_base);
+	memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
+		CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
+
+	/* Adapt physical memory region size based on available memory */
+	if (memory_tb < kaslr_regions[0].size_tb)
+		kaslr_regions[0].size_tb = memory_tb;
+
+	/* Calculate entropy available between regions */
+	remain_entropy = vaddr_end - vaddr_start;
+	for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
+		remain_entropy -= get_padding(&kaslr_regions[i]);
+
+	prandom_seed_state(&rand_state, kaslr_get_random_long("Memory"));
+
+	for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) {
+		unsigned long entropy;
+
+		/*
+		 * Select a random virtual address using the extra entropy
+		 * available.
+		 */
+		entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
+		prandom_bytes_state(&rand_state, &rand, sizeof(rand));
+		entropy = (rand % (entropy + 1)) & PUD_MASK;
+		vaddr += entropy;
+		*kaslr_regions[i].base = vaddr;
+
+		/*
+		 * Jump the region and add a minimum padding based on
+		 * randomization alignment.
+		 */
+		vaddr += get_padding(&kaslr_regions[i]);
+		vaddr = round_up(vaddr + 1, PUD_SIZE);
+		remain_entropy -= entropy;
+	}
+}
+
+/*
+ * Create PGD aligned trampoline table to allow real mode initialization
+ * of additional CPUs. Consume only 1 low memory page.
+ */
+void __meminit init_trampoline(void)
+{
+	unsigned long paddr, paddr_next;
+	pgd_t *pgd;
+	pud_t *pud_page, *pud_page_tramp;
+	int i;
+
+	if (!kaslr_memory_enabled()) {
+		init_trampoline_default();
+		return;
+	}
+
+	pud_page_tramp = alloc_low_page();
+
+	paddr = 0;
+	pgd = pgd_offset_k((unsigned long)__va(paddr));
+	pud_page = (pud_t *) pgd_page_vaddr(*pgd);
+
+	for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) {
+		pud_t *pud, *pud_tramp;
+		unsigned long vaddr = (unsigned long)__va(paddr);
+
+		pud_tramp = pud_page_tramp + pud_index(paddr);
+		pud = pud_page + pud_index(vaddr);
+		paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
+
+		*pud_tramp = *pud;
+	}
+
+	set_pgd(&trampoline_pgd_entry,
+		__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+}
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index b4f2e7e9e..4515bae36 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -14,7 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/kmemcheck.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 #include <linux/page-flags.h>
 #include <linux/percpu.h>
 #include <linux/ptrace.h>
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index aec124214..c2638a7d2 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -1,5 +1,5 @@
 #include <linux/kmemcheck.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mm.h>
 
 #include <asm/page.h>
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index ddb2244b0..afc47f5c9 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -11,7 +11,7 @@
 #include <linux/rculist.h>
 #include <linux/spinlock.h>
 #include <linux/hash.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 0057a7acc..bef36622e 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -24,7 +24,7 @@
 
 #define DEBUG 1
 
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/debugfs.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 9c086c571..fb682108f 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,4 +1,5 @@
 /* Common code for 32 and 64-bit NUMA */
+#include <linux/acpi.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/string.h>
@@ -7,7 +8,6 @@
 #include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/ctype.h>
-#include <linux/module.h>
 #include <linux/nodemask.h>
 #include <linux/sched.h>
 #include <linux/topology.h>
@@ -15,7 +15,6 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/dma.h>
-#include <asm/acpi.h>
 #include <asm/amd_nb.h>
 
 #include "numa_internal.h"
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 47b6436e4..6b7ce6279 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -24,7 +24,7 @@
 
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/module.h>
+#include <linux/init.h>
 
 #include "numa_internal.h"
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index b2659b9d0..e3353c97d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void)
 
 static inline unsigned long highmap_end_pfn(void)
 {
-	return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
+	/* Use the last byte: never reference a paddr outside the kernel. */
+	return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
 }
 
 #endif
@@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end)
 	return addr >= start && addr < end;
 }
 
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr <= end;	/* both bounds inclusive */
+}
+
 /*
  * Flushing functions
  */
@@ -746,18 +753,6 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
 	return true;
 }
 
-static bool try_to_free_pud_page(pud_t *pud)
-{
-	int i;
-
-	for (i = 0; i < PTRS_PER_PUD; i++)
-		if (!pud_none(pud[i]))
-			return false;
-
-	free_page((unsigned long)pud);
-	return true;
-}
-
 static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
 {
 	pte_t *pte = pte_offset_kernel(pmd, start);
@@ -871,16 +866,6 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
 	 */
 }
 
-static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
-{
-	pgd_t *pgd_entry = root + pgd_index(addr);
-
-	unmap_pud_range(pgd_entry, addr, end);
-
-	if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
-		pgd_clear(pgd_entry);
-}
-
 static int alloc_pte_page(pmd_t *pmd)
 {
 	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
@@ -1113,7 +1098,12 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 
 	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
 	if (ret < 0) {
-		unmap_pgd_range(cpa->pgd, addr,
+		/*
+		 * Leave the PUD page in place in case some other CPU or thread
+		 * already found it, but remove any useless entries we just
+		 * added to it.
+		 */
+		unmap_pud_range(pgd_entry, addr,
 				addr + (cpa->numpages << PAGE_SHIFT));
 		return ret;
 	}
@@ -1185,7 +1175,7 @@ repeat:
 		return __cpa_process_fault(cpa, address, primary);
 
 	old_pte = *kpte;
-	if (!pte_val(old_pte))
+	if (pte_none(old_pte))
 		return __cpa_process_fault(cpa, address, primary);
 
 	if (level == PG_LEVEL_4K) {
@@ -1316,7 +1306,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	 * to touch the high mapped kernel as well:
 	 */
 	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
-	    within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+	    within_inclusive(cpa->pfn, highmap_start_pfn(),
+			     highmap_end_pfn())) {
 		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
 					       __START_KERNEL_map - phys_base;
 		alias_cpa = *cpa;
@@ -1992,12 +1983,6 @@ out:
 	return retval;
 }
 
-void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
-			       unsigned numpages)
-{
-	unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
-}
-
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 5431a32f7..170cc4ff0 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,7 +11,6 @@
 #include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
@@ -755,11 +754,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 		return 1;
 
 	while (cursor < to) {
-		if (!devmem_is_allowed(pfn)) {
-			pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
-				current->comm, from, to - 1);
+		if (!devmem_is_allowed(pfn))
 			return 0;
-		}
 		cursor += PAGE_SIZE;
 		pfn++;
 	}
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 2f7702253..de391b7bc 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -11,7 +11,6 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/rbtree_augmented.h>
 #include <linux/sched.h>
 #include <linux/gfp.h>
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index 9f0614dae..a23586953 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -26,7 +26,6 @@
  *  Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
  */
 
-#include <linux/module.h>
 #include <linux/ptrace.h> /* struct pt_regs */
 #include "pf_in.h"
 
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index aa0ff4b02..3feec5af4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,7 +6,7 @@
 #include <asm/fixmap.h>
 #include <asm/mtrr.h>
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
+#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO)
 
 #ifdef CONFIG_HIGHPTE
 #define PGALLOC_USER_GFP __GFP_HIGHMEM
@@ -18,7 +18,7 @@ gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	return (pte_t *)__get_free_page(PGALLOC_GFP);
+	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
 }
 
 pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -207,9 +207,13 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
 {
 	int i;
 	bool failed = false;
+	gfp_t gfp = PGALLOC_GFP;
+
+	if (mm == &init_mm)
+		gfp &= ~__GFP_ACCOUNT;
 
 	for(i = 0; i < PREALLOCATED_PMDS; i++) {
-		pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
 		if (!pmd)
 			failed = true;
 		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 75cc0978d..9adce7768 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -8,7 +8,6 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
-#include <linux/module.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -47,7 +46,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 		return;
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	if (pte_val(pteval))
+	if (!pte_none(pteval))
 		set_pte_at(&init_mm, vaddr, pte, pteval);
 	else
 		pte_clear(&init_mm, vaddr, pte);
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index e666cbbb9..cfc3b9121 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -1,6 +1,6 @@
 #include <linux/bootmem.h>
 #include <linux/mmdebug.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mm.h>
 
 #include <asm/page.h>
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index b5f821881..35fe69529 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -13,10 +13,8 @@
 #include <linux/acpi.h>
 #include <linux/mmzone.h>
 #include <linux/bitmap.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/topology.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <asm/proto.h>
 #include <asm/numa.h>
@@ -24,51 +22,6 @@
 #include <asm/apic.h>
 #include <asm/uv/uv.h>
 
-int acpi_numa __initdata;
-
-static __init int setup_node(int pxm)
-{
-	return acpi_map_pxm_to_node(pxm);
-}
-
-static __init void bad_srat(void)
-{
-	printk(KERN_ERR "SRAT: SRAT not used.\n");
-	acpi_numa = -1;
-}
-
-static __init inline int srat_disabled(void)
-{
-	return acpi_numa < 0;
-}
-
-/*
- * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
- * I/O localities since SRAT does not list them.  I/O localities are
- * not supported at this point.
- */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
-	int i, j;
-
-	for (i = 0; i < slit->locality_count; i++) {
-		const int from_node = pxm_to_node(i);
-
-		if (from_node == NUMA_NO_NODE)
-			continue;
-
-		for (j = 0; j < slit->locality_count; j++) {
-			const int to_node = pxm_to_node(j);
-
-			if (to_node == NUMA_NO_NODE)
-				continue;
-
-			numa_set_distance(from_node, to_node,
-				slit->entry[slit->locality_count * i + j]);
-		}
-	}
-}
-
 /* Callback for Proximity Domain -> x2APIC mapping */
 void __init
 acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
@@ -91,7 +44,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 			 pxm, apic_id);
 		return;
 	}
-	node = setup_node(pxm);
+	node = acpi_map_pxm_to_node(pxm);
 	if (node < 0) {
 		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
 		bad_srat();
@@ -104,7 +57,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 	set_apicid_to_node(apic_id, node);
 	node_set(node, numa_nodes_parsed);
-	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
 	       pxm, apic_id, node);
 }
@@ -127,7 +79,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	pxm = pa->proximity_domain_lo;
 	if (acpi_srat_revision >= 2)
 		pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
-	node = setup_node(pxm);
+	node = acpi_map_pxm_to_node(pxm);
 	if (node < 0) {
 		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
 		bad_srat();
@@ -146,74 +98,10 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 
 	set_apicid_to_node(apic_id, node);
 	node_set(node, numa_nodes_parsed);
-	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
 	       pxm, apic_id, node);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-static inline int save_add_info(void) {return 1;}
-#else
-static inline int save_add_info(void) {return 0;}
-#endif
-
-/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-int __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
-{
-	u64 start, end;
-	u32 hotpluggable;
-	int node, pxm;
-
-	if (srat_disabled())
-		goto out_err;
-	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
-		goto out_err_bad_srat;
-	if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
-		goto out_err;
-	hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
-	if (hotpluggable && !save_add_info())
-		goto out_err;
-
-	start = ma->base_address;
-	end = start + ma->length;
-	pxm = ma->proximity_domain;
-	if (acpi_srat_revision <= 1)
-		pxm &= 0xff;
-
-	node = setup_node(pxm);
-	if (node < 0) {
-		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
-		goto out_err_bad_srat;
-	}
-
-	if (numa_add_memblk(node, start, end) < 0)
-		goto out_err_bad_srat;
-
-	node_set(node, numa_nodes_parsed);
-
-	pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
-		node, pxm,
-		(unsigned long long) start, (unsigned long long) end - 1,
-		hotpluggable ? " hotplug" : "",
-		ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
-
-	/* Mark hotplug range in memblock. */
-	if (hotpluggable && memblock_mark_hotplug(start, ma->length))
-		pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
-			(unsigned long long)start, (unsigned long long)end - 1);
-
-	max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
-
-	return 0;
-out_err_bad_srat:
-	bad_srat();
-out_err:
-	return -1;
-}
-
-void __init acpi_numa_arch_fixup(void) {}
-
 int __init x86_acpi_numa_init(void)
 {
 	int ret;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5643fd0b1..4dbe65622 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -4,7 +4,7 @@
 #include <linux/spinlock.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/cpu.h>
 
 #include <asm/tlbflush.h>
author	André Fabian Silva Delgado <emulatorman@parabola.nu>	2016-10-20 00:10:27 -0300
committer	André Fabian Silva Delgado <emulatorman@parabola.nu>	2016-10-20 00:10:27 -0300
commit	d0b2f91bede3bd5e3d24dd6803e56eee959c1797 (patch)
tree	7fee4ab0509879c373c4f2cbd5b8a5be5b4041ee /arch/x86/mm
parent	e914f8eb445e8f74b00303c19c2ffceaedd16a05 (diff)