summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-03-25 03:53:42 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-03-25 03:53:42 -0300
commit03dd4cb26d967f9588437b0fc9cc0e8353322bb7 (patch)
treefa581f6dc1c0596391690d1f67eceef3af8246dc /arch/x86/mm
parentd4e493caf788ef44982e131ff9c786546904d934 (diff)
Linux-libre 4.5-gnu
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/debug_pagetables.c46
-rw-r--r--arch/x86/mm/dump_pagetables.c34
-rw-r--r--arch/x86/mm/fault.c107
-rw-r--r--arch/x86/mm/gup.c72
-rw-r--r--arch/x86/mm/hugetlbpage.c4
-rw-r--r--arch/x86/mm/init.c5
-rw-r--r--arch/x86/mm/init_64.c36
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/mmap.c12
-rw-r--r--arch/x86/mm/numa.c2
-rw-r--r--arch/x86/mm/pageattr.c27
-rw-r--r--arch/x86/mm/pat.c17
-rw-r--r--arch/x86/mm/pat_rbtree.c52
-rw-r--r--arch/x86/mm/pgtable.c20
-rw-r--r--arch/x86/mm/setup_nx.c4
-rw-r--r--arch/x86/mm/srat.c2
17 files changed, 225 insertions, 220 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 65c47fda2..f9d38a48e 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
+obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
new file mode 100644
index 000000000..bfcffdf6c
--- /dev/null
+++ b/arch/x86/mm/debug_pagetables.c
@@ -0,0 +1,46 @@
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/pgtable.h>
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ ptdump_walk_pgd_level(m, NULL);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static struct dentry *pe;
+
+static int __init pt_dump_debug_init(void)
+{
+ pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
+ &ptdump_fops);
+ if (!pe)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void __exit pt_dump_debug_exit(void)
+{
+ debugfs_remove_recursive(pe);
+}
+
+module_init(pt_dump_debug_init);
+module_exit(pt_dump_debug_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
+MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0f1c6fc3d..4a6f1d9b5 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -426,38 +426,15 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
ptdump_walk_pgd_level_core(m, pgd, false);
}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
void ptdump_walk_pgd_level_checkwx(void)
{
ptdump_walk_pgd_level_core(NULL, NULL, true);
}
-#ifdef CONFIG_X86_PTDUMP
-static int ptdump_show(struct seq_file *m, void *v)
+static int __init pt_dump_init(void)
{
- ptdump_walk_pgd_level(m, NULL);
- return 0;
-}
-
-static int ptdump_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif
-
-static int pt_dump_init(void)
-{
-#ifdef CONFIG_X86_PTDUMP
- struct dentry *pe;
-#endif
-
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
@@ -468,13 +445,6 @@ static int pt_dump_init(void)
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif
-#ifdef CONFIG_X86_PTDUMP
- pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
- &ptdump_fops);
- if (!pe)
- return -ENOMEM;
-#endif
-
return 0;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8d2472be4..e830c71a1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -13,7 +13,6 @@
#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
-#include <linux/tuxonice.h> /* incremental image support */
#include <linux/uaccess.h> /* faulthandler_disabled() */
#include <asm/traps.h> /* dotraplinkage, ... */
@@ -663,10 +662,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
unsigned long flags;
int sig;
- if (toi_make_writable(init_mm.pgd, address)) {
- return;
- }
-
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs)) {
/*
@@ -921,101 +916,10 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
}
}
-#ifdef CONFIG_TOI_INCREMENTAL
-/**
- * _toi_do_cbw - Do a copy-before-write before letting the faulting process continue
- */
-static void toi_do_cbw(struct page *page)
-{
- struct toi_cbw_state *state = this_cpu_ptr(&toi_cbw_states);
-
- state->active = 1;
- wmb();
-
- if (state->enabled && state->next && PageTOI_CBW(page)) {
- struct toi_cbw *this = state->next;
- memcpy(this->virt, page_address(page), PAGE_SIZE);
- this->pfn = page_to_pfn(page);
- state->next = this->next;
- }
-
- state->active = 0;
-}
-
-/**
- * _toi_make_writable - Defuse TOI's write protection
- */
-int _toi_make_writable(pte_t *pte)
-{
- struct page *page = pte_page(*pte);
- if (PageTOI_RO(page)) {
- pgd_t *pgd = __va(read_cr3());
- /*
- * If this is a TuxOnIce caused fault, we may not have permission to
- * write to a page needed to reset the permissions of the original
- * page. Use swapper_pg_dir to get around this.
- */
- load_cr3(swapper_pg_dir);
-
- set_pte_atomic(pte, pte_mkwrite(*pte));
- SetPageTOI_Dirty(page);
- ClearPageTOI_RO(page);
-
- toi_do_cbw(page);
-
- load_cr3(pgd);
- return 1;
- }
- return 0;
-}
-
-/**
- * toi_make_writable - Handle a (potential) fault caused by TOI's write protection
- *
- * Make a page writable that was protected. Might be because of a fault, or
- * because we're allocating it and want it to be untracked.
- *
- * Note that in the fault handling case, we don't care about the error code. If
- * called from the double fault handler, we won't have one. We just check to
- * see if the page was made RO by TOI, and mark it dirty/release the protection
- * if it was.
- */
-int toi_make_writable(pgd_t *pgd, unsigned long address)
-{
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = pgd + pgd_index(address);
- if (!pgd_present(*pgd))
- return 0;
-
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- return 0;
-
- if (pud_large(*pud))
- return _toi_make_writable((pte_t *) pud);
-
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- return 0;
-
- if (pmd_large(*pmd))
- return _toi_make_writable((pte_t *) pmd);
-
- pte = pte_offset_kernel(pmd, address);
- if (!pte_present(*pte))
- return 0;
-
- return _toi_make_writable(pte);
-}
-#endif
-
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
if ((error_code & PF_WRITE) && !pte_write(*pte))
- return 0;
+ return 0;
if ((error_code & PF_INSTR) && !pte_exec(*pte))
return 0;
@@ -1178,15 +1082,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
kmemcheck_hide(regs);
prefetchw(&mm->mmap_sem);
- /*
- * Detect and handle page faults due to TuxOnIce making pages read-only
- * so that it can create incremental images.
- *
- * Do it early to avoid double faults.
- */
- if (unlikely(toi_make_writable(init_mm.pgd, address)))
- return;
-
if (unlikely(kmmio_fault(regs, address)))
return;
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index ae9a37bf1..d8a798d8b 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -9,6 +9,7 @@
#include <linux/vmstat.h>
#include <linux/highmem.h>
#include <linux/swap.h>
+#include <linux/memremap.h>
#include <asm/pgtable.h>
@@ -63,6 +64,16 @@ retry:
#endif
}
+static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
+{
+ while ((*nr) - nr_start) {
+ struct page *page = pages[--(*nr)];
+
+ ClearPageReferenced(page);
+ put_page(page);
+ }
+}
+
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
@@ -71,7 +82,9 @@ retry:
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
+ struct dev_pagemap *pgmap = NULL;
unsigned long mask;
+ int nr_start = *nr;
pte_t *ptep;
mask = _PAGE_PRESENT|_PAGE_USER;
@@ -89,13 +102,21 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
return 0;
}
- if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
+ if (pte_devmap(pte)) {
+ pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
+ if (unlikely(!pgmap)) {
+ undo_dev_pagemap(nr, nr_start, pages);
+ pte_unmap(ptep);
+ return 0;
+ }
+ } else if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
pte_unmap(ptep);
return 0;
}
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
get_page(page);
+ put_dev_pagemap(pgmap);
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
@@ -114,6 +135,32 @@ static inline void get_head_page_multiple(struct page *page, int nr)
SetPageReferenced(page);
}
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ int nr_start = *nr;
+ unsigned long pfn = pmd_pfn(pmd);
+ struct dev_pagemap *pgmap = NULL;
+
+ pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
+ do {
+ struct page *page = pfn_to_page(pfn);
+
+ pgmap = get_dev_pagemap(pfn, pgmap);
+ if (unlikely(!pgmap)) {
+ undo_dev_pagemap(nr, nr_start, pages);
+ return 0;
+ }
+ SetPageReferenced(page);
+ pages[*nr] = page;
+ get_page(page);
+ put_dev_pagemap(pgmap);
+ (*nr)++;
+ pfn++;
+ } while (addr += PAGE_SIZE, addr != end);
+ return 1;
+}
+
static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
@@ -126,9 +173,13 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
mask |= _PAGE_RW;
if ((pmd_flags(pmd) & mask) != mask)
return 0;
+
+ VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
+ if (pmd_devmap(pmd))
+ return __gup_device_huge_pmd(pmd, addr, end, pages, nr);
+
/* hugepages are never "special" */
VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
- VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
refs = 0;
head = pmd_page(pmd);
@@ -136,8 +187,6 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
- if (PageTail(page))
- get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
@@ -158,18 +207,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp;
next = pmd_addr_end(addr, end);
- /*
- * The pmd_trans_splitting() check below explains why
- * pmdp_splitting_flush has to flush the tlb, to stop
- * this gup-fast code from running while we set the
- * splitting bit in the pmd. Returning zero will take
- * the slow path that will call wait_split_huge_page()
- * if the pmd is still in splitting state. gup-fast
- * can't because it has irq disabled and
- * wait_split_huge_page() would never return as the
- * tlb flush IPI wouldn't run.
- */
- if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+ if (pmd_none(pmd))
return 0;
if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
/*
@@ -212,8 +250,6 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
- if (PageTail(page))
- get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 42982b26e..740d7ac03 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -173,10 +173,10 @@ static __init int setup_hugepagesz(char *opt)
}
__setup("hugepagesz=", setup_hugepagesz);
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
static __init int gigantic_pages_init(void)
{
- /* With CMA we can allocate gigantic pages at runtime */
+ /* With compaction or CMA we can allocate gigantic pages at runtime */
if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
return 0;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 695ac7d37..493f54172 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -150,10 +150,9 @@ static int page_size_mask;
static void __init probe_page_size_mask(void)
{
-#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) && !defined(CONFIG_TOI_INCREMENTAL)
+#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
/*
- * For CONFIG_DEBUG_PAGEALLOC or TuxOnIce's incremental image support,
- * identity mapping will use small pages.
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ec081fe0c..5488d2112 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
+#include <linux/memremap.h>
#include <linux/nmi.h>
#include <linux/gfp.h>
#include <linux/kcore.h>
@@ -714,6 +715,12 @@ static void __meminit free_pagetable(struct page *page, int order)
{
unsigned long magic;
unsigned int nr_pages = 1 << order;
+ struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
+
+ if (altmap) {
+ vmem_altmap_free(altmap, nr_pages);
+ return;
+ }
/* bootmem page has reserved flag */
if (PageReserved(page)) {
@@ -814,8 +821,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
if (phys_addr < (phys_addr_t)0x40000000)
return;
- if (IS_ALIGNED(addr, PAGE_SIZE) &&
- IS_ALIGNED(next, PAGE_SIZE)) {
+ if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
/*
* Do not free direct mapping pages since they were
* freed when offlining, or simplely not in use.
@@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ struct page *page = pfn_to_page(start_pfn);
+ struct vmem_altmap *altmap;
struct zone *zone;
int ret;
- zone = page_zone(pfn_to_page(start_pfn));
- kernel_physical_mapping_remove(start, start + size);
+ /* With altmap the first mapped page is offset from @start */
+ altmap = to_vmem_altmap((unsigned long) page);
+ if (altmap)
+ page += vmem_altmap_offset(altmap);
+ zone = page_zone(page);
ret = __remove_pages(zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
+ kernel_physical_mapping_remove(start, start + size);
return ret;
}
@@ -1236,7 +1248,7 @@ static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;
static int __meminit vmemmap_populate_hugepages(unsigned long start,
- unsigned long end, int node)
+ unsigned long end, int node, struct vmem_altmap *altmap)
{
unsigned long addr;
unsigned long next;
@@ -1259,7 +1271,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
if (pmd_none(*pmd)) {
void *p;
- p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+ p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
if (p) {
pte_t entry;
@@ -1280,7 +1292,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
addr_end = addr + PMD_SIZE;
p_end = p + PMD_SIZE;
continue;
- }
+ } else if (altmap)
+ return -ENOMEM; /* no fallback */
} else if (pmd_large(*pmd)) {
vmemmap_verify((pte_t *)pmd, node, addr, next);
continue;
@@ -1294,11 +1307,16 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
+ struct vmem_altmap *altmap = to_vmem_altmap(start);
int err;
if (cpu_has_pse)
- err = vmemmap_populate_hugepages(start, end, node);
- else
+ err = vmemmap_populate_hugepages(start, end, node, altmap);
+ else if (altmap) {
+ pr_err_once("%s: no cpu support for altmap allocations\n",
+ __func__);
+ err = -ENOMEM;
+ } else
err = vmemmap_populate_basepages(start, end, node);
if (!err)
sync_global_pgds(start, end - 1, 0);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index b9c78f3bc..0d8d53d1f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -194,8 +194,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* Check if the request spans more than any BAR in the iomem resource
* tree.
*/
- WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
- KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
+ if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
+ pr_warn("caller %pS mapping multiple BARs\n", caller);
return ret_addr;
err_free_area:
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 844b06d67..72bb52f93 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -69,14 +69,14 @@ unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;
- /*
- * 8 bits of randomness in 32bit mmaps, 20 address space bits
- * 28 bits of randomness in 64bit mmaps, 40 address space bits
- */
if (mmap_is_ia32())
- rnd = (unsigned long)get_random_int() % (1<<8);
+#ifdef CONFIG_COMPAT
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
+#else
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+#endif
else
- rnd = (unsigned long)get_random_int() % (1<<28);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c3b3f653e..d04f8094b 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -469,7 +469,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
{
int i, nid;
nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
- unsigned long start, end;
+ phys_addr_t start, end;
struct memblock_region *r;
/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index db20ee9a4..9cf96d821 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -66,6 +66,9 @@ void update_page_count(int level, unsigned long pages)
static void split_page_count(int level)
{
+ if (direct_pages_count[level] == 0)
+ return;
+
direct_pages_count[level]--;
direct_pages_count[level - 1] += PTRS_PER_PTE;
}
@@ -129,14 +132,16 @@ within(unsigned long addr, unsigned long start, unsigned long end)
*/
void clflush_cache_range(void *vaddr, unsigned int size)
{
- unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
+ const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
+ void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
void *vend = vaddr + size;
- void *p;
+
+ if (p >= vend)
+ return;
mb();
- for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- p < vend; p += boot_cpu_data.x86_clflush_size)
+ for (; p < vend; p += clflush_size)
clflushopt(p);
mb();
@@ -414,24 +419,30 @@ pmd_t *lookup_pmd_address(unsigned long address)
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
unsigned long virt_addr = (unsigned long)__virt_addr;
- unsigned long phys_addr, offset;
+ phys_addr_t phys_addr;
+ unsigned long offset;
enum pg_level level;
pte_t *pte;
pte = lookup_address(virt_addr, &level);
BUG_ON(!pte);
+ /*
+ * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
+ * before being left-shifted PAGE_SHIFT bits -- this trick is to
+ * make 32-PAE kernel work correctly.
+ */
switch (level) {
case PG_LEVEL_1G:
- phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
offset = virt_addr & ~PUD_PAGE_MASK;
break;
case PG_LEVEL_2M:
- phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
offset = virt_addr & ~PMD_PAGE_MASK;
break;
default:
- phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
offset = virt_addr & ~PAGE_MASK;
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 188e3e07e..f4ae536b0 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -12,6 +12,7 @@
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/fs.h>
@@ -586,7 +587,7 @@ int free_memtype(u64 start, u64 end)
entry = rbt_memtype_erase(start, end);
spin_unlock(&memtype_lock);
- if (!entry) {
+ if (IS_ERR(entry)) {
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
return -EINVAL;
@@ -949,7 +950,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
}
int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn)
+ pfn_t pfn)
{
enum page_cache_mode pcm;
@@ -957,7 +958,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
return 0;
/* Set prot based on lookup */
- pcm = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
+ pcm = lookup_memtype(pfn_t_to_phys(pfn));
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
@@ -992,6 +993,16 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
vma->vm_flags &= ~VM_PAT;
}
+/*
+ * untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
+ * with the old vma after its pfnmap page table has been removed. The new
+ * vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
+ */
+void untrack_pfn_moved(struct vm_area_struct *vma)
+{
+ vma->vm_flags &= ~VM_PAT;
+}
+
pgprot_t pgprot_writecombine(pgprot_t prot)
{
return __pgprot(pgprot_val(prot) |
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 639310803..2f7702253 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -98,8 +98,13 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
return last_lower; /* Returns NULL if there is no overlap */
}
-static struct memtype *memtype_rb_exact_match(struct rb_root *root,
- u64 start, u64 end)
+enum {
+ MEMTYPE_EXACT_MATCH = 0,
+ MEMTYPE_END_MATCH = 1
+};
+
+static struct memtype *memtype_rb_match(struct rb_root *root,
+ u64 start, u64 end, int match_type)
{
struct memtype *match;
@@ -107,7 +112,12 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
while (match != NULL && match->start < end) {
struct rb_node *node;
- if (match->start == start && match->end == end)
+ if ((match_type == MEMTYPE_EXACT_MATCH) &&
+ (match->start == start) && (match->end == end))
+ return match;
+
+ if ((match_type == MEMTYPE_END_MATCH) &&
+ (match->start < start) && (match->end == end))
return match;
node = rb_next(&match->rb);
@@ -117,7 +127,7 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
match = NULL;
}
- return NULL; /* Returns NULL if there is no exact match */
+ return NULL; /* Returns NULL if there is no match */
}
static int memtype_rb_check_conflict(struct rb_root *root,
@@ -210,12 +220,36 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
{
struct memtype *data;
- data = memtype_rb_exact_match(&memtype_rbroot, start, end);
- if (!data)
- goto out;
+ /*
+ * Since the memtype_rbroot tree allows overlapping ranges,
+ * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * a whole node for the munmap case. If no such entry is found,
+ * it then checks with END_MATCH, i.e. shrink the size of a node
+ * from the end for the mremap case.
+ */
+ data = memtype_rb_match(&memtype_rbroot, start, end,
+ MEMTYPE_EXACT_MATCH);
+ if (!data) {
+ data = memtype_rb_match(&memtype_rbroot, start, end,
+ MEMTYPE_END_MATCH);
+ if (!data)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (data->start == start) {
+ /* munmap: erase this node */
+ rb_erase_augmented(&data->rb, &memtype_rbroot,
+ &memtype_rb_augment_cb);
+ } else {
+ /* mremap: update the end value of this node */
+ rb_erase_augmented(&data->rb, &memtype_rbroot,
+ &memtype_rb_augment_cb);
+ data->end = start;
+ data->subtree_max_end = data->end;
+ memtype_rb_insert(&memtype_rbroot, data);
+ return NULL;
+ }
- rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
-out:
return data;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index fb0a9dd1d..4eb287e25 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -414,7 +414,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*ptep = entry;
- pte_update_defer(vma->vm_mm, address, ptep);
+ pte_update(vma->vm_mm, address, ptep);
}
return changed;
@@ -431,7 +431,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*pmdp = entry;
- pmd_update_defer(vma->vm_mm, address, pmdp);
/*
* We had a write-protection fault here and changed the pmd
* to to more permissive. No need to flush the TLB for that,
@@ -469,9 +468,6 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
(unsigned long *)pmdp);
- if (ret)
- pmd_update(vma->vm_mm, addr, pmdp);
-
return ret;
}
#endif
@@ -509,20 +505,6 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma,
return young;
}
-
-void pmdp_splitting_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- int set;
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
- (unsigned long *)pmdp);
- if (set) {
- pmd_update(vma->vm_mm, address, pmdp);
- /* need tlb flush only to serialize against gup-fast */
- flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
- }
-}
#endif
/**
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 90555bf60..92e2eacb3 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -31,7 +31,7 @@ early_param("noexec", noexec_setup);
void x86_configure_nx(void)
{
- if (cpu_has_nx && !disable_nx)
+ if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
__supported_pte_mask |= _PAGE_NX;
else
__supported_pte_mask &= ~_PAGE_NX;
@@ -39,7 +39,7 @@ void x86_configure_nx(void)
void __init x86_report_nx(void)
{
- if (!cpu_has_nx) {
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
"missing in CPU!\n");
} else {
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index c2aea63be..b5f821881 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -203,6 +203,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
(unsigned long long)start, (unsigned long long)end - 1);
+ max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
+
return 0;
out_err_bad_srat:
bad_srat();