From d0b2f91bede3bd5e3d24dd6803e56eee959c1797 Mon Sep 17 00:00:00 2001
From: André Fabian Silva Delgado
Date: Thu, 20 Oct 2016 00:10:27 -0300
Subject: Linux-libre 4.8.2-gnu

---
 arch/arm64/mm/cache.S       |   2 +-
 arch/arm64/mm/dma-mapping.c | 103 ++++++++++++++---------------
 arch/arm64/mm/dump.c        |  38 ++++++-----
 arch/arm64/mm/fault.c       |  57 +++++++++++++---
 arch/arm64/mm/init.c        |  15 ++---
 arch/arm64/mm/mmu.c         | 154 +++++++++++++------------------------------
 arch/arm64/mm/numa.c        |  30 +++++----
 7 files changed, 196 insertions(+), 203 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 50ff9ba3a..07d7352d7 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -52,7 +52,7 @@ ENTRY(__flush_cache_user_range)
 	sub	x3, x2, #1
 	bic	x4, x0, x3
 1:
-USER(9f, dc	cvau, x4	)		// clean D line to PoU
+user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
 	add	x4, x4, x2
 	cmp	x4, x1
 	b.lo	1b
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c566ec837..c4284c432 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -19,6 +19,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -29,10 +30,12 @@
 
 #include 
 
-static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
+static int swiotlb __read_mostly;
+
+static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
 				 bool coherent)
 {
-	if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
 		return pgprot_writecombine(prot);
 	return prot;
 }
@@ -88,7 +91,7 @@ static int __free_from_pool(void *start, size_t size)
 
 static void *__dma_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	if (dev == NULL) {
 		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
@@ -118,7 +121,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 
 static void __dma_free_coherent(struct device *dev, size_t size,
 				void *vaddr, dma_addr_t dma_handle,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	bool freed;
 	phys_addr_t paddr = dma_to_phys(dev, dma_handle);
@@ -137,7 +140,7 @@ static void __dma_free_coherent(struct device *dev, size_t size,
 
 static void *__dma_alloc(struct device *dev, size_t size,
 			 dma_addr_t *dma_handle, gfp_t flags,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 	struct page *page;
 	void *ptr, *coherent_ptr;
@@ -185,7 +188,7 @@ no_mem:
 
 static void __dma_free(struct device *dev, size_t size,
 		       void *vaddr, dma_addr_t dma_handle,
-		       struct dma_attrs *attrs)
+		       unsigned long attrs)
 {
 	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
 
@@ -202,7 +205,7 @@ static void __dma_free(struct device *dev, size_t size,
 static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction dir,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	dma_addr_t dev_addr;
 
@@ -216,7 +219,7 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
 
 static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
 				 size_t size, enum dma_data_direction dir,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	if (!is_device_dma_coherent(dev))
 		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
@@ -225,7 +228,7 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
 
 static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 				  int nelems, enum dma_data_direction dir,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i, ret;
@@ -242,7 +245,7 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 static void __swiotlb_unmap_sg_attrs(struct device *dev,
 				     struct scatterlist *sgl, int nelems,
 				     enum dma_data_direction dir,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -303,7 +306,7 @@ static void __swiotlb_sync_sg_for_device(struct device *dev,
 static int __swiotlb_mmap(struct device *dev,
 			  struct vm_area_struct *vma,
 			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	int ret = -ENXIO;
 	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
@@ -330,7 +333,7 @@ static int __swiotlb_mmap(struct device *dev,
 
 static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
 				 void *cpu_addr, dma_addr_t handle, size_t size,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
 
@@ -341,6 +344,13 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
 	return ret;
 }
 
+static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
+{
+	if (swiotlb)
+		return swiotlb_dma_supported(hwdev, mask);
+	return 1;
+}
+
 static struct dma_map_ops swiotlb_dma_ops = {
 	.alloc = __dma_alloc,
 	.free = __dma_free,
@@ -354,7 +364,7 @@ static struct dma_map_ops swiotlb_dma_ops = {
 	.sync_single_for_device = __swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
-	.dma_supported = swiotlb_dma_supported,
+	.dma_supported = __swiotlb_dma_supported,
 	.mapping_error = swiotlb_dma_mapping_error,
 };
 
@@ -425,21 +435,21 @@ out:
 
 static void *__dummy_alloc(struct device *dev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t flags,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
 	return NULL;
 }
 
 static void __dummy_free(struct device *dev, size_t size,
 			 void *vaddr, dma_addr_t dma_handle,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 }
 
 static int __dummy_mmap(struct device *dev,
 			struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			struct dma_attrs *attrs)
+			unsigned long attrs)
 {
 	return -ENXIO;
 }
@@ -447,20 +457,20 @@ static int __dummy_mmap(struct device *dev,
 static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	return DMA_ERROR_CODE;
 }
 
 static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
 			       size_t size, enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 }
 
 static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
 			  int nelems, enum dma_data_direction dir,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	return 0;
 }
@@ -468,7 +478,7 @@ static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
 
 static void __dummy_unmap_sg(struct device *dev,
 			     struct scatterlist *sgl, int nelems,
 			     enum dma_data_direction dir,
-			     struct dma_attrs *attrs)
+			     unsigned long attrs)
 {
 }
 
@@ -513,6 +523,9 @@ EXPORT_SYMBOL(dummy_dma_ops);
 
 static int __init arm64_dma_init(void)
 {
+	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+		swiotlb = 1;
+
 	return atomic_pool_init();
 }
 arch_initcall(arm64_dma_init);
@@ -540,7 +553,7 @@ static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
 
 static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 				 dma_addr_t *handle, gfp_t gfp,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	bool coherent = is_device_dma_coherent(dev);
 	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
@@ -600,7 +613,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 }
 
 static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
-			       dma_addr_t handle, struct dma_attrs *attrs)
+			       dma_addr_t handle, unsigned long attrs)
 {
 	size_t iosize = size;
 
@@ -616,7 +629,7 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	 * Hence how dodgy the below logic looks...
 	 */
 	if (__in_atomic_pool(cpu_addr, size)) {
-		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
+		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
 		__free_from_pool(cpu_addr, size);
 	} else if (is_vmalloc_addr(cpu_addr)){
 		struct vm_struct *area = find_vm_area(cpu_addr);
@@ -626,14 +639,14 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 		iommu_dma_free(dev, area->pages, iosize, &handle);
 		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
 	} else {
-		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
+		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
 		__free_pages(virt_to_page(cpu_addr), get_order(size));
 	}
 }
 
 static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	struct vm_struct *area;
 	int ret;
@@ -653,7 +666,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 
 static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
 			       void *cpu_addr, dma_addr_t dma_addr,
-			       size_t size, struct dma_attrs *attrs)
+			       size_t size, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct vm_struct *area = find_vm_area(cpu_addr);
@@ -694,14 +707,14 @@ static void __iommu_sync_single_for_device(struct device *dev,
 static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	bool coherent = is_device_dma_coherent(dev);
 	int prot = dma_direction_to_prot(dir, coherent);
 	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
 
 	if (!iommu_dma_mapping_error(dev, dev_addr) &&
-	    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_single_for_device(dev, dev_addr, size, dir);
 
 	return dev_addr;
@@ -709,9 +722,9 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
 
 static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
 			       size_t size, enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);
 
 	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
@@ -747,11 +760,11 @@ static void __iommu_sync_sg_for_device(struct device *dev,
 
 static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 				int nelems, enum dma_data_direction dir,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	bool coherent = is_device_dma_coherent(dev);
 
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
 
 	return iommu_dma_map_sg(dev, sgl, nelems,
@@ -761,9 +774,9 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 static void __iommu_unmap_sg_attrs(struct device *dev,
 				   struct scatterlist *sgl, int nelems,
 				   enum dma_data_direction dir,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);
 
 	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
@@ -848,15 +861,16 @@ static int __iommu_attach_notifier(struct notifier_block *nb,
 {
 	struct iommu_dma_notifier_data *master, *tmp;
 
-	if (action != BUS_NOTIFY_ADD_DEVICE)
+	if (action != BUS_NOTIFY_BIND_DRIVER)
 		return 0;
 
 	mutex_lock(&iommu_dma_notifier_lock);
 	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
-		if (do_iommu_attach(master->dev, master->ops,
-				master->dma_base, master->size)) {
+		if (data == master->dev && do_iommu_attach(master->dev,
+				master->ops, master->dma_base, master->size)) {
 			list_del(&master->list);
 			kfree(master);
+			break;
 		}
 	}
 	mutex_unlock(&iommu_dma_notifier_lock);
@@ -870,17 +884,8 @@ static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
 	if (!nb)
 		return -ENOMEM;
 
-	/*
-	 * The device must be attached to a domain before the driver probe
-	 * routine gets a chance to start allocating DMA buffers. However,
-	 * the IOMMU driver also needs a chance to configure the iommu_group
-	 * via its add_device callback first, so we need to make the attach
-	 * happen between those two points. Since the IOMMU core uses a bus
-	 * notifier with default priority for add_device, do the same but
-	 * with a lower priority to ensure the appropriate ordering.
-	 */
+
 	nb->notifier_call = __iommu_attach_notifier;
-	nb->priority = -100;
 
 	ret = bus_register_notifier(bus, nb);
 	if (ret) {
@@ -904,10 +909,6 @@ static int __init __iommu_dma_init(void)
 	if (!ret)
 		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
 #endif
-
-	/* handle devices queued before this arch_initcall */
-	if (!ret)
-		__iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL);
 	return ret;
 }
 arch_initcall(__iommu_dma_init);
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index ccfde237d..9c3e75df2 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -27,11 +27,7 @@
 #include 
 #include 
 #include 
-
-struct addr_marker {
-	unsigned long start_address;
-	const char *name;
-};
+#include 
 
 static const struct addr_marker address_markers[] = {
 #ifdef CONFIG_KASAN
@@ -246,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 
 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 {
-	pte_t *pte = pte_offset_kernel(pmd, 0);
+	pte_t *pte = pte_offset_kernel(pmd, 0UL);
 	unsigned long addr;
 	unsigned i;
 
@@ -258,7 +254,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 
 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 {
-	pmd_t *pmd = pmd_offset(pud, 0);
+	pmd_t *pmd = pmd_offset(pud, 0UL);
 	unsigned long addr;
 	unsigned i;
 
@@ -275,7 +271,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 
 static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 {
-	pud_t *pud = pud_offset(pgd, 0);
+	pud_t *pud = pud_offset(pgd, 0UL);
 	unsigned long addr;
 	unsigned i;
 
@@ -290,7 +286,8 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 	}
 }
 
-static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start)
+static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
+		     unsigned long start)
 {
 	pgd_t *pgd = pgd_offset(mm, 0UL);
 	unsigned i;
@@ -309,12 +306,13 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long st
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
+	struct ptdump_info *info = m->private;
 	struct pg_state st = {
 		.seq = m,
-		.marker = address_markers,
+		.marker = info->markers,
 	};
 
-	walk_pgd(&st, &init_mm, VA_START);
+	walk_pgd(&st, info->mm, info->base_addr);
 	note_page(&st, 0, 0, 0);
 
 	return 0;
@@ -322,7 +320,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 
 static int ptdump_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ptdump_show, NULL);
+	return single_open(file, ptdump_show, inode->i_private);
 }
 
 static const struct file_operations ptdump_fops = {
@@ -332,7 +330,7 @@ static const struct file_operations ptdump_fops = {
 	.release	= single_release,
 };
 
-static int ptdump_init(void)
+int ptdump_register(struct ptdump_info *info, const char *name)
 {
 	struct dentry *pe;
 	unsigned i, j;
@@ -342,8 +340,18 @@ static int ptdump_init(void)
 		for (j = 0; j < pg_level[i].num; j++)
 			pg_level[i].mask |= pg_level[i].bits[j].mask;
 
-	pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
-				 &ptdump_fops);
+	pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
 	return pe ? 0 : -ENOMEM;
 }
+
+static struct ptdump_info kernel_ptdump_info = {
+	.mm		= &init_mm,
+	.markers	= address_markers,
+	.base_addr	= VA_START,
+};
+
+static int ptdump_init(void)
+{
+	return ptdump_register(&kernel_ptdump_info, "kernel_page_tables");
+}
 device_initcall(ptdump_init);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index b1166d1e5..05d2bd776 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -41,6 +41,28 @@
 
 static const char *fault_name(unsigned int esr);
 
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+{
+	int ret = 0;
+
+	/* kprobe_running() needs smp_processor_id() */
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, esr))
+			ret = 1;
+		preempt_enable();
+	}
+
+	return ret;
+}
+#else
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+{
+	return 0;
+}
+#endif
+
 /*
  * Dump out the page tables associated with 'addr' in mm 'mm'.
  */
@@ -131,6 +153,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 }
 #endif
 
+static bool is_el1_instruction_abort(unsigned int esr)
+{
+	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+}
+
 /*
  * The kernel tried to access some page that wasn't present.
  */
@@ -139,8 +166,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
 {
 	/*
 	 * Are we prepared to handle this kernel fault?
+	 * We are almost certainly not prepared to handle instruction faults.
 	 */
-	if (fixup_exception(regs))
+	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
 		return;
 
 	/*
@@ -202,8 +230,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
 #define VM_FAULT_BADMAP		0x010000
 #define VM_FAULT_BADACCESS	0x020000
 
-#define ESR_LNX_EXEC		(1 << 24)
-
 static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
 			   unsigned int mm_flags, unsigned long vm_flags,
 			   struct task_struct *tsk)
@@ -233,7 +259,7 @@ good_area:
 		goto out;
 	}
 
-	return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);
+	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
 
 check_stack:
 	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
@@ -242,12 +268,18 @@ out:
 	return fault;
 }
 
-static inline int permission_fault(unsigned int esr)
+static inline bool is_permission_fault(unsigned int esr)
 {
-	unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT;
+	unsigned int ec = ESR_ELx_EC(esr);
 	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
 
-	return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+	return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
+	       (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+}
+
+static bool is_el0_instruction_abort(unsigned int esr)
+{
+	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
 }
 
 static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
@@ -259,6 +291,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
+	if (notify_page_fault(regs, esr))
+		return 0;
+
 	tsk = current;
 	mm = tsk->mm;
 
@@ -272,18 +307,21 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	if (user_mode(regs))
 		mm_flags |= FAULT_FLAG_USER;
 
-	if (esr & ESR_LNX_EXEC) {
+	if (is_el0_instruction_abort(esr)) {
 		vm_flags = VM_EXEC;
 	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
 		vm_flags = VM_WRITE;
 		mm_flags |= FAULT_FLAG_WRITE;
 	}
 
-	if (permission_fault(esr) && (addr < USER_DS)) {
+	if (is_permission_fault(esr) && (addr < USER_DS)) {
 		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
 		if (regs->orig_addr_limit == KERNEL_DS)
 			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
 
+		if (is_el1_instruction_abort(esr))
+			die("Attempting to execute userspace memory", regs, esr);
+
 		if (!search_exception_tables(regs->pc))
 			die("Accessing user space memory outside uaccess.h routines", regs, esr);
 	}
@@ -630,6 +668,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
 
 	return rv;
 }
+NOKPROBE_SYMBOL(do_debug_exception);
 
 #ifdef CONFIG_ARM64_PAN
 void cpu_enable_pan(void *__unused)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d45f86270..bbb7ee76e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -160,12 +160,10 @@ static void __init arm64_memory_present(void)
 static void __init arm64_memory_present(void)
 {
 	struct memblock_region *reg;
-	int nid = 0;
 
 	for_each_memblock(memory, reg) {
-#ifdef CONFIG_NUMA
-		nid = reg->nid;
-#endif
+		int nid = memblock_get_region_node(reg);
+
 		memory_present(nid, memblock_region_memory_base_pfn(reg),
 				memblock_region_memory_end_pfn(reg));
 	}
@@ -226,7 +224,7 @@ void __init arm64_memblock_init(void)
 	 * via the linear mapping.
 	 */
 	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
-		memblock_enforce_memory_limit(memory_limit);
+		memblock_mem_limit_remove_map(memory_limit);
 		memblock_add(__pa(_text), (u64)(_end - _text));
 	}
 
@@ -403,7 +401,8 @@ static void __init free_unused_memmap(void)
  */
 void __init mem_init(void)
 {
-	swiotlb_init(1);
+	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+		swiotlb_init(1);
 
 	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
 
@@ -430,9 +429,9 @@ void __init mem_init(void)
 	pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n",
 		MLG(VMALLOC_START, VMALLOC_END));
 	pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n",
-		MLK_ROUNDUP(_text, __start_rodata));
+		MLK_ROUNDUP(_text, _etext));
 	pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n",
-		MLK_ROUNDUP(__start_rodata, _etext));
+		MLK_ROUNDUP(__start_rodata, __init_begin));
 	pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n",
 		MLK_ROUNDUP(__init_begin, __init_end));
 	pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 3e90a2cad..4989948d1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -77,7 +77,6 @@ static phys_addr_t __init early_pgtable_alloc(void)
 	void *ptr;
 
 	phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-	BUG_ON(!phys);
 
 	/*
 	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
@@ -97,24 +96,6 @@ static phys_addr_t __init early_pgtable_alloc(void)
 	return phys;
 }
 
-/*
- * remap a PMD into pages
- */
-static void split_pmd(pmd_t *pmd, pte_t *pte)
-{
-	unsigned long pfn = pmd_pfn(*pmd);
-	int i = 0;
-
-	do {
-		/*
-		 * Need to have the least restrictive permissions available
-		 * permissions will be fixed up later
-		 */
-		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-		pfn++;
-	} while (pte++, i++, i < PTRS_PER_PTE);
-}
-
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 			   unsigned long end, unsigned long pfn,
 			   pgprot_t prot,
@@ -122,15 +103,13 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 {
 	pte_t *pte;
 
-	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
+	BUG_ON(pmd_sect(*pmd));
+	if (pmd_none(*pmd)) {
 		phys_addr_t pte_phys;
 		BUG_ON(!pgtable_alloc);
 		pte_phys = pgtable_alloc();
 		pte = pte_set_fixmap(pte_phys);
-		if (pmd_sect(*pmd))
-			split_pmd(pmd, pte);
 		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
-		flush_tlb_all();
 		pte_clear_fixmap();
 	}
 	BUG_ON(pmd_bad(*pmd));
@@ -144,41 +123,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	pte_clear_fixmap();
 }
 
-static void split_pud(pud_t *old_pud, pmd_t *pmd)
-{
-	unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
-	pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
-	int i = 0;
-
-	do {
-		set_pmd(pmd, __pmd(addr | pgprot_val(prot)));
-		addr += PMD_SIZE;
-	} while (pmd++, i++, i < PTRS_PER_PMD);
-}
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
-{
-
-	/*
-	 * If debug_page_alloc is enabled we must map the linear map
-	 * using pages. However, other mappings created by
-	 * create_mapping_noalloc must use sections in some cases. Allow
-	 * sections to be used in those cases, where no pgtable_alloc
-	 * function is provided.
-	 */
-	return !pgtable_alloc || !debug_pagealloc_enabled();
-}
-#else
-static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
-{
-	return true;
-}
-#endif
-
 static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 			   phys_addr_t phys, pgprot_t prot,
-			   phys_addr_t (*pgtable_alloc)(void))
+			   phys_addr_t (*pgtable_alloc)(void),
+			   bool allow_block_mappings)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -186,20 +134,13 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 	/*
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
-	if (pud_none(*pud) || pud_sect(*pud)) {
+	BUG_ON(pud_sect(*pud));
+	if (pud_none(*pud)) {
 		phys_addr_t pmd_phys;
 		BUG_ON(!pgtable_alloc);
 		pmd_phys = pgtable_alloc();
 		pmd = pmd_set_fixmap(pmd_phys);
-		if (pud_sect(*pud)) {
-			/*
-			 * need to have the 1G of mappings continue to be
-			 * present
-			 */
-			split_pud(pud, pmd);
-		}
 		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
-		flush_tlb_all();
 		pmd_clear_fixmap();
 	}
 	BUG_ON(pud_bad(*pud));
@@ -209,7 +150,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
 		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
-		      block_mappings_allowed(pgtable_alloc)) {
+		      allow_block_mappings) {
 			pmd_t old_pmd =*pmd;
 			pmd_set_huge(pmd, phys, prot);
 			/*
@@ -248,7 +189,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 
 static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 			   phys_addr_t phys, pgprot_t prot,
-			   phys_addr_t (*pgtable_alloc)(void))
+			   phys_addr_t (*pgtable_alloc)(void),
+			   bool allow_block_mappings)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -268,8 +210,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (use_1G_block(addr, next, phys) &&
-		    block_mappings_allowed(pgtable_alloc)) {
+		if (use_1G_block(addr, next, phys) && allow_block_mappings) {
 			pud_t old_pud = *pud;
 			pud_set_huge(pud, phys, prot);
 
@@ -290,7 +231,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 			}
 		} else {
 			alloc_init_pmd(pud, addr, next, phys, prot,
-				       pgtable_alloc);
+				       pgtable_alloc, allow_block_mappings);
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
@@ -298,15 +239,14 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 	pud_clear_fixmap();
 }
 
-/*
- * Create the page directory entries and any necessary page tables for the
- * mapping specified by 'md'.
- */
-static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
-		     phys_addr_t size, pgprot_t prot,
-		     phys_addr_t (*pgtable_alloc)(void))
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+				 unsigned long virt, phys_addr_t size,
+				 pgprot_t prot,
+				 phys_addr_t (*pgtable_alloc)(void),
+				 bool allow_block_mappings)
 {
 	unsigned long addr, length, end, next;
+	pgd_t *pgd = pgd_offset_raw(pgdir, virt);
 
 	/*
 	 * If the virtual and physical address don't have the same offset
@@ -322,29 +262,23 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
+		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+			       allow_block_mappings);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
 
-static phys_addr_t late_pgtable_alloc(void)
+static phys_addr_t pgd_pgtable_alloc(void)
 {
 	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
-	BUG_ON(!ptr);
+	if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+		BUG();
 
 	/* Ensure the zeroed page is visible to the page table walker */
 	dsb(ishst);
 	return __pa(ptr);
 }
 
-static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
-				 unsigned long virt, phys_addr_t size,
-				 pgprot_t prot,
-				 phys_addr_t (*alloc)(void))
-{
-	init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
-}
-
 /*
  * This function can only be used to modify existing table entries,
  * without allocating new levels of table. Note that this permits the
@@ -358,16 +292,17 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
 			&phys, virt);
 		return;
 	}
-	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     NULL);
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       unsigned long virt, phys_addr_t size,
-			       pgprot_t prot)
+			       pgprot_t prot, bool allow_block_mappings)
 {
+	BUG_ON(mm == &init_mm);
+
 	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
-			     late_pgtable_alloc);
+			     pgd_pgtable_alloc, allow_block_mappings);
 }
 
 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -380,51 +315,54 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 	}
 
 	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     late_pgtable_alloc);
+			     NULL, !debug_pagealloc_enabled());
 }
 
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
 	unsigned long kernel_start = __pa(_text);
-	unsigned long kernel_end = __pa(_etext);
+	unsigned long kernel_end = __pa(__init_begin);
 
 	/*
 	 * Take care not to create a writable alias for the
 	 * read-only text and rodata sections of the kernel image.
 	 */
 
-	/* No overlap with the kernel text */
+	/* No overlap with the kernel text/rodata */
 	if (end < kernel_start || start >= kernel_end) {
 		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
 				     end - start, PAGE_KERNEL,
-				     early_pgtable_alloc);
+				     early_pgtable_alloc,
+				     !debug_pagealloc_enabled());
 		return;
 	}
 
 	/*
-	 * This block overlaps the kernel text mapping.
+	 * This block overlaps the kernel text/rodata mappings.
 	 * Map the portion(s) which don't overlap.
 	 */
 	if (start < kernel_start)
 		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
 				     kernel_start - start, PAGE_KERNEL,
-				     early_pgtable_alloc);
+				     early_pgtable_alloc,
+				     !debug_pagealloc_enabled());
 	if (kernel_end < end)
 		__create_pgd_mapping(pgd, kernel_end, __phys_to_virt(kernel_end),
 				     end - kernel_end, PAGE_KERNEL,
-				     early_pgtable_alloc);
+				     early_pgtable_alloc,
+				     !debug_pagealloc_enabled());
 
 	/*
-	 * Map the linear alias of the [_text, _etext) interval as
+	 * Map the linear alias of the [_text, __init_begin) interval as
 	 * read-only/non-executable. This makes the contents of the
 	 * region accessible to subsystems such as hibernate, but
	 * protects it from inadvertent modification or execution.
 	 */
 	__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
 			     kernel_end - kernel_start, PAGE_KERNEL_RO,
-			     early_pgtable_alloc);
+			     early_pgtable_alloc, !debug_pagealloc_enabled());
 }
 
 static void __init map_mem(pgd_t *pgd)
@@ -449,14 +387,14 @@ void mark_rodata_ro(void)
 {
 	unsigned long section_size;
 
-	section_size = (unsigned long)__start_rodata - (unsigned long)_text;
+	section_size = (unsigned long)_etext - (unsigned long)_text;
 	create_mapping_late(__pa(_text), (unsigned long)_text,
 			    section_size, PAGE_KERNEL_ROX);
 
 	/*
-	 * mark .rodata as read only. Use _etext rather than __end_rodata to
-	 * cover NOTES and EXCEPTION_TABLE.
+	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
+	 * to cover NOTES and EXCEPTION_TABLE.
 	 */
-	section_size = (unsigned long)_etext - (unsigned long)__start_rodata;
+	section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
 	create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
 			    section_size, PAGE_KERNEL_RO);
 }
@@ -481,7 +419,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
 	BUG_ON(!PAGE_ALIGNED(size));
 
 	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
-			     early_pgtable_alloc);
+			     early_pgtable_alloc, !debug_pagealloc_enabled());
 
 	vma->addr	= va_start;
 	vma->phys_addr	= pa_start;
@@ -499,8 +437,8 @@ static void __init map_kernel(pgd_t *pgd)
 {
 	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
 
-	map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
-	map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
+	map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+	map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
 	map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, &vmlinux_init);
 	map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
 
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 98dc1047f..5bb15eab6 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -17,11 +17,14 @@
  * along with this program. If not, see .
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
+
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 nodemask_t numa_nodes_parsed __initdata;
@@ -29,7 +32,7 @@ static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
 
 static int numa_distance_cnt;
 static u8 *numa_distance;
-static int numa_off;
+static bool numa_off;
 
 static __init int numa_parse_early_param(char *opt)
 {
@@ -37,7 +40,7 @@ static __init int numa_parse_early_param(char *opt)
 		return -EINVAL;
 	if (!strncmp(opt, "off", 3)) {
 		pr_info("%s\n", "NUMA turned off");
-		numa_off = 1;
+		numa_off = true;
 	}
 	return 0;
 }
@@ -131,25 +134,25 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid)
  * numa_add_memblk - Set node id to memblk
  * @nid: NUMA node ID of the new memblk
  * @start: Start address of the new memblk
- * @size: Size of the new memblk
+ * @end: End address of the new memblk
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
-int __init numa_add_memblk(int nid, u64 start, u64 size)
+int __init numa_add_memblk(int nid, u64 start, u64 end)
 {
 	int ret;
 
-	ret = memblock_set_node(start, size, &memblock.memory, nid);
+	ret = memblock_set_node(start, (end - start), &memblock.memory, nid);
 	if (ret < 0) {
 		pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n",
-			start, (start + size - 1), nid);
+			start, (end - 1), nid);
 		return ret;
 	}
 
 	node_set(nid, numa_nodes_parsed);
 	pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n",
-		start, (start + size - 1), nid);
+		start, (end - 1), nid);
 
 	return ret;
 }
@@ -362,12 +365,15 @@ static int __init dummy_numa_init(void)
 	int ret;
 	struct memblock_region *mblk;
 
-	pr_info("%s\n", "No NUMA configuration found");
+	if (numa_off)
+		pr_info("NUMA disabled\n"); /* Forced off on command line. */
+	else
+		pr_info("No NUMA configuration found\n");
 	pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
 		0LLU, PFN_PHYS(max_pfn) - 1);
 
 	for_each_memblock(memory, mblk) {
-		ret = numa_add_memblk(0, mblk->base, mblk->size);
+		ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
 		if (!ret)
 			continue;
 
@@ -375,7 +381,7 @@ static int __init dummy_numa_init(void)
 		return ret;
 	}
 
-	numa_off = 1;
+	numa_off = true;
 	return 0;
 }
 
@@ -388,7 +394,9 @@ static int __init dummy_numa_init(void)
 void __init arm64_numa_init(void)
 {
 	if (!numa_off) {
-		if (!numa_init(of_numa_init))
+		if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
+			return;
+		if (acpi_disabled && !numa_init(of_numa_init))
 			return;
 	}
 
-- 
cgit v1.2.3-54-g00ecf