84 files changed, 25183 insertions, 0 deletions
diff --git a/arch/arm/kernel/.gitignore b/arch/arm/kernel/.gitignore
new file mode 100644
index 000000000..c5f676c3c
--- /dev/null
+++ b/arch/arm/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
new file mode 100644
index 000000000..752725dcb
--- /dev/null
+++ b/arch/arm/kernel/Makefile
@@ -0,0 +1,93 @@
+#
+# Makefile for the linux kernel.
+#
+
+CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
+AFLAGS_head.o        := -DTEXT_OFFSET=$(TEXT_OFFSET)
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_insn.o = -pg
+CFLAGS_REMOVE_patch.o = -pg
+endif
+
+CFLAGS_REMOVE_return_address.o = -pg
+
+# Object file lists.
+
+obj-y		:= elf.o entry-common.o irq.o opcodes.o \
+		   process.o ptrace.o reboot.o return_address.o \
+		   setup.o signal.o sigreturn_codes.o \
+		   stacktrace.o sys_arm.o time.o traps.o
+
+obj-$(CONFIG_ATAGS)		+= atags_parse.o
+obj-$(CONFIG_ATAGS_PROC)	+= atags_proc.o
+obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o
+
+ifeq ($(CONFIG_CPU_V7M),y)
+obj-y		+= entry-v7m.o v7m.o
+else
+obj-y		+= entry-armv.o
+endif
+
+obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
+obj-$(CONFIG_ISA_DMA_API)	+= dma.o
+obj-$(CONFIG_FIQ)		+= fiq.o fiqasm.o
+obj-$(CONFIG_MODULES)		+= armksyms.o module.o
+obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
+obj-$(CONFIG_PCI)		+= bios32.o isa.o
+obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
+obj-$(CONFIG_HIBERNATION)	+= hibernate.o
+obj-$(CONFIG_SMP)		+= smp.o
+ifdef CONFIG_MMU
+obj-$(CONFIG_SMP)		+= smp_tlb.o
+endif
+obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
+obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
+obj-$(CONFIG_ARM_ARCH_TIMER)	+= arch_timer.o
+obj-$(CONFIG_FUNCTION_TRACER)	+= entry-ftrace.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
+obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+# Main staffs in KPROBES are in arch/arm/probes/ .
+obj-$(CONFIG_KPROBES)		+= patch.o insn.o
+obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
+obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
+obj-$(CONFIG_KGDB)		+= kgdb.o patch.o
+obj-$(CONFIG_ARM_UNWIND)	+= unwind.o
+obj-$(CONFIG_HAVE_TCM)		+= tcm.o
+obj-$(CONFIG_OF)		+= devtree.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_SWP_EMULATE)	+= swp_emulate.o
+CFLAGS_swp_emulate.o		:= -Wa,-march=armv7-a
+obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
+
+obj-$(CONFIG_CPU_XSCALE)	+= xscale-cp0.o
+obj-$(CONFIG_CPU_XSC3)		+= xscale-cp0.o
+obj-$(CONFIG_CPU_MOHAWK)	+= xscale-cp0.o
+obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
+obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
+obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
+obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o
+CFLAGS_pj4-cp0.o		:= -marm
+AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
+obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
+obj-$(CONFIG_VDSO)		+= vdso.o
+
+ifneq ($(CONFIG_ARCH_EBSA110),y)
+  obj-y		+= io.o
+endif
+
+head-y			:= head$(MMUEXT).o
+obj-$(CONFIG_DEBUG_LL)	+= debug.o
+obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+
+obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
+ifeq ($(CONFIG_ARM_PSCI),y)
+obj-y				+= psci.o psci-call.o
+obj-$(CONFIG_SMP)		+= psci_smp.o
+endif
+
+extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c
new file mode 100644
index 000000000..1791f12c1
--- /dev/null
+++ b/arch/arm/kernel/arch_timer.c
@@ -0,0 +1,44 @@
+/*
+ *  linux/arch/arm/kernel/arch_timer.c
+ *
+ *  Copyright (C) 2011 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+
+#include <asm/delay.h>
+
+#include <clocksource/arm_arch_timer.h>
+
+static unsigned long arch_timer_read_counter_long(void)
+{
+	return arch_timer_read_counter();
+}
+
+static struct delay_timer arch_delay_timer;
+
+static void __init arch_timer_delay_timer_register(void)
+{
+	/* Use the architected timer for the delay loop. */
+	arch_delay_timer.read_current_timer = arch_timer_read_counter_long;
+	arch_delay_timer.freq = arch_timer_get_rate();
+	register_current_timer_delay(&arch_delay_timer);
+}
+
+int __init arch_timer_arch_init(void)
+{
+        u32 arch_timer_rate = arch_timer_get_rate();
+
+	if (arch_timer_rate == 0)
+		return -ENXIO;
+
+	arch_timer_delay_timer_register();
+
+	return 0;
+}
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
new file mode 100644
index 000000000..a88671cfe
--- /dev/null
+++ b/arch/arm/kernel/armksyms.c
@@ -0,0 +1,171 @@
+/*
+ *  linux/arch/arm/kernel/armksyms.c
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/cryptohash.h>
+#include <linux/delay.h>
+#include <linux/in6.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+
+#include <asm/checksum.h>
+#include <asm/ftrace.h>
+
+/*
+ * libgcc functions - functions that are used internally by the
+ * compiler...  (prototypes are not correct though, but that
+ * doesn't really matter since they're not versioned).
+ */
+extern void __ashldi3(void);
+extern void __ashrdi3(void);
+extern void __divsi3(void);
+extern void __lshrdi3(void);
+extern void __modsi3(void);
+extern void __muldi3(void);
+extern void __ucmpdi2(void);
+extern void __udivsi3(void);
+extern void __umodsi3(void);
+extern void __do_div64(void);
+extern void __bswapsi2(void);
+extern void __bswapdi2(void);
+
+extern void __aeabi_idiv(void);
+extern void __aeabi_idivmod(void);
+extern void __aeabi_lasr(void);
+extern void __aeabi_llsl(void);
+extern void __aeabi_llsr(void);
+extern void __aeabi_lmul(void);
+extern void __aeabi_uidiv(void);
+extern void __aeabi_uidivmod(void);
+extern void __aeabi_ulcmp(void);
+
+extern void fpundefinstr(void);
+
+	/* platform dependent support */
+EXPORT_SYMBOL(arm_delay_ops);
+
+	/* networking */
+EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+EXPORT_SYMBOL(__csum_ipv6_magic);
+
+	/* io */
+#ifndef __raw_readsb
+EXPORT_SYMBOL(__raw_readsb);
+#endif
+#ifndef __raw_readsw
+EXPORT_SYMBOL(__raw_readsw);
+#endif
+#ifndef __raw_readsl
+EXPORT_SYMBOL(__raw_readsl);
+#endif
+#ifndef __raw_writesb
+EXPORT_SYMBOL(__raw_writesb);
+#endif
+#ifndef __raw_writesw
+EXPORT_SYMBOL(__raw_writesw);
+#endif
+#ifndef __raw_writesl
+EXPORT_SYMBOL(__raw_writesl);
+#endif
+
+	/* string / mem functions */
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(memchr);
+EXPORT_SYMBOL(__memzero);
+
+#ifdef CONFIG_MMU
+EXPORT_SYMBOL(copy_page);
+
+EXPORT_SYMBOL(__copy_from_user);
+EXPORT_SYMBOL(__copy_to_user);
+EXPORT_SYMBOL(__clear_user);
+
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__get_user_8);
+
+#ifdef __ARMEB__
+EXPORT_SYMBOL(__get_user_64t_1);
+EXPORT_SYMBOL(__get_user_64t_2);
+EXPORT_SYMBOL(__get_user_64t_4);
+EXPORT_SYMBOL(__get_user_32t_8);
+#endif
+
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+#endif
+
+	/* gcc lib functions */
+EXPORT_SYMBOL(__ashldi3);
+EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__divsi3);
+EXPORT_SYMBOL(__lshrdi3);
+EXPORT_SYMBOL(__modsi3);
+EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__ucmpdi2);
+EXPORT_SYMBOL(__udivsi3);
+EXPORT_SYMBOL(__umodsi3);
+EXPORT_SYMBOL(__do_div64);
+EXPORT_SYMBOL(__bswapsi2);
+EXPORT_SYMBOL(__bswapdi2);
+
+#ifdef CONFIG_AEABI
+EXPORT_SYMBOL(__aeabi_idiv);
+EXPORT_SYMBOL(__aeabi_idivmod);
+EXPORT_SYMBOL(__aeabi_lasr);
+EXPORT_SYMBOL(__aeabi_llsl);
+EXPORT_SYMBOL(__aeabi_llsr);
+EXPORT_SYMBOL(__aeabi_lmul);
+EXPORT_SYMBOL(__aeabi_uidiv);
+EXPORT_SYMBOL(__aeabi_uidivmod);
+EXPORT_SYMBOL(__aeabi_ulcmp);
+#endif
+
+	/* bitops */
+EXPORT_SYMBOL(_set_bit);
+EXPORT_SYMBOL(_test_and_set_bit);
+EXPORT_SYMBOL(_clear_bit);
+EXPORT_SYMBOL(_test_and_clear_bit);
+EXPORT_SYMBOL(_change_bit);
+EXPORT_SYMBOL(_test_and_change_bit);
+EXPORT_SYMBOL(_find_first_zero_bit_le);
+EXPORT_SYMBOL(_find_next_zero_bit_le);
+EXPORT_SYMBOL(_find_first_bit_le);
+EXPORT_SYMBOL(_find_next_bit_le);
+
+#ifdef __ARMEB__
+EXPORT_SYMBOL(_find_first_zero_bit_be);
+EXPORT_SYMBOL(_find_next_zero_bit_be);
+EXPORT_SYMBOL(_find_first_bit_be);
+EXPORT_SYMBOL(_find_next_bit_be);
+#endif
+
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_OLD_MCOUNT
+EXPORT_SYMBOL(mcount);
+#endif
+EXPORT_SYMBOL(__gnu_mcount_nc);
+#endif
+
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+EXPORT_SYMBOL(__pv_phys_pfn_offset);
+EXPORT_SYMBOL(__pv_offset);
+#endif
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
new file mode 100644
index 000000000..871b8267d
--- /dev/null
+++ b/arch/arm/kernel/asm-offsets.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 1995-2003 Russell King
+ *               2001-2002 Keith Owens
+ *     
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/compiler.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_KVM_ARM_HOST
+#include <linux/kvm_host.h>
+#endif
+#include <asm/cacheflush.h>
+#include <asm/glue-df.h>
+#include <asm/glue-pf.h>
+#include <asm/mach/arch.h>
+#include <asm/thread_info.h>
+#include <asm/memory.h>
+#include <asm/procinfo.h>
+#include <asm/suspend.h>
+#include <asm/vdso_datapage.h>
+#include <asm/hardware/cache-l2x0.h>
+#include <linux/kbuild.h>
+
+/*
+ * Make sure that the compiler and target are compatible.
+ */
+#if defined(__APCS_26__)
+#error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32
+#endif
+/*
+ * GCC 3.0, 3.1: general bad code generation.
+ * GCC 3.2.0: incorrect function argument offset calculation.
+ * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c
+ *            (http://gcc.gnu.org/PR8896) and incorrect structure
+ *	      initialisation in fs/jffs2/erase.c
+ * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854
+ *	      miscompiles find_get_entry(), and can result in EXT3 and EXT4
+ *	      filesystem corruption (possibly other FS too).
+ */
+#ifdef __GNUC__
+#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#error Your compiler is too buggy; it is known to miscompile kernels.
+#error    Known good compilers: 3.3, 4.x
+#endif
+#if GCC_VERSION >= 40800 && GCC_VERSION < 40803
+#error Your compiler is too buggy; it is known to miscompile kernels
+#error and result in filesystem corruption and oopses.
+#endif
+#endif
+
+int main(void)
+{
+  DEFINE(TSK_ACTIVE_MM,		offsetof(struct task_struct, active_mm));
+#ifdef CONFIG_CC_STACKPROTECTOR
+  DEFINE(TSK_STACK_CANARY,	offsetof(struct task_struct, stack_canary));
+#endif
+  BLANK();
+  DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
+  DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
+  DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
+  DEFINE(TI_TASK,		offsetof(struct thread_info, task));
+  DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
+  DEFINE(TI_CPU_DOMAIN,		offsetof(struct thread_info, cpu_domain));
+  DEFINE(TI_CPU_SAVE,		offsetof(struct thread_info, cpu_context));
+  DEFINE(TI_USED_CP,		offsetof(struct thread_info, used_cp));
+  DEFINE(TI_TP_VALUE,		offsetof(struct thread_info, tp_value));
+  DEFINE(TI_FPSTATE,		offsetof(struct thread_info, fpstate));
+#ifdef CONFIG_VFP
+  DEFINE(TI_VFPSTATE,		offsetof(struct thread_info, vfpstate));
+#ifdef CONFIG_SMP
+  DEFINE(VFP_CPU,		offsetof(union vfp_state, hard.cpu));
+#endif
+#endif
+#ifdef CONFIG_ARM_THUMBEE
+  DEFINE(TI_THUMBEE_STATE,	offsetof(struct thread_info, thumbee_state));
+#endif
+#ifdef CONFIG_IWMMXT
+  DEFINE(TI_IWMMXT_STATE,	offsetof(struct thread_info, fpstate.iwmmxt));
+#endif
+#ifdef CONFIG_CRUNCH
+  DEFINE(TI_CRUNCH_STATE,	offsetof(struct thread_info, crunchstate));
+#endif
+  BLANK();
+  DEFINE(S_R0,			offsetof(struct pt_regs, ARM_r0));
+  DEFINE(S_R1,			offsetof(struct pt_regs, ARM_r1));
+  DEFINE(S_R2,			offsetof(struct pt_regs, ARM_r2));
+  DEFINE(S_R3,			offsetof(struct pt_regs, ARM_r3));
+  DEFINE(S_R4,			offsetof(struct pt_regs, ARM_r4));
+  DEFINE(S_R5,			offsetof(struct pt_regs, ARM_r5));
+  DEFINE(S_R6,			offsetof(struct pt_regs, ARM_r6));
+  DEFINE(S_R7,			offsetof(struct pt_regs, ARM_r7));
+  DEFINE(S_R8,			offsetof(struct pt_regs, ARM_r8));
+  DEFINE(S_R9,			offsetof(struct pt_regs, ARM_r9));
+  DEFINE(S_R10,			offsetof(struct pt_regs, ARM_r10));
+  DEFINE(S_FP,			offsetof(struct pt_regs, ARM_fp));
+  DEFINE(S_IP,			offsetof(struct pt_regs, ARM_ip));
+  DEFINE(S_SP,			offsetof(struct pt_regs, ARM_sp));
+  DEFINE(S_LR,			offsetof(struct pt_regs, ARM_lr));
+  DEFINE(S_PC,			offsetof(struct pt_regs, ARM_pc));
+  DEFINE(S_PSR,			offsetof(struct pt_regs, ARM_cpsr));
+  DEFINE(S_OLD_R0,		offsetof(struct pt_regs, ARM_ORIG_r0));
+  DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
+  BLANK();
+#ifdef CONFIG_CACHE_L2X0
+  DEFINE(L2X0_R_PHY_BASE,	offsetof(struct l2x0_regs, phy_base));
+  DEFINE(L2X0_R_AUX_CTRL,	offsetof(struct l2x0_regs, aux_ctrl));
+  DEFINE(L2X0_R_TAG_LATENCY,	offsetof(struct l2x0_regs, tag_latency));
+  DEFINE(L2X0_R_DATA_LATENCY,	offsetof(struct l2x0_regs, data_latency));
+  DEFINE(L2X0_R_FILTER_START,	offsetof(struct l2x0_regs, filter_start));
+  DEFINE(L2X0_R_FILTER_END,	offsetof(struct l2x0_regs, filter_end));
+  DEFINE(L2X0_R_PREFETCH_CTRL,	offsetof(struct l2x0_regs, prefetch_ctrl));
+  DEFINE(L2X0_R_PWR_CTRL,	offsetof(struct l2x0_regs, pwr_ctrl));
+  BLANK();
+#endif
+#ifdef CONFIG_CPU_HAS_ASID
+  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));
+  BLANK();
+#endif
+  DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));
+  DEFINE(VMA_VM_FLAGS,		offsetof(struct vm_area_struct, vm_flags));
+  BLANK();
+  DEFINE(VM_EXEC,	       	VM_EXEC);
+  BLANK();
+  DEFINE(PAGE_SZ,	       	PAGE_SIZE);
+  BLANK();
+  DEFINE(SYS_ERROR0,		0x9f0000);
+  BLANK();
+  DEFINE(SIZEOF_MACHINE_DESC,	sizeof(struct machine_desc));
+  DEFINE(MACHINFO_TYPE,		offsetof(struct machine_desc, nr));
+  DEFINE(MACHINFO_NAME,		offsetof(struct machine_desc, name));
+  BLANK();
+  DEFINE(PROC_INFO_SZ,		sizeof(struct proc_info_list));
+  DEFINE(PROCINFO_INITFUNC,	offsetof(struct proc_info_list, __cpu_flush));
+  DEFINE(PROCINFO_MM_MMUFLAGS,	offsetof(struct proc_info_list, __cpu_mm_mmu_flags));
+  DEFINE(PROCINFO_IO_MMUFLAGS,	offsetof(struct proc_info_list, __cpu_io_mmu_flags));
+  BLANK();
+#ifdef MULTI_DABORT
+  DEFINE(PROCESSOR_DABT_FUNC,	offsetof(struct processor, _data_abort));
+#endif
+#ifdef MULTI_PABORT
+  DEFINE(PROCESSOR_PABT_FUNC,	offsetof(struct processor, _prefetch_abort));
+#endif
+#ifdef MULTI_CPU
+  DEFINE(CPU_SLEEP_SIZE,	offsetof(struct processor, suspend_size));
+  DEFINE(CPU_DO_SUSPEND,	offsetof(struct processor, do_suspend));
+  DEFINE(CPU_DO_RESUME,		offsetof(struct processor, do_resume));
+#endif
+#ifdef MULTI_CACHE
+  DEFINE(CACHE_FLUSH_KERN_ALL,	offsetof(struct cpu_cache_fns, flush_kern_all));
+#endif
+#ifdef CONFIG_ARM_CPU_SUSPEND
+  DEFINE(SLEEP_SAVE_SP_SZ,	sizeof(struct sleep_save_sp));
+  DEFINE(SLEEP_SAVE_SP_PHYS,	offsetof(struct sleep_save_sp, save_ptr_stash_phys));
+  DEFINE(SLEEP_SAVE_SP_VIRT,	offsetof(struct sleep_save_sp, save_ptr_stash));
+#endif
+  BLANK();
+  DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
+  DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
+  DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
+  BLANK();
+  DEFINE(CACHE_WRITEBACK_ORDER, __CACHE_WRITEBACK_ORDER);
+  DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE);
+  BLANK();
+#ifdef CONFIG_KVM_ARM_HOST
+  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
+  DEFINE(VCPU_MIDR,		offsetof(struct kvm_vcpu, arch.midr));
+  DEFINE(VCPU_CP15,		offsetof(struct kvm_vcpu, arch.cp15));
+  DEFINE(VCPU_VFP_GUEST,	offsetof(struct kvm_vcpu, arch.vfp_guest));
+  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
+  DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
+  DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));
+  DEFINE(VCPU_ABT_REGS,		offsetof(struct kvm_vcpu, arch.regs.abt_regs));
+  DEFINE(VCPU_UND_REGS,		offsetof(struct kvm_vcpu, arch.regs.und_regs));
+  DEFINE(VCPU_IRQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.irq_regs));
+  DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
+  DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
+  DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
+  DEFINE(VCPU_HCR,		offsetof(struct kvm_vcpu, arch.hcr));
+  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
+  DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.fault.hsr));
+  DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
+  DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.fault.hpfar));
+  DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
+  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
+  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
+  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
+  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
+  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
+  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
+  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
+#endif
+  BLANK();
+#ifdef CONFIG_VDSO
+  DEFINE(VDSO_DATA_SIZE,	sizeof(union vdso_data_store));
+#endif
+  return 0; 
+}
diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h
new file mode 100644
index 000000000..ec4164da6
--- /dev/null
+++ b/arch/arm/kernel/atags.h
@@ -0,0 +1,20 @@
+#ifdef CONFIG_ATAGS_PROC
+extern void save_atags(struct tag *tags);
+#else
+static inline void save_atags(struct tag *tags) { }
+#endif
+
+void convert_to_tag_list(struct tag *tags);
+
+#ifdef CONFIG_ATAGS
+const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer,
+	unsigned int machine_nr);
+#else
+static inline const struct machine_desc *
+setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
+{
+	early_print("no ATAGS support: can't continue\n");
+	while (true);
+	unreachable();
+}
+#endif
diff --git a/arch/arm/kernel/atags_compat.c b/arch/arm/kernel/atags_compat.c
new file mode 100644
index 000000000..05c28b123
--- /dev/null
+++ b/arch/arm/kernel/atags_compat.c
@@ -0,0 +1,217 @@
+/*
+ *  linux/arch/arm/kernel/atags_compat.c
+ *
+ *  Copyright (C) 2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * We keep the old params compatibility cruft in one place (here)
+ * so we don't end up with lots of mess around other places.
+ *
+ * NOTE:
+ *  The old struct param_struct is deprecated, but it will be kept in
+ *  the kernel for 5 years from now (2001). This will allow boot loaders
+ *  to convert to the new struct tag way.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/page.h>
+
+#include <asm/mach/arch.h>
+
+#include "atags.h"
+
+/*
+ * Usage:
+ *  - do not go blindly adding fields, add them at the end
+ *  - when adding fields, don't rely on the address until
+ *    a patch from me has been released
+ *  - unused fields should be zero (for future expansion)
+ *  - this structure is relatively short-lived - only
+ *    guaranteed to contain useful data in setup_arch()
+ *
+ * This is the old deprecated way to pass parameters to the kernel
+ */
+struct param_struct {
+    union {
+	struct {
+	    unsigned long page_size;		/*  0 */
+	    unsigned long nr_pages;		/*  4 */
+	    unsigned long ramdisk_size;		/*  8 */
+	    unsigned long flags;		/* 12 */
+#define FLAG_READONLY	1
+#define FLAG_RDLOAD	4
+#define FLAG_RDPROMPT	8
+	    unsigned long rootdev;		/* 16 */
+	    unsigned long video_num_cols;	/* 20 */
+	    unsigned long video_num_rows;	/* 24 */
+	    unsigned long video_x;		/* 28 */
+	    unsigned long video_y;		/* 32 */
+	    unsigned long memc_control_reg;	/* 36 */
+	    unsigned char sounddefault;		/* 40 */
+	    unsigned char adfsdrives;		/* 41 */
+	    unsigned char bytes_per_char_h;	/* 42 */
+	    unsigned char bytes_per_char_v;	/* 43 */
+	    unsigned long pages_in_bank[4];	/* 44 */
+	    unsigned long pages_in_vram;	/* 60 */
+	    unsigned long initrd_start;		/* 64 */
+	    unsigned long initrd_size;		/* 68 */
+	    unsigned long rd_start;		/* 72 */
+	    unsigned long system_rev;		/* 76 */
+	    unsigned long system_serial_low;	/* 80 */
+	    unsigned long system_serial_high;	/* 84 */
+	    unsigned long mem_fclk_21285;       /* 88 */
+	} s;
+	char unused[256];
+    } u1;
+    union {
+	char paths[8][128];
+	struct {
+	    unsigned long magic;
+	    char n[1024 - sizeof(unsigned long)];
+	} s;
+    } u2;
+    char commandline[COMMAND_LINE_SIZE];
+};
+
+static struct tag * __init memtag(struct tag *tag, unsigned long start, unsigned long size)
+{
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_MEM;
+	tag->hdr.size = tag_size(tag_mem32);
+	tag->u.mem.size = size;
+	tag->u.mem.start = start;
+
+	return tag;
+}
+
+static void __init build_tag_list(struct param_struct *params, void *taglist)
+{
+	struct tag *tag = taglist;
+
+	if (params->u1.s.page_size != PAGE_SIZE) {
+		pr_warn("Warning: bad configuration page, trying to continue\n");
+		return;
+	}
+
+	printk(KERN_DEBUG "Converting old-style param struct to taglist\n");
+
+#ifdef CONFIG_ARCH_NETWINDER
+	if (params->u1.s.nr_pages != 0x02000 &&
+	    params->u1.s.nr_pages != 0x04000 &&
+	    params->u1.s.nr_pages != 0x08000 &&
+	    params->u1.s.nr_pages != 0x10000) {
+		pr_warn("Warning: bad NeTTrom parameters detected, using defaults\n");
+
+		params->u1.s.nr_pages = 0x1000;	/* 16MB */
+		params->u1.s.ramdisk_size = 0;
+		params->u1.s.flags = FLAG_READONLY;
+		params->u1.s.initrd_start = 0;
+		params->u1.s.initrd_size = 0;
+		params->u1.s.rd_start = 0;
+	}
+#endif
+
+	tag->hdr.tag  = ATAG_CORE;
+	tag->hdr.size = tag_size(tag_core);
+	tag->u.core.flags = params->u1.s.flags & FLAG_READONLY;
+	tag->u.core.pagesize = params->u1.s.page_size;
+	tag->u.core.rootdev = params->u1.s.rootdev;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_RAMDISK;
+	tag->hdr.size = tag_size(tag_ramdisk);
+	tag->u.ramdisk.flags = (params->u1.s.flags & FLAG_RDLOAD ? 1 : 0) |
+			       (params->u1.s.flags & FLAG_RDPROMPT ? 2 : 0);
+	tag->u.ramdisk.size  = params->u1.s.ramdisk_size;
+	tag->u.ramdisk.start = params->u1.s.rd_start;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_INITRD;
+	tag->hdr.size = tag_size(tag_initrd);
+	tag->u.initrd.start = params->u1.s.initrd_start;
+	tag->u.initrd.size  = params->u1.s.initrd_size;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_SERIAL;
+	tag->hdr.size = tag_size(tag_serialnr);
+	tag->u.serialnr.low = params->u1.s.system_serial_low;
+	tag->u.serialnr.high = params->u1.s.system_serial_high;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_REVISION;
+	tag->hdr.size = tag_size(tag_revision);
+	tag->u.revision.rev = params->u1.s.system_rev;
+
+#ifdef CONFIG_ARCH_ACORN
+	if (machine_is_riscpc()) {
+		int i;
+		for (i = 0; i < 4; i++)
+			tag = memtag(tag, PHYS_OFFSET + (i << 26),
+				 params->u1.s.pages_in_bank[i] * PAGE_SIZE);
+	} else
+#endif
+	tag = memtag(tag, PHYS_OFFSET, params->u1.s.nr_pages * PAGE_SIZE);
+
+#ifdef CONFIG_FOOTBRIDGE
+	if (params->u1.s.mem_fclk_21285) {
+		tag = tag_next(tag);
+		tag->hdr.tag = ATAG_MEMCLK;
+		tag->hdr.size = tag_size(tag_memclk);
+		tag->u.memclk.fmemclk = params->u1.s.mem_fclk_21285;
+	}
+#endif
+
+#ifdef CONFIG_ARCH_EBSA285
+	if (machine_is_ebsa285()) {
+		tag = tag_next(tag);
+		tag->hdr.tag = ATAG_VIDEOTEXT;
+		tag->hdr.size = tag_size(tag_videotext);
+		tag->u.videotext.x            = params->u1.s.video_x;
+		tag->u.videotext.y            = params->u1.s.video_y;
+		tag->u.videotext.video_page   = 0;
+		tag->u.videotext.video_mode   = 0;
+		tag->u.videotext.video_cols   = params->u1.s.video_num_cols;
+		tag->u.videotext.video_ega_bx = 0;
+		tag->u.videotext.video_lines  = params->u1.s.video_num_rows;
+		tag->u.videotext.video_isvga  = 1;
+		tag->u.videotext.video_points = 8;
+	}
+#endif
+
+#ifdef CONFIG_ARCH_ACORN
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_ACORN;
+	tag->hdr.size = tag_size(tag_acorn);
+	tag->u.acorn.memc_control_reg = params->u1.s.memc_control_reg;
+	tag->u.acorn.vram_pages       = params->u1.s.pages_in_vram;
+	tag->u.acorn.sounddefault     = params->u1.s.sounddefault;
+	tag->u.acorn.adfsdrives       = params->u1.s.adfsdrives;
+#endif
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_CMDLINE;
+	tag->hdr.size = (strlen(params->commandline) + 3 +
+			 sizeof(struct tag_header)) >> 2;
+	strcpy(tag->u.cmdline.cmdline, params->commandline);
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_NONE;
+	tag->hdr.size = 0;
+
+	memmove(params, taglist, ((int)tag) - ((int)taglist) +
+				 sizeof(struct tag_header));
+}
+
+void __init convert_to_tag_list(struct tag *tags)
+{
+	struct param_struct *params = (struct param_struct *)tags;
+	build_tag_list(params, &params->u2);
+}
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
new file mode 100644
index 000000000..68c6ae0b9
--- /dev/null
+++ b/arch/arm/kernel/atags_parse.c
@@ -0,0 +1,238 @@
+/*
+ * Tag parsing.
+ *
+ * Copyright (C) 1995-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * This is the traditional way of passing data to the kernel at boot time.  Rather
+ * than passing a fixed inflexible structure to the kernel, we pass a list
+ * of variable-sized tags to the kernel.  The first tag must be a ATAG_CORE
+ * tag for the list to be recognised (to distinguish the tagged list from
+ * a param_struct).  The list is terminated with a zero-length tag (this tag
+ * is not parsed in any way).
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/root_dev.h>
+#include <linux/screen_info.h>
+#include <linux/memblock.h>
+
+#include <asm/setup.h>
+#include <asm/system_info.h>
+#include <asm/page.h>
+#include <asm/mach/arch.h>
+
+#include "atags.h"
+
+static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
+
+#ifndef MEM_SIZE
+#define MEM_SIZE	(16*1024*1024)
+#endif
+
+static struct {
+	struct tag_header hdr1;
+	struct tag_core   core;
+	struct tag_header hdr2;
+	struct tag_mem32  mem;
+	struct tag_header hdr3;
+} default_tags __initdata = {
+	{ tag_size(tag_core), ATAG_CORE },
+	{ 1, PAGE_SIZE, 0xff },
+	{ tag_size(tag_mem32), ATAG_MEM },
+	{ MEM_SIZE },
+	{ 0, ATAG_NONE }
+};
+
+static int __init parse_tag_core(const struct tag *tag)
+{
+	if (tag->hdr.size > 2) {
+		if ((tag->u.core.flags & 1) == 0)
+			root_mountflags &= ~MS_RDONLY;
+		ROOT_DEV = old_decode_dev(tag->u.core.rootdev);
+	}
+	return 0;
+}
+
+__tagtable(ATAG_CORE, parse_tag_core);
+
+static int __init parse_tag_mem32(const struct tag *tag)
+{
+	return arm_add_memory(tag->u.mem.start, tag->u.mem.size);
+}
+
+__tagtable(ATAG_MEM, parse_tag_mem32);
+
+#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
+static int __init parse_tag_videotext(const struct tag *tag)
+{
+	screen_info.orig_x            = tag->u.videotext.x;
+	screen_info.orig_y            = tag->u.videotext.y;
+	screen_info.orig_video_page   = tag->u.videotext.video_page;
+	screen_info.orig_video_mode   = tag->u.videotext.video_mode;
+	screen_info.orig_video_cols   = tag->u.videotext.video_cols;
+	screen_info.orig_video_ega_bx = tag->u.videotext.video_ega_bx;
+	screen_info.orig_video_lines  = tag->u.videotext.video_lines;
+	screen_info.orig_video_isVGA  = tag->u.videotext.video_isvga;
+	screen_info.orig_video_points = tag->u.videotext.video_points;
+	return 0;
+}
+
+__tagtable(ATAG_VIDEOTEXT, parse_tag_videotext);
+#endif
+
+#ifdef CONFIG_BLK_DEV_RAM
+static int __init parse_tag_ramdisk(const struct tag *tag)
+{
+	extern int rd_size, rd_image_start, rd_prompt, rd_doload;
+
+	rd_image_start = tag->u.ramdisk.start;
+	rd_doload = (tag->u.ramdisk.flags & 1) == 0;
+	rd_prompt = (tag->u.ramdisk.flags & 2) == 0;
+
+	if (tag->u.ramdisk.size)
+		rd_size = tag->u.ramdisk.size;
+
+	return 0;
+}
+
+__tagtable(ATAG_RAMDISK, parse_tag_ramdisk);
+#endif
+
+static int __init parse_tag_serialnr(const struct tag *tag)
+{
+	system_serial_low = tag->u.serialnr.low;
+	system_serial_high = tag->u.serialnr.high;
+	return 0;
+}
+
+__tagtable(ATAG_SERIAL, parse_tag_serialnr);
+
+static int __init parse_tag_revision(const struct tag *tag)
+{
+	system_rev = tag->u.revision.rev;
+	return 0;
+}
+
+__tagtable(ATAG_REVISION, parse_tag_revision);
+
+static int __init parse_tag_cmdline(const struct tag *tag)
+{
+#if defined(CONFIG_CMDLINE_EXTEND)
+	strlcat(default_command_line, " ", COMMAND_LINE_SIZE);
+	strlcat(default_command_line, tag->u.cmdline.cmdline,
+		COMMAND_LINE_SIZE);
+#elif defined(CONFIG_CMDLINE_FORCE)
+	pr_warn("Ignoring tag cmdline (using the default kernel command line)\n");
+#else
+	strlcpy(default_command_line, tag->u.cmdline.cmdline,
+		COMMAND_LINE_SIZE);
+#endif
+	return 0;
+}
+
+__tagtable(ATAG_CMDLINE, parse_tag_cmdline);
+
+/*
+ * Scan the tag table for this tag, and call its parse function.
+ * The tag table is built by the linker from all the __tagtable
+ * declarations.
+ */
+static int __init parse_tag(const struct tag *tag)
+{
+	extern struct tagtable __tagtable_begin, __tagtable_end;
+	struct tagtable *t;
+
+	for (t = &__tagtable_begin; t < &__tagtable_end; t++)
+		if (tag->hdr.tag == t->tag) {
+			t->parse(tag);
+			break;
+		}
+
+	return t < &__tagtable_end;
+}
+
+/*
+ * Parse all tags in the list, checking both the global and architecture
+ * specific tag tables.
+ */
+static void __init parse_tags(const struct tag *t)
+{
+	for (; t->hdr.size; t = tag_next(t))
+		if (!parse_tag(t))
+			pr_warn("Ignoring unrecognised tag 0x%08x\n",
+				t->hdr.tag);
+}
+
+static void __init squash_mem_tags(struct tag *tag)
+{
+	for (; tag->hdr.size; tag = tag_next(tag))
+		if (tag->hdr.tag == ATAG_MEM)
+			tag->hdr.tag = ATAG_NONE;
+}
+
+const struct machine_desc * __init
+setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
+{
+	struct tag *tags = (struct tag *)&default_tags;
+	const struct machine_desc *mdesc = NULL, *p;
+	char *from = default_command_line;
+
+	default_tags.mem.start = PHYS_OFFSET;
+
+	/*
+	 * locate machine in the list of supported machines.
+	 */
+	for_each_machine_desc(p)
+		if (machine_nr == p->nr) {
+			pr_info("Machine: %s\n", p->name);
+			mdesc = p;
+			break;
+		}
+
+	if (!mdesc) {
+		early_print("\nError: unrecognized/unsupported machine ID"
+			    " (r1 = 0x%08x).\n\n", machine_nr);
+		dump_machine_table(); /* does not return */
+	}
+
+	if (__atags_pointer)
+		tags = phys_to_virt(__atags_pointer);
+	else if (mdesc->atag_offset)
+		tags = (void *)(PAGE_OFFSET + mdesc->atag_offset);
+
+#if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
+	/*
+	 * If we have the old style parameters, convert them to
+	 * a tag list.
+	 */
+	if (tags->hdr.tag != ATAG_CORE)
+		convert_to_tag_list(tags);
+#endif
+	if (tags->hdr.tag != ATAG_CORE) {
+		early_print("Warning: Neither atags nor dtb found\n");
+		tags = (struct tag *)&default_tags;
+	}
+
+	if (mdesc->fixup)
+		mdesc->fixup(tags, &from);
+
+	if (tags->hdr.tag == ATAG_CORE) {
+		if (memblock_phys_mem_size())
+			squash_mem_tags(tags);
+		save_atags(tags);
+		parse_tags(tags);
+	}
+
+	/* parse_early_param needs a boot_command_line */
+	strlcpy(boot_command_line, from, COMMAND_LINE_SIZE);
+
+	return mdesc;
+}
diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c
new file mode 100644
index 000000000..5a3379055
--- /dev/null
+++ b/arch/arm/kernel/atags_proc.c
@@ -0,0 +1,75 @@
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <asm/setup.h>
+#include <asm/types.h>
+#include <asm/page.h>
+
+struct buffer {
+	size_t size;
+	char data[];
+};
+
+static ssize_t atags_read(struct file *file, char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	struct buffer *b = PDE_DATA(file_inode(file));
+	return simple_read_from_buffer(buf, count, ppos, b->data, b->size);
+}
+
+static const struct file_operations atags_fops = {
+	.read = atags_read,
+	.llseek = default_llseek,
+};
+
+#define BOOT_PARAMS_SIZE 1536
+static char __initdata atags_copy[BOOT_PARAMS_SIZE];
+
+void __init save_atags(const struct tag *tags)
+{
+	memcpy(atags_copy, tags, sizeof(atags_copy));
+}
+
+static int __init init_atags_procfs(void)
+{
+	/*
+	 * This cannot go into save_atags() because kmalloc and proc don't work
+	 * yet when it is called.
+	 */
+	struct proc_dir_entry *tags_entry;
+	struct tag *tag = (struct tag *)atags_copy;
+	struct buffer *b;
+	size_t size;
+
+	if (tag->hdr.tag != ATAG_CORE) {
+		pr_info("No ATAGs?");
+		return -EINVAL;
+	}
+
+	for (; tag->hdr.size; tag = tag_next(tag))
+		;
+
+	/* include the terminating ATAG_NONE */
+	size = (char *)tag - atags_copy + sizeof(struct tag_header);
+
+	WARN_ON(tag->hdr.tag != ATAG_NONE);
+
+	b = kmalloc(sizeof(*b) + size, GFP_KERNEL);
+	if (!b)
+		goto nomem;
+
+	b->size = size;
+	memcpy(b->data, atags_copy, size);
+
+	tags_entry = proc_create_data("atags", 0400, NULL, &atags_fops, b);
+	if (!tags_entry)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	kfree(b);
+	pr_err("Exporting ATAGs: not enough memory\n");
+
+	return -ENOMEM;
+}
+arch_initcall(init_atags_procfs);
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
new file mode 100644
index 000000000..fcbbbb1b9
--- /dev/null
+++ b/arch/arm/kernel/bios32.c
@@ -0,0 +1,647 @@
+/*
+ *  linux/arch/arm/kernel/bios32.c
+ *
+ *  PCI bios-type initialisation for PCI machines
+ *
+ *  Bits taken from various places.
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/io.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/map.h>
+#include <asm/mach/pci.h>
+
+static int debug_pci;
+
+#ifdef CONFIG_PCI_MSI
+struct msi_controller *pcibios_msi_controller(struct pci_dev *dev)
+{
+	struct pci_sys_data *sysdata = dev->bus->sysdata;
+
+	return sysdata->msi_ctrl;
+}
+#endif
+
+/*
+ * We can't use pci_get_device() here since we are
+ * called from interrupt context.
+ */
+static void pcibios_bus_report_status(struct pci_bus *bus, u_int status_mask, int warn)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		u16 status;
+
+		/*
+		 * ignore host bridge - we handle
+		 * that separately
+		 */
+		if (dev->bus->number == 0 && dev->devfn == 0)
+			continue;
+
+		pci_read_config_word(dev, PCI_STATUS, &status);
+		if (status == 0xffff)
+			continue;
+
+		if ((status & status_mask) == 0)
+			continue;
+
+		/* clear the status errors */
+		pci_write_config_word(dev, PCI_STATUS, status & status_mask);
+
+		if (warn)
+			printk("(%s: %04X) ", pci_name(dev), status);
+	}
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		if (dev->subordinate)
+			pcibios_bus_report_status(dev->subordinate, status_mask, warn);
+}
+
+void pcibios_report_status(u_int status_mask, int warn)
+{
+	struct pci_bus *bus;
+
+	list_for_each_entry(bus, &pci_root_buses, node)
+		pcibios_bus_report_status(bus, status_mask, warn);
+}
+
+/*
+ * We don't use this to fix the device, but initialisation of it.
+ * It's not the correct use for this, but it works.
+ * Note that the arbiter/ISA bridge appears to be buggy, specifically in
+ * the following area:
+ * 1. park on CPU
+ * 2. ISA bridge ping-pong
+ * 3. ISA bridge master handling of target RETRY
+ *
+ * Bug 3 is responsible for the sound DMA grinding to a halt.  We now
+ * live with bug 2.
+ */
+static void pci_fixup_83c553(struct pci_dev *dev)
+{
+	/*
+	 * Set memory region to start at address 0, and enable IO
+	 */
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_SPACE_MEMORY);
+	pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_IO);
+
+	dev->resource[0].end -= dev->resource[0].start;
+	dev->resource[0].start = 0;
+
+	/*
+	 * All memory requests from ISA to be channelled to PCI
+	 */
+	pci_write_config_byte(dev, 0x48, 0xff);
+
+	/*
+	 * Enable ping-pong on bus master to ISA bridge transactions.
+	 * This improves the sound DMA substantially.  The fixed
+	 * priority arbiter also helps (see below).
+	 */
+	pci_write_config_byte(dev, 0x42, 0x01);
+
+	/*
+	 * Enable PCI retry
+	 */
+	pci_write_config_byte(dev, 0x40, 0x22);
+
+	/*
+	 * We used to set the arbiter to "park on last master" (bit
+	 * 1 set), but unfortunately the CyberPro does not park the
+	 * bus.  We must therefore park on CPU.  Unfortunately, this
+	 * may trigger yet another bug in the 553.
+	 */
+	pci_write_config_byte(dev, 0x83, 0x02);
+
+	/*
+	 * Make the ISA DMA request lowest priority, and disable
+	 * rotating priorities completely.
+	 */
+	pci_write_config_byte(dev, 0x80, 0x11);
+	pci_write_config_byte(dev, 0x81, 0x00);
+
+	/*
+	 * Route INTA input to IRQ 11, and set IRQ11 to be level
+	 * sensitive.
+	 */
+	pci_write_config_word(dev, 0x44, 0xb000);
+	outb(0x08, 0x4d1);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_83C553, pci_fixup_83c553);
+
+static void pci_fixup_unassign(struct pci_dev *dev)
+{
+	dev->resource[0].end -= dev->resource[0].start;
+	dev->resource[0].start = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND2, PCI_DEVICE_ID_WINBOND2_89C940F, pci_fixup_unassign);
+
+/*
+ * Prevent the PCI layer from seeing the resources allocated to this device
+ * if it is the host bridge by marking it as such.  These resources are of
+ * no consequence to the PCI layer (they are handled elsewhere).
+ */
+static void pci_fixup_dec21285(struct pci_dev *dev)
+{
+	int i;
+
+	if (dev->devfn == 0) {
+		dev->class &= 0xff;
+		dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+			dev->resource[i].start = 0;
+			dev->resource[i].end   = 0;
+			dev->resource[i].flags = 0;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, pci_fixup_dec21285);
+
+/*
+ * PCI IDE controllers use non-standard I/O port decoding, respect it.
+ */
+static void pci_fixup_ide_bases(struct pci_dev *dev)
+{
+	struct resource *r;
+	int i;
+
+	if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
+		return;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		r = dev->resource + i;
+		if ((r->start & ~0x80) == 0x374) {
+			r->start |= 2;
+			r->end = r->start;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
+
+/*
+ * Put the DEC21142 to sleep
+ */
+static void pci_fixup_dec21142(struct pci_dev *dev)
+{
+	pci_write_config_dword(dev, 0x40, 0x80000000);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21142, pci_fixup_dec21142);
+
+/*
+ * The CY82C693 needs some rather major fixups to ensure that it does
+ * the right thing.  Idea from the Alpha people, with a few additions.
+ *
+ * We ensure that the IDE base registers are set to 1f0/3f4 for the
+ * primary bus, and 170/374 for the secondary bus.  Also, hide them
+ * from the PCI subsystem view as well so we won't try to perform
+ * our own auto-configuration on them.
+ *
+ * In addition, we ensure that the PCI IDE interrupts are routed to
+ * IRQ 14 and IRQ 15 respectively.
+ *
+ * The above gets us to a point where the IDE on this device is
+ * functional.  However, The CY82C693U _does not work_ in bus
+ * master mode without locking the PCI bus solid.
+ */
+static void pci_fixup_cy82c693(struct pci_dev *dev)
+{
+	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE) {
+		u32 base0, base1;
+
+		if (dev->class & 0x80) {	/* primary */
+			base0 = 0x1f0;
+			base1 = 0x3f4;
+		} else {			/* secondary */
+			base0 = 0x170;
+			base1 = 0x374;
+		}
+
+		pci_write_config_dword(dev, PCI_BASE_ADDRESS_0,
+				       base0 | PCI_BASE_ADDRESS_SPACE_IO);
+		pci_write_config_dword(dev, PCI_BASE_ADDRESS_1,
+				       base1 | PCI_BASE_ADDRESS_SPACE_IO);
+
+		dev->resource[0].start = 0;
+		dev->resource[0].end   = 0;
+		dev->resource[0].flags = 0;
+
+		dev->resource[1].start = 0;
+		dev->resource[1].end   = 0;
+		dev->resource[1].flags = 0;
+	} else if (PCI_FUNC(dev->devfn) == 0) {
+		/*
+		 * Setup IDE IRQ routing.
+		 */
+		pci_write_config_byte(dev, 0x4b, 14);
+		pci_write_config_byte(dev, 0x4c, 15);
+
+		/*
+		 * Disable FREQACK handshake, enable USB.
+		 */
+		pci_write_config_byte(dev, 0x4d, 0x41);
+
+		/*
+		 * Enable PCI retry, and PCI post-write buffer.
+		 */
+		pci_write_config_byte(dev, 0x44, 0x17);
+
+		/*
+		 * Enable ISA master and DMA post write buffering.
+		 */
+		pci_write_config_byte(dev, 0x45, 0x03);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693, pci_fixup_cy82c693);
+
+static void pci_fixup_it8152(struct pci_dev *dev)
+{
+	int i;
+	/* fixup for ITE 8152 devices */
+	/* FIXME: add defines for class 0x68000 and 0x80103 */
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_HOST ||
+	    dev->class == 0x68000 ||
+	    dev->class == 0x80103) {
+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+			dev->resource[i].start = 0;
+			dev->resource[i].end   = 0;
+			dev->resource[i].flags = 0;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it8152);
+
+/*
+ * If the bus contains any of these devices, then we must not turn on
+ * parity checking of any kind.  Currently this is CyberPro 20x0 only.
+ */
+static inline int pdev_bad_for_parity(struct pci_dev *dev)
+{
+	return ((dev->vendor == PCI_VENDOR_ID_INTERG &&
+		 (dev->device == PCI_DEVICE_ID_INTERG_2000 ||
+		  dev->device == PCI_DEVICE_ID_INTERG_2010)) ||
+		(dev->vendor == PCI_VENDOR_ID_ITE &&
+		 dev->device == PCI_DEVICE_ID_ITE_8152));
+
+}
+
+/*
+ * pcibios_fixup_bus - Called after each bus is probed,
+ * but before its children are examined.
+ */
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	u16 features = PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_FAST_BACK;
+
+	/*
+	 * Walk the devices on this bus, working out what we can
+	 * and can't support.
+	 */
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		u16 status;
+
+		pci_read_config_word(dev, PCI_STATUS, &status);
+
+		/*
+		 * If any device on this bus does not support fast back
+		 * to back transfers, then the bus as a whole is not able
+		 * to support them.  Having fast back to back transfers
+		 * on saves us one PCI cycle per transaction.
+		 */
+		if (!(status & PCI_STATUS_FAST_BACK))
+			features &= ~PCI_COMMAND_FAST_BACK;
+
+		if (pdev_bad_for_parity(dev))
+			features &= ~(PCI_COMMAND_SERR | PCI_COMMAND_PARITY);
+
+		switch (dev->class >> 8) {
+		case PCI_CLASS_BRIDGE_PCI:
+			pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &status);
+			status |= PCI_BRIDGE_CTL_PARITY|PCI_BRIDGE_CTL_MASTER_ABORT;
+			status &= ~(PCI_BRIDGE_CTL_BUS_RESET|PCI_BRIDGE_CTL_FAST_BACK);
+			pci_write_config_word(dev, PCI_BRIDGE_CONTROL, status);
+			break;
+
+		case PCI_CLASS_BRIDGE_CARDBUS:
+			pci_read_config_word(dev, PCI_CB_BRIDGE_CONTROL, &status);
+			status |= PCI_CB_BRIDGE_CTL_PARITY|PCI_CB_BRIDGE_CTL_MASTER_ABORT;
+			pci_write_config_word(dev, PCI_CB_BRIDGE_CONTROL, status);
+			break;
+		}
+	}
+
+	/*
+	 * Now walk the devices again, this time setting them up.
+	 */
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		u16 cmd;
+
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		cmd |= features;
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+		pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
+				      L1_CACHE_BYTES >> 2);
+	}
+
+	/*
+	 * Propagate the flags to the PCI bridge.
+	 */
+	if (bus->self && bus->self->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+		if (features & PCI_COMMAND_FAST_BACK)
+			bus->bridge_ctl |= PCI_BRIDGE_CTL_FAST_BACK;
+		if (features & PCI_COMMAND_PARITY)
+			bus->bridge_ctl |= PCI_BRIDGE_CTL_PARITY;
+	}
+
+	/*
+	 * Report what we did for this bus
+	 */
+	pr_info("PCI: bus%d: Fast back to back transfers %sabled\n",
+		bus->number, (features & PCI_COMMAND_FAST_BACK) ? "en" : "dis");
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+/*
+ * Swizzle the device pin each time we cross a bridge.  If a platform does
+ * not provide a swizzle function, we perform the standard PCI swizzling.
+ *
+ * The default swizzling walks up the bus tree one level at a time, applying
+ * the standard swizzle function at each step, stopping when it finds the PCI
+ * root bus.  This will return the slot number of the bridge device on the
+ * root bus and the interrupt pin on that device which should correspond
+ * with the downstream device interrupt.
+ *
+ * Platforms may override this, in which case the slot and pin returned
+ * depend entirely on the platform code.  However, please note that the
+ * PCI standard swizzle is implemented on plug-in cards and Cardbus based
+ * PCI extenders, so it can not be ignored.
+ */
+static u8 pcibios_swizzle(struct pci_dev *dev, u8 *pin)
+{
+	struct pci_sys_data *sys = dev->sysdata;
+	int slot, oldpin = *pin;
+
+	if (sys->swizzle)
+		slot = sys->swizzle(dev, pin);
+	else
+		slot = pci_common_swizzle(dev, pin);
+
+	if (debug_pci)
+		printk("PCI: %s swizzling pin %d => pin %d slot %d\n",
+			pci_name(dev), oldpin, *pin, slot);
+
+	return slot;
+}
+
+/*
+ * Map a slot/pin to an IRQ.
+ */
+static int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct pci_sys_data *sys = dev->sysdata;
+	int irq = -1;
+
+	if (sys->map_irq)
+		irq = sys->map_irq(dev, slot, pin);
+
+	if (debug_pci)
+		printk("PCI: %s mapping slot %d pin %d => irq %d\n",
+			pci_name(dev), slot, pin, irq);
+
+	return irq;
+}
+
+static int pcibios_init_resources(int busnr, struct pci_sys_data *sys)
+{
+	int ret;
+	struct resource_entry *window;
+
+	if (list_empty(&sys->resources)) {
+		pci_add_resource_offset(&sys->resources,
+			 &iomem_resource, sys->mem_offset);
+	}
+
+	resource_list_for_each_entry(window, &sys->resources)
+		if (resource_type(window->res) == IORESOURCE_IO)
+			return 0;
+
+	sys->io_res.start = (busnr * SZ_64K) ?  : pcibios_min_io;
+	sys->io_res.end = (busnr + 1) * SZ_64K - 1;
+	sys->io_res.flags = IORESOURCE_IO;
+	sys->io_res.name = sys->io_res_name;
+	sprintf(sys->io_res_name, "PCI%d I/O", busnr);
+
+	ret = request_resource(&ioport_resource, &sys->io_res);
+	if (ret) {
+		pr_err("PCI: unable to allocate I/O port region (%d)\n", ret);
+		return ret;
+	}
+	pci_add_resource_offset(&sys->resources, &sys->io_res,
+				sys->io_offset);
+
+	return 0;
+}
+
+static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
+			    struct list_head *head)
+{
+	struct pci_sys_data *sys = NULL;
+	int ret;
+	int nr, busnr;
+
+	for (nr = busnr = 0; nr < hw->nr_controllers; nr++) {
+		sys = kzalloc(sizeof(struct pci_sys_data), GFP_KERNEL);
+		if (!sys)
+			panic("PCI: unable to allocate sys data!");
+
+#ifdef CONFIG_PCI_MSI
+		sys->msi_ctrl = hw->msi_ctrl;
+#endif
+		sys->busnr   = busnr;
+		sys->swizzle = hw->swizzle;
+		sys->map_irq = hw->map_irq;
+		sys->align_resource = hw->align_resource;
+		INIT_LIST_HEAD(&sys->resources);
+
+		if (hw->private_data)
+			sys->private_data = hw->private_data[nr];
+
+		ret = hw->setup(nr, sys);
+
+		if (ret > 0) {
+			ret = pcibios_init_resources(nr, sys);
+			if (ret)  {
+				kfree(sys);
+				break;
+			}
+
+			if (hw->scan)
+				sys->bus = hw->scan(nr, sys);
+			else
+				sys->bus = pci_scan_root_bus(parent, sys->busnr,
+						hw->ops, sys, &sys->resources);
+
+			if (!sys->bus)
+				panic("PCI: unable to scan bus!");
+
+			busnr = sys->bus->busn_res.end + 1;
+
+			list_add(&sys->node, head);
+		} else {
+			kfree(sys);
+			if (ret < 0)
+				break;
+		}
+	}
+}
+
+void pci_common_init_dev(struct device *parent, struct hw_pci *hw)
+{
+	struct pci_sys_data *sys;
+	LIST_HEAD(head);
+
+	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
+	if (hw->preinit)
+		hw->preinit();
+	pcibios_init_hw(parent, hw, &head);
+	if (hw->postinit)
+		hw->postinit();
+
+	pci_fixup_irqs(pcibios_swizzle, pcibios_map_irq);
+
+	list_for_each_entry(sys, &head, node) {
+		struct pci_bus *bus = sys->bus;
+
+		if (!pci_has_flag(PCI_PROBE_ONLY)) {
+			/*
+			 * Size the bridge windows.
+			 */
+			pci_bus_size_bridges(bus);
+
+			/*
+			 * Assign resources.
+			 */
+			pci_bus_assign_resources(bus);
+		}
+
+		/*
+		 * Tell drivers about devices found.
+		 */
+		pci_bus_add_devices(bus);
+	}
+
+	list_for_each_entry(sys, &head, node) {
+		struct pci_bus *bus = sys->bus;
+
+		/* Configure PCI Express settings */
+		if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
+			struct pci_bus *child;
+
+			list_for_each_entry(child, &bus->children, node)
+				pcie_bus_configure_settings(child);
+		}
+	}
+}
+
+#ifndef CONFIG_PCI_HOST_ITE8152
+void pcibios_set_master(struct pci_dev *dev)
+{
+	/* No special bus mastering setup handling */
+}
+#endif
+
+char * __init pcibios_setup(char *str)
+{
+	if (!strcmp(str, "debug")) {
+		debug_pci = 1;
+		return NULL;
+	} else if (!strcmp(str, "firmware")) {
+		pci_add_flags(PCI_PROBE_ONLY);
+		return NULL;
+	}
+	return str;
+}
+
+/*
+ * From arch/i386/kernel/pci-i386.c:
+ *
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might be mirrored at 0x0100-0x03ff..
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				resource_size_t size, resource_size_t align)
+{
+	struct pci_dev *dev = data;
+	struct pci_sys_data *sys = dev->sysdata;
+	resource_size_t start = res->start;
+
+	if (res->flags & IORESOURCE_IO && start & 0x300)
+		start = (start + 0x3ff) & ~0x3ff;
+
+	start = (start + align - 1) & ~(align - 1);
+
+	if (sys->align_resource)
+		return sys->align_resource(dev, res, start, size, align);
+
+	return start;
+}
+
+/**
+ * pcibios_enable_device - Enable I/O and memory.
+ * @dev: PCI device to be enabled
+ */
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	if (pci_has_flag(PCI_PROBE_ONLY))
+		return 0;
+
+	return pci_enable_resources(dev, mask);
+}
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	if (mmap_state == pci_mmap_io)
+		return -EINVAL;
+
+	/*
+	 * Mark this as IO
+	 */
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			     vma->vm_end - vma->vm_start,
+			     vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+void __init pci_map_io_early(unsigned long pfn)
+{
+	struct map_desc pci_io_desc = {
+		.virtual	= PCI_IO_VIRT_BASE,
+		.type		= MT_DEVICE,
+		.length		= SZ_64K,
+	};
+
+	pci_io_desc.pfn = pfn;
+	iotable_init(&pci_io_desc, 1);
+}
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
new file mode 100644
index 000000000..05745eb83
--- /dev/null
+++ b/arch/arm/kernel/calls.S
@@ -0,0 +1,406 @@
+/*
+ *  linux/arch/arm/kernel/calls.S
+ *
+ *  Copyright (C) 1995-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file is included thrice in entry-common.S
+ */
+/* 0 */		CALL(sys_restart_syscall)
+		CALL(sys_exit)
+		CALL(sys_fork)
+		CALL(sys_read)
+		CALL(sys_write)
+/* 5 */		CALL(sys_open)
+		CALL(sys_close)
+		CALL(sys_ni_syscall)		/* was sys_waitpid */
+		CALL(sys_creat)
+		CALL(sys_link)
+/* 10 */	CALL(sys_unlink)
+		CALL(sys_execve)
+		CALL(sys_chdir)
+		CALL(OBSOLETE(sys_time))	/* used by libc4 */
+		CALL(sys_mknod)
+/* 15 */	CALL(sys_chmod)
+		CALL(sys_lchown16)
+		CALL(sys_ni_syscall)		/* was sys_break */
+		CALL(sys_ni_syscall)		/* was sys_stat */
+		CALL(sys_lseek)
+/* 20 */	CALL(sys_getpid)
+		CALL(sys_mount)
+		CALL(OBSOLETE(sys_oldumount))	/* used by libc4 */
+		CALL(sys_setuid16)
+		CALL(sys_getuid16)
+/* 25 */	CALL(OBSOLETE(sys_stime))
+		CALL(sys_ptrace)
+		CALL(OBSOLETE(sys_alarm))	/* used by libc4 */
+		CALL(sys_ni_syscall)		/* was sys_fstat */
+		CALL(sys_pause)
+/* 30 */	CALL(OBSOLETE(sys_utime))	/* used by libc4 */
+		CALL(sys_ni_syscall)		/* was sys_stty */
+		CALL(sys_ni_syscall)		/* was sys_getty */
+		CALL(sys_access)
+		CALL(sys_nice)
+/* 35 */	CALL(sys_ni_syscall)		/* was sys_ftime */
+		CALL(sys_sync)
+		CALL(sys_kill)
+		CALL(sys_rename)
+		CALL(sys_mkdir)
+/* 40 */	CALL(sys_rmdir)
+		CALL(sys_dup)
+		CALL(sys_pipe)
+		CALL(sys_times)
+		CALL(sys_ni_syscall)		/* was sys_prof */
+/* 45 */	CALL(sys_brk)
+		CALL(sys_setgid16)
+		CALL(sys_getgid16)
+		CALL(sys_ni_syscall)		/* was sys_signal */
+		CALL(sys_geteuid16)
+/* 50 */	CALL(sys_getegid16)
+		CALL(sys_acct)
+		CALL(sys_umount)
+		CALL(sys_ni_syscall)		/* was sys_lock */
+		CALL(sys_ioctl)
+/* 55 */	CALL(sys_fcntl)
+		CALL(sys_ni_syscall)		/* was sys_mpx */
+		CALL(sys_setpgid)
+		CALL(sys_ni_syscall)		/* was sys_ulimit */
+		CALL(sys_ni_syscall)		/* was sys_olduname */
+/* 60 */	CALL(sys_umask)
+		CALL(sys_chroot)
+		CALL(sys_ustat)
+		CALL(sys_dup2)
+		CALL(sys_getppid)
+/* 65 */	CALL(sys_getpgrp)
+		CALL(sys_setsid)
+		CALL(sys_sigaction)
+		CALL(sys_ni_syscall)		/* was sys_sgetmask */
+		CALL(sys_ni_syscall)		/* was sys_ssetmask */
+/* 70 */	CALL(sys_setreuid16)
+		CALL(sys_setregid16)
+		CALL(sys_sigsuspend)
+		CALL(sys_sigpending)
+		CALL(sys_sethostname)
+/* 75 */	CALL(sys_setrlimit)
+		CALL(OBSOLETE(sys_old_getrlimit)) /* used by libc4 */
+		CALL(sys_getrusage)
+		CALL(sys_gettimeofday)
+		CALL(sys_settimeofday)
+/* 80 */	CALL(sys_getgroups16)
+		CALL(sys_setgroups16)
+		CALL(OBSOLETE(sys_old_select))	/* used by libc4 */
+		CALL(sys_symlink)
+		CALL(sys_ni_syscall)		/* was sys_lstat */
+/* 85 */	CALL(sys_readlink)
+		CALL(sys_uselib)
+		CALL(sys_swapon)
+		CALL(sys_reboot)
+		CALL(OBSOLETE(sys_old_readdir))	/* used by libc4 */
+/* 90 */	CALL(OBSOLETE(sys_old_mmap))	/* used by libc4 */
+		CALL(sys_munmap)
+		CALL(sys_truncate)
+		CALL(sys_ftruncate)
+		CALL(sys_fchmod)
+/* 95 */	CALL(sys_fchown16)
+		CALL(sys_getpriority)
+		CALL(sys_setpriority)
+		CALL(sys_ni_syscall)		/* was sys_profil */
+		CALL(sys_statfs)
+/* 100 */	CALL(sys_fstatfs)
+		CALL(sys_ni_syscall)		/* sys_ioperm */
+		CALL(OBSOLETE(ABI(sys_socketcall, sys_oabi_socketcall)))
+		CALL(sys_syslog)
+		CALL(sys_setitimer)
+/* 105 */	CALL(sys_getitimer)
+		CALL(sys_newstat)
+		CALL(sys_newlstat)
+		CALL(sys_newfstat)
+		CALL(sys_ni_syscall)		/* was sys_uname */
+/* 110 */	CALL(sys_ni_syscall)		/* was sys_iopl */
+		CALL(sys_vhangup)
+		CALL(sys_ni_syscall)
+		CALL(OBSOLETE(sys_syscall))	/* call a syscall */
+		CALL(sys_wait4)
+/* 115 */	CALL(sys_swapoff)
+		CALL(sys_sysinfo)
+		CALL(OBSOLETE(ABI(sys_ipc, sys_oabi_ipc)))
+		CALL(sys_fsync)
+		CALL(sys_sigreturn_wrapper)
+/* 120 */	CALL(sys_clone)
+		CALL(sys_setdomainname)
+		CALL(sys_newuname)
+		CALL(sys_ni_syscall)		/* modify_ldt */
+		CALL(sys_adjtimex)
+/* 125 */	CALL(sys_mprotect)
+		CALL(sys_sigprocmask)
+		CALL(sys_ni_syscall)		/* was sys_create_module */
+		CALL(sys_init_module)
+		CALL(sys_delete_module)
+/* 130 */	CALL(sys_ni_syscall)		/* was sys_get_kernel_syms */
+		CALL(sys_quotactl)
+		CALL(sys_getpgid)
+		CALL(sys_fchdir)
+		CALL(sys_bdflush)
+/* 135 */	CALL(sys_sysfs)
+		CALL(sys_personality)
+		CALL(sys_ni_syscall)		/* reserved for afs_syscall */
+		CALL(sys_setfsuid16)
+		CALL(sys_setfsgid16)
+/* 140 */	CALL(sys_llseek)
+		CALL(sys_getdents)
+		CALL(sys_select)
+		CALL(sys_flock)
+		CALL(sys_msync)
+/* 145 */	CALL(sys_readv)
+		CALL(sys_writev)
+		CALL(sys_getsid)
+		CALL(sys_fdatasync)
+		CALL(sys_sysctl)
+/* 150 */	CALL(sys_mlock)
+		CALL(sys_munlock)
+		CALL(sys_mlockall)
+		CALL(sys_munlockall)
+		CALL(sys_sched_setparam)
+/* 155 */	CALL(sys_sched_getparam)
+		CALL(sys_sched_setscheduler)
+		CALL(sys_sched_getscheduler)
+		CALL(sys_sched_yield)
+		CALL(sys_sched_get_priority_max)
+/* 160 */	CALL(sys_sched_get_priority_min)
+		CALL(sys_sched_rr_get_interval)
+		CALL(sys_nanosleep)
+		CALL(sys_mremap)
+		CALL(sys_setresuid16)
+/* 165 */	CALL(sys_getresuid16)
+		CALL(sys_ni_syscall)		/* vm86 */
+		CALL(sys_ni_syscall)		/* was sys_query_module */
+		CALL(sys_poll)
+		CALL(sys_ni_syscall)		/* was nfsservctl */
+/* 170 */	CALL(sys_setresgid16)
+		CALL(sys_getresgid16)
+		CALL(sys_prctl)
+		CALL(sys_rt_sigreturn_wrapper)
+		CALL(sys_rt_sigaction)
+/* 175 */	CALL(sys_rt_sigprocmask)
+		CALL(sys_rt_sigpending)
+		CALL(sys_rt_sigtimedwait)
+		CALL(sys_rt_sigqueueinfo)
+		CALL(sys_rt_sigsuspend)
+/* 180 */	CALL(ABI(sys_pread64, sys_oabi_pread64))
+		CALL(ABI(sys_pwrite64, sys_oabi_pwrite64))
+		CALL(sys_chown16)
+		CALL(sys_getcwd)
+		CALL(sys_capget)
+/* 185 */	CALL(sys_capset)
+		CALL(sys_sigaltstack)
+		CALL(sys_sendfile)
+		CALL(sys_ni_syscall)		/* getpmsg */
+		CALL(sys_ni_syscall)		/* putpmsg */
+/* 190 */	CALL(sys_vfork)
+		CALL(sys_getrlimit)
+		CALL(sys_mmap2)
+		CALL(ABI(sys_truncate64, sys_oabi_truncate64))
+		CALL(ABI(sys_ftruncate64, sys_oabi_ftruncate64))
+/* 195 */	CALL(ABI(sys_stat64, sys_oabi_stat64))
+		CALL(ABI(sys_lstat64, sys_oabi_lstat64))
+		CALL(ABI(sys_fstat64, sys_oabi_fstat64))
+		CALL(sys_lchown)
+		CALL(sys_getuid)
+/* 200 */	CALL(sys_getgid)
+		CALL(sys_geteuid)
+		CALL(sys_getegid)
+		CALL(sys_setreuid)
+		CALL(sys_setregid)
+/* 205 */	CALL(sys_getgroups)
+		CALL(sys_setgroups)
+		CALL(sys_fchown)
+		CALL(sys_setresuid)
+		CALL(sys_getresuid)
+/* 210 */	CALL(sys_setresgid)
+		CALL(sys_getresgid)
+		CALL(sys_chown)
+		CALL(sys_setuid)
+		CALL(sys_setgid)
+/* 215 */	CALL(sys_setfsuid)
+		CALL(sys_setfsgid)
+		CALL(sys_getdents64)
+		CALL(sys_pivot_root)
+		CALL(sys_mincore)
+/* 220 */	CALL(sys_madvise)
+		CALL(ABI(sys_fcntl64, sys_oabi_fcntl64))
+		CALL(sys_ni_syscall) /* TUX */
+		CALL(sys_ni_syscall)
+		CALL(sys_gettid)
+/* 225 */	CALL(ABI(sys_readahead, sys_oabi_readahead))
+		CALL(sys_setxattr)
+		CALL(sys_lsetxattr)
+		CALL(sys_fsetxattr)
+		CALL(sys_getxattr)
+/* 230 */	CALL(sys_lgetxattr)
+		CALL(sys_fgetxattr)
+		CALL(sys_listxattr)
+		CALL(sys_llistxattr)
+		CALL(sys_flistxattr)
+/* 235 */	CALL(sys_removexattr)
+		CALL(sys_lremovexattr)
+		CALL(sys_fremovexattr)
+		CALL(sys_tkill)
+		CALL(sys_sendfile64)
+/* 240 */	CALL(sys_futex)
+		CALL(sys_sched_setaffinity)
+		CALL(sys_sched_getaffinity)
+		CALL(sys_io_setup)
+		CALL(sys_io_destroy)
+/* 245 */	CALL(sys_io_getevents)
+		CALL(sys_io_submit)
+		CALL(sys_io_cancel)
+		CALL(sys_exit_group)
+		CALL(sys_lookup_dcookie)
+/* 250 */	CALL(sys_epoll_create)
+		CALL(ABI(sys_epoll_ctl, sys_oabi_epoll_ctl))
+		CALL(ABI(sys_epoll_wait, sys_oabi_epoll_wait))
+		CALL(sys_remap_file_pages)
+		CALL(sys_ni_syscall)	/* sys_set_thread_area */
+/* 255 */	CALL(sys_ni_syscall)	/* sys_get_thread_area */
+		CALL(sys_set_tid_address)
+		CALL(sys_timer_create)
+		CALL(sys_timer_settime)
+		CALL(sys_timer_gettime)
+/* 260 */	CALL(sys_timer_getoverrun)
+		CALL(sys_timer_delete)
+		CALL(sys_clock_settime)
+		CALL(sys_clock_gettime)
+		CALL(sys_clock_getres)
+/* 265 */	CALL(sys_clock_nanosleep)
+		CALL(sys_statfs64_wrapper)
+		CALL(sys_fstatfs64_wrapper)
+		CALL(sys_tgkill)
+		CALL(sys_utimes)
+/* 270 */	CALL(sys_arm_fadvise64_64)
+		CALL(sys_pciconfig_iobase)
+		CALL(sys_pciconfig_read)
+		CALL(sys_pciconfig_write)
+		CALL(sys_mq_open)
+/* 275 */	CALL(sys_mq_unlink)
+		CALL(sys_mq_timedsend)
+		CALL(sys_mq_timedreceive)
+		CALL(sys_mq_notify)
+		CALL(sys_mq_getsetattr)
+/* 280 */	CALL(sys_waitid)
+		CALL(sys_socket)
+		CALL(ABI(sys_bind, sys_oabi_bind))
+		CALL(ABI(sys_connect, sys_oabi_connect))
+		CALL(sys_listen)
+/* 285 */	CALL(sys_accept)
+		CALL(sys_getsockname)
+		CALL(sys_getpeername)
+		CALL(sys_socketpair)
+		CALL(sys_send)
+/* 290 */	CALL(ABI(sys_sendto, sys_oabi_sendto))
+		CALL(sys_recv)
+		CALL(sys_recvfrom)
+		CALL(sys_shutdown)
+		CALL(sys_setsockopt)
+/* 295 */	CALL(sys_getsockopt)
+		CALL(ABI(sys_sendmsg, sys_oabi_sendmsg))
+		CALL(sys_recvmsg)
+		CALL(ABI(sys_semop, sys_oabi_semop))
+		CALL(sys_semget)
+/* 300 */	CALL(sys_semctl)
+		CALL(sys_msgsnd)
+		CALL(sys_msgrcv)
+		CALL(sys_msgget)
+		CALL(sys_msgctl)
+/* 305 */	CALL(sys_shmat)
+		CALL(sys_shmdt)
+		CALL(sys_shmget)
+		CALL(sys_shmctl)
+		CALL(sys_add_key)
+/* 310 */	CALL(sys_request_key)
+		CALL(sys_keyctl)
+		CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
+/* vserver */	CALL(sys_ni_syscall)
+		CALL(sys_ioprio_set)
+/* 315 */	CALL(sys_ioprio_get)
+		CALL(sys_inotify_init)
+		CALL(sys_inotify_add_watch)
+		CALL(sys_inotify_rm_watch)
+		CALL(sys_mbind)
+/* 320 */	CALL(sys_get_mempolicy)
+		CALL(sys_set_mempolicy)
+		CALL(sys_openat)
+		CALL(sys_mkdirat)
+		CALL(sys_mknodat)
+/* 325 */	CALL(sys_fchownat)
+		CALL(sys_futimesat)
+		CALL(ABI(sys_fstatat64,  sys_oabi_fstatat64))
+		CALL(sys_unlinkat)
+		CALL(sys_renameat)
+/* 330 */	CALL(sys_linkat)
+		CALL(sys_symlinkat)
+		CALL(sys_readlinkat)
+		CALL(sys_fchmodat)
+		CALL(sys_faccessat)
+/* 335 */	CALL(sys_pselect6)
+		CALL(sys_ppoll)
+		CALL(sys_unshare)
+		CALL(sys_set_robust_list)
+		CALL(sys_get_robust_list)
+/* 340 */	CALL(sys_splice)
+		CALL(sys_sync_file_range2)
+		CALL(sys_tee)
+		CALL(sys_vmsplice)
+		CALL(sys_move_pages)
+/* 345 */	CALL(sys_getcpu)
+		CALL(sys_epoll_pwait)
+		CALL(sys_kexec_load)
+		CALL(sys_utimensat)
+		CALL(sys_signalfd)
+/* 350 */	CALL(sys_timerfd_create)
+		CALL(sys_eventfd)
+		CALL(sys_fallocate)
+		CALL(sys_timerfd_settime)
+		CALL(sys_timerfd_gettime)
+/* 355 */	CALL(sys_signalfd4)
+		CALL(sys_eventfd2)
+		CALL(sys_epoll_create1)
+		CALL(sys_dup3)
+		CALL(sys_pipe2)
+/* 360 */	CALL(sys_inotify_init1)
+		CALL(sys_preadv)
+		CALL(sys_pwritev)
+		CALL(sys_rt_tgsigqueueinfo)
+		CALL(sys_perf_event_open)
+/* 365 */	CALL(sys_recvmmsg)
+		CALL(sys_accept4)
+		CALL(sys_fanotify_init)
+		CALL(sys_fanotify_mark)
+		CALL(sys_prlimit64)
+/* 370 */	CALL(sys_name_to_handle_at)
+		CALL(sys_open_by_handle_at)
+		CALL(sys_clock_adjtime)
+		CALL(sys_syncfs)
+		CALL(sys_sendmmsg)
+/* 375 */	CALL(sys_setns)
+		CALL(sys_process_vm_readv)
+		CALL(sys_process_vm_writev)
+		CALL(sys_kcmp)
+		CALL(sys_finit_module)
+/* 380 */	CALL(sys_sched_setattr)
+		CALL(sys_sched_getattr)
+		CALL(sys_renameat2)
+		CALL(sys_seccomp)
+		CALL(sys_getrandom)
+/* 385 */	CALL(sys_memfd_create)
+		CALL(sys_bpf)
+		CALL(sys_execveat)
+#ifndef syscalls_counted
+.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
+#define syscalls_counted
+#endif
+.rept syscalls_padding
+		CALL(sys_ni_syscall)
+.endr
diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c
new file mode 100644
index 000000000..318da3346
--- /dev/null
+++ b/arch/arm/kernel/cpuidle.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2012 Linaro Ltd.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <asm/cpuidle.h>
+
+extern struct of_cpuidle_method __cpuidle_method_of_table[];
+
+static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel
+	__used __section(__cpuidle_method_of_table_end);
+
+static struct cpuidle_ops cpuidle_ops[NR_CPUS];
+
+/**
+ * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle()
+ * @dev: not used
+ * @drv: not used
+ * @index: not used
+ *
+ * A trivial wrapper to allow the cpu_do_idle function to be assigned as a
+ * cpuidle callback by matching the function signature.
+ *
+ * Returns the index passed as parameter
+ */
+int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
+{
+	cpu_do_idle();
+
+	return index;
+}
+
+/**
+ * arm_cpuidle_suspend() - function to enter low power idle states
+ * @index: an integer used as an identifier for the low level PM callbacks
+ *
+ * This function calls the underlying arch specific low level PM code as
+ * registered at the init time.
+ *
+ * Returns -EOPNOTSUPP if no suspend callback is defined, the result of the
+ * callback otherwise.
+ */
+int arm_cpuidle_suspend(int index)
+{
+	int ret = -EOPNOTSUPP;
+	int cpu = smp_processor_id();
+
+	if (cpuidle_ops[cpu].suspend)
+		ret = cpuidle_ops[cpu].suspend(cpu, index);
+
+	return ret;
+}
+
+/**
+ * arm_cpuidle_get_ops() - find a registered cpuidle_ops by name
+ * @method: the method name
+ *
+ * Search in the __cpuidle_method_of_table array the cpuidle ops matching the
+ * method name.
+ *
+ * Returns a struct cpuidle_ops pointer, NULL if not found.
+ */
+static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
+{
+	struct of_cpuidle_method *m = __cpuidle_method_of_table;
+
+	for (; m->method; m++)
+		if (!strcmp(m->method, method))
+			return m->ops;
+
+	return NULL;
+}
+
+/**
+ * arm_cpuidle_read_ops() - Initialize the cpuidle ops with the device tree
+ * @dn: a pointer to a struct device node corresponding to a cpu node
+ * @cpu: the cpu identifier
+ *
+ * Get the method name defined in the 'enable-method' property, retrieve the
+ * associated cpuidle_ops and do a struct copy. This copy is needed because all
+ * cpuidle_ops are tagged __initdata and will be unloaded after the init
+ * process.
+ *
+ * Return 0 on sucess, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if
+ * no cpuidle_ops is registered for the 'enable-method'.
+ */
+static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu)
+{
+	const char *enable_method;
+	struct cpuidle_ops *ops;
+
+	enable_method = of_get_property(dn, "enable-method", NULL);
+	if (!enable_method)
+		return -ENOENT;
+
+	ops = arm_cpuidle_get_ops(enable_method);
+	if (!ops) {
+		pr_warn("%s: unsupported enable-method property: %s\n",
+			dn->full_name, enable_method);
+		return -EOPNOTSUPP;
+	}
+
+	cpuidle_ops[cpu] = *ops; /* structure copy */
+
+	pr_notice("cpuidle: enable-method property '%s'"
+		  " found operations\n", enable_method);
+
+	return 0;
+}
+
+/**
+ * arm_cpuidle_init() - Initialize cpuidle_ops for a specific cpu
+ * @cpu: the cpu to be initialized
+ *
+ * Initialize the cpuidle ops with the device for the cpu and then call
+ * the cpu's idle initialization callback. This may fail if the underlying HW
+ * is not operational.
+ *
+ * Returns:
+ *  0 on success,
+ *  -ENODEV if it fails to find the cpu node in the device tree,
+ *  -EOPNOTSUPP if it does not find a registered cpuidle_ops for this cpu,
+ *  -ENOENT if it fails to find an 'enable-method' property,
+ *  -ENXIO if the HW reports a failure or a misconfiguration,
+ *  -ENOMEM if the HW report an memory allocation failure 
+ */
+int __init arm_cpuidle_init(int cpu)
+{
+	struct device_node *cpu_node = of_cpu_device_node_get(cpu);
+	int ret;
+
+	if (!cpu_node)
+		return -ENODEV;
+
+	ret = arm_cpuidle_read_ops(cpu_node, cpu);
+	if (!ret && cpuidle_ops[cpu].init)
+		ret = cpuidle_ops[cpu].init(cpu_node, cpu);
+
+	of_node_put(cpu_node);
+
+	return ret;
+}
diff --git a/arch/arm/kernel/crash_dump.c b/arch/arm/kernel/crash_dump.c
new file mode 100644
index 000000000..5d1286d51
--- /dev/null
+++ b/arch/arm/kernel/crash_dump.c
@@ -0,0 +1,57 @@
+/*
+ * arch/arm/kernel/crash_dump.c
+ *
+ * Copyright (C) 2010 Nokia Corporation.
+ * Author: Mika Westerberg
+ *
+ * This code is taken from arch/x86/kernel/crash_dump_64.c
+ *   Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
+ *   Copyright (C) IBM Corporation, 2004. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/crash_dump.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is int he user address space
+ *
+ * This function copies one page from old kernel memory into buffer pointed by
+ * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
+ * copied or negative error in case of failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset,
+			 int userbuf)
+{
+	void *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = ioremap(__pfn_to_phys(pfn), PAGE_SIZE);
+	if (!vaddr)
+		return -ENOMEM;
+
+	if (userbuf) {
+		if (copy_to_user(buf, vaddr + offset, csize)) {
+			iounmap(vaddr);
+			return -EFAULT;
+		}
+	} else {
+		memcpy(buf, vaddr + offset, csize);
+	}
+
+	iounmap(vaddr);
+	return csize;
+}
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
new file mode 100644
index 000000000..78c91b5f9
--- /dev/null
+++ b/arch/arm/kernel/debug.S
@@ -0,0 +1,138 @@
+/*
+ *  linux/arch/arm/kernel/debug.S
+ *
+ *  Copyright (C) 1994-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  32-bit debugging code
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+/*
+ * Some debugging routines (useful if you've got MM problems and
+ * printk isn't working).  For DEBUGGING ONLY!!!  Do not leave
+ * references to these in a production kernel!
+ */
+
+#if !defined(CONFIG_DEBUG_SEMIHOSTING)
+#include CONFIG_DEBUG_LL_INCLUDE
+#endif
+
+#ifdef CONFIG_MMU
+		.macro	addruart_current, rx, tmp1, tmp2
+		addruart	\tmp1, \tmp2, \rx
+		mrc		p15, 0, \rx, c1, c0
+		tst		\rx, #1
+		moveq		\rx, \tmp1
+		movne		\rx, \tmp2
+		.endm
+
+#else /* !CONFIG_MMU */
+		.macro	addruart_current, rx, tmp1, tmp2
+		addruart	\rx, \tmp1
+		.endm
+
+#endif /* CONFIG_MMU */
+
+/*
+ * Useful debugging routines
+ */
+ENTRY(printhex8)
+		mov	r1, #8
+		b	printhex
+ENDPROC(printhex8)
+
+ENTRY(printhex4)
+		mov	r1, #4
+		b	printhex
+ENDPROC(printhex4)
+
+ENTRY(printhex2)
+		mov	r1, #2
+printhex:	adr	r2, hexbuf
+		add	r3, r2, r1
+		mov	r1, #0
+		strb	r1, [r3]
+1:		and	r1, r0, #15
+		mov	r0, r0, lsr #4
+		cmp	r1, #10
+		addlt	r1, r1, #'0'
+		addge	r1, r1, #'a' - 10
+		strb	r1, [r3, #-1]!
+		teq	r3, r2
+		bne	1b
+		mov	r0, r2
+		b	printascii
+ENDPROC(printhex2)
+
+hexbuf:		.space 16
+
+		.ltorg
+
+#ifndef CONFIG_DEBUG_SEMIHOSTING
+
+ENTRY(printascii)
+		addruart_current r3, r1, r2
+		b	2f
+1:		waituart r2, r3
+		senduart r1, r3
+		busyuart r2, r3
+		teq	r1, #'\n'
+		moveq	r1, #'\r'
+		beq	1b
+2:		teq	r0, #0
+		ldrneb	r1, [r0], #1
+		teqne	r1, #0
+		bne	1b
+		ret	lr
+ENDPROC(printascii)
+
+ENTRY(printch)
+		addruart_current r3, r1, r2
+		mov	r1, r0
+		mov	r0, #0
+		b	1b
+ENDPROC(printch)
+
+#ifdef CONFIG_MMU
+ENTRY(debug_ll_addr)
+		addruart r2, r3, ip
+		str	r2, [r0]
+		str	r3, [r1]
+		ret	lr
+ENDPROC(debug_ll_addr)
+#endif
+
+#else
+
+ENTRY(printascii)
+		mov	r1, r0
+		mov	r0, #0x04		@ SYS_WRITE0
+	ARM(	svc	#0x123456	)
+	THUMB(	svc	#0xab		)
+		ret	lr
+ENDPROC(printascii)
+
+ENTRY(printch)
+		adr	r1, hexbuf
+		strb	r0, [r1]
+		mov	r0, #0x03		@ SYS_WRITEC
+	ARM(	svc	#0x123456	)
+	THUMB(	svc	#0xab		)
+		ret	lr
+ENDPROC(printch)
+
+ENTRY(debug_ll_addr)
+		mov	r2, #0
+		str	r2, [r0]
+		str	r2, [r1]
+		ret	lr
+ENDPROC(debug_ll_addr)
+
+#endif
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
new file mode 100644
index 000000000..11c54de9f
--- /dev/null
+++ b/arch/arm/kernel/devtree.c
@@ -0,0 +1,250 @@
+/*
+ *  linux/arch/arm/kernel/devtree.c
+ *
+ *  Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/smp.h>
+
+#include <asm/cputype.h>
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/smp_plat.h>
+#include <asm/mach/arch.h>
+#include <asm/mach-types.h>
+
+
+#ifdef CONFIG_SMP
+extern struct of_cpu_method __cpu_method_of_table[];
+
+static const struct of_cpu_method __cpu_method_of_table_sentinel
+	__used __section(__cpu_method_of_table_end);
+
+
+static int __init set_smp_ops_by_method(struct device_node *node)
+{
+	const char *method;
+	struct of_cpu_method *m = __cpu_method_of_table;
+
+	if (of_property_read_string(node, "enable-method", &method))
+		return 0;
+
+	for (; m->method; m++)
+		if (!strcmp(m->method, method)) {
+			smp_set_ops(m->ops);
+			return 1;
+		}
+
+	return 0;
+}
+#else
+static inline int set_smp_ops_by_method(struct device_node *node)
+{
+	return 1;
+}
+#endif
+
+
+/*
+ * arm_dt_init_cpu_maps - Function retrieves cpu nodes from the device tree
+ * and builds the cpu logical map array containing MPIDR values related to
+ * logical cpus
+ *
+ * Updates the cpu possible mask with the number of parsed cpu nodes
+ */
+void __init arm_dt_init_cpu_maps(void)
+{
+	/*
+	 * Temp logical map is initialized with UINT_MAX values that are
+	 * considered invalid logical map entries since the logical map must
+	 * contain a list of MPIDR[23:0] values where MPIDR[31:24] must
+	 * read as 0.
+	 */
+	struct device_node *cpu, *cpus;
+	int found_method = 0;
+	u32 i, j, cpuidx = 1;
+	u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
+
+	u32 tmp_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID };
+	bool bootcpu_valid = false;
+	cpus = of_find_node_by_path("/cpus");
+
+	if (!cpus)
+		return;
+
+	for_each_child_of_node(cpus, cpu) {
+		u32 hwid;
+
+		if (of_node_cmp(cpu->type, "cpu"))
+			continue;
+
+		pr_debug(" * %s...\n", cpu->full_name);
+		/*
+		 * A device tree containing CPU nodes with missing "reg"
+		 * properties is considered invalid to build the
+		 * cpu_logical_map.
+		 */
+		if (of_property_read_u32(cpu, "reg", &hwid)) {
+			pr_debug(" * %s missing reg property\n",
+				     cpu->full_name);
+			return;
+		}
+
+		/*
+		 * 8 MSBs must be set to 0 in the DT since the reg property
+		 * defines the MPIDR[23:0].
+		 */
+		if (hwid & ~MPIDR_HWID_BITMASK)
+			return;
+
+		/*
+		 * Duplicate MPIDRs are a recipe for disaster.
+		 * Scan all initialized entries and check for
+		 * duplicates. If any is found just bail out.
+		 * temp values were initialized to UINT_MAX
+		 * to avoid matching valid MPIDR[23:0] values.
+		 */
+		for (j = 0; j < cpuidx; j++)
+			if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg "
+						     "properties in the DT\n"))
+				return;
+
+		/*
+		 * Build a stashed array of MPIDR values. Numbering scheme
+		 * requires that if detected the boot CPU must be assigned
+		 * logical id 0. Other CPUs get sequential indexes starting
+		 * from 1. If a CPU node with a reg property matching the
+		 * boot CPU MPIDR is detected, this is recorded so that the
+		 * logical map built from DT is validated and can be used
+		 * to override the map created in smp_setup_processor_id().
+		 */
+		if (hwid == mpidr) {
+			i = 0;
+			bootcpu_valid = true;
+		} else {
+			i = cpuidx++;
+		}
+
+		if (WARN(cpuidx > nr_cpu_ids, "DT /cpu %u nodes greater than "
+					       "max cores %u, capping them\n",
+					       cpuidx, nr_cpu_ids)) {
+			cpuidx = nr_cpu_ids;
+			break;
+		}
+
+		tmp_map[i] = hwid;
+
+		if (!found_method)
+			found_method = set_smp_ops_by_method(cpu);
+	}
+
+	/*
+	 * Fallback to an enable-method in the cpus node if nothing found in
+	 * a cpu node.
+	 */
+	if (!found_method)
+		set_smp_ops_by_method(cpus);
+
+	if (!bootcpu_valid) {
+		pr_warn("DT missing boot CPU MPIDR[23:0], fall back to default cpu_logical_map\n");
+		return;
+	}
+
+	/*
+	 * Since the boot CPU node contains proper data, and all nodes have
+	 * a reg property, the DT CPU list can be considered valid and the
+	 * logical map created in smp_setup_processor_id() can be overridden
+	 */
+	for (i = 0; i < cpuidx; i++) {
+		set_cpu_possible(i, true);
+		cpu_logical_map(i) = tmp_map[i];
+		pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i));
+	}
+}
+
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+	return phys_id == cpu_logical_map(cpu);
+}
+
+static const void * __init arch_get_next_mach(const char *const **match)
+{
+	static const struct machine_desc *mdesc = __arch_info_begin;
+	const struct machine_desc *m = mdesc;
+
+	if (m >= __arch_info_end)
+		return NULL;
+
+	mdesc++;
+	*match = m->dt_compat;
+	return m;
+}
+
+/**
+ * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
+ * @dt_phys: physical address of dt blob
+ *
+ * If a dtb was passed to the kernel in r2, then use it to choose the
+ * correct machine_desc and to setup the system.
+ */
+const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
+{
+	const struct machine_desc *mdesc, *mdesc_best = NULL;
+
+#ifdef CONFIG_ARCH_MULTIPLATFORM
+	DT_MACHINE_START(GENERIC_DT, "Generic DT based system")
+	MACHINE_END
+
+	mdesc_best = &__mach_desc_GENERIC_DT;
+#endif
+
+	if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys)))
+		return NULL;
+
+	mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach);
+
+	if (!mdesc) {
+		const char *prop;
+		int size;
+		unsigned long dt_root;
+
+		early_print("\nError: unrecognized/unsupported "
+			    "device tree compatible list:\n[ ");
+
+		dt_root = of_get_flat_dt_root();
+		prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
+		while (size > 0) {
+			early_print("'%s' ", prop);
+			size -= strlen(prop) + 1;
+			prop += strlen(prop) + 1;
+		}
+		early_print("]\n\n");
+
+		dump_machine_table(); /* does not return */
+	}
+
+	/* We really don't want to do this, but sometimes firmware provides buggy data */
+	if (mdesc->dt_fixup)
+		mdesc->dt_fixup();
+
+	early_init_dt_scan_nodes();
+
+	/* Change machine number to match the mdesc we're using */
+	__machine_arch_type = mdesc->nr;
+
+	return mdesc;
+}
diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c
new file mode 100644
index 000000000..84363fe7b
--- /dev/null
+++ b/arch/arm/kernel/dma-isa.c
@@ -0,0 +1,222 @@
+/*
+ *  linux/arch/arm/kernel/dma-isa.c
+ *
+ *  Copyright (C) 1999-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ISA DMA primitives
+ *  Taken from various sources, including:
+ *   linux/include/asm/dma.h: Defines for using and allocating dma channels.
+ *     Written by Hennus Bergman, 1992.
+ *     High DMA channel support & info by Hannu Savolainen and John Boyd,
+ *     Nov. 1992.
+ *   arch/arm/kernel/dma-ebsa285.c
+ *   Copyright (C) 1998 Phil Blundell
+ */
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+
+#include <asm/dma.h>
+#include <asm/mach/dma.h>
+
+#define ISA_DMA_MASK		0
+#define ISA_DMA_MODE		1
+#define ISA_DMA_CLRFF		2
+#define ISA_DMA_PGHI		3
+#define ISA_DMA_PGLO		4
+#define ISA_DMA_ADDR		5
+#define ISA_DMA_COUNT		6
+
+static unsigned int isa_dma_port[8][7] = {
+	/* MASK   MODE   CLRFF  PAGE_HI PAGE_LO ADDR COUNT */
+	{  0x0a,  0x0b,  0x0c,  0x487,  0x087,  0x00, 0x01 },
+	{  0x0a,  0x0b,  0x0c,  0x483,  0x083,  0x02, 0x03 },
+	{  0x0a,  0x0b,  0x0c,  0x481,  0x081,  0x04, 0x05 },
+	{  0x0a,  0x0b,  0x0c,  0x482,  0x082,  0x06, 0x07 },
+	{  0xd4,  0xd6,  0xd8,  0x000,  0x000,  0xc0, 0xc2 },
+	{  0xd4,  0xd6,  0xd8,  0x48b,  0x08b,  0xc4, 0xc6 },
+	{  0xd4,  0xd6,  0xd8,  0x489,  0x089,  0xc8, 0xca },
+	{  0xd4,  0xd6,  0xd8,  0x48a,  0x08a,  0xcc, 0xce }
+};
+
+static int isa_get_dma_residue(unsigned int chan, dma_t *dma)
+{
+	unsigned int io_port = isa_dma_port[chan][ISA_DMA_COUNT];
+	int count;
+
+	count = 1 + inb(io_port);
+	count |= inb(io_port) << 8;
+
+	return chan < 4 ? count : (count << 1);
+}
+
+static void isa_enable_dma(unsigned int chan, dma_t *dma)
+{
+	if (dma->invalid) {
+		unsigned long address, length;
+		unsigned int mode;
+		enum dma_data_direction direction;
+
+		mode = (chan & 3) | dma->dma_mode;
+		switch (dma->dma_mode & DMA_MODE_MASK) {
+		case DMA_MODE_READ:
+			direction = DMA_FROM_DEVICE;
+			break;
+
+		case DMA_MODE_WRITE:
+			direction = DMA_TO_DEVICE;
+			break;
+
+		case DMA_MODE_CASCADE:
+			direction = DMA_BIDIRECTIONAL;
+			break;
+
+		default:
+			direction = DMA_NONE;
+			break;
+		}
+
+		if (!dma->sg) {
+			/*
+			 * Cope with ISA-style drivers which expect cache
+			 * coherence.
+			 */
+			dma->sg = &dma->buf;
+			dma->sgcount = 1;
+			dma->buf.length = dma->count;
+			dma->buf.dma_address = dma_map_single(NULL,
+				dma->addr, dma->count,
+				direction);
+		}
+
+		address = dma->buf.dma_address;
+		length  = dma->buf.length - 1;
+
+		outb(address >> 16, isa_dma_port[chan][ISA_DMA_PGLO]);
+		outb(address >> 24, isa_dma_port[chan][ISA_DMA_PGHI]);
+
+		if (chan >= 4) {
+			address >>= 1;
+			length >>= 1;
+		}
+
+		outb(0, isa_dma_port[chan][ISA_DMA_CLRFF]);
+
+		outb(address, isa_dma_port[chan][ISA_DMA_ADDR]);
+		outb(address >> 8, isa_dma_port[chan][ISA_DMA_ADDR]);
+
+		outb(length, isa_dma_port[chan][ISA_DMA_COUNT]);
+		outb(length >> 8, isa_dma_port[chan][ISA_DMA_COUNT]);
+
+		outb(mode, isa_dma_port[chan][ISA_DMA_MODE]);
+		dma->invalid = 0;
+	}
+	outb(chan & 3, isa_dma_port[chan][ISA_DMA_MASK]);
+}
+
+static void isa_disable_dma(unsigned int chan, dma_t *dma)
+{
+	outb(chan | 4, isa_dma_port[chan][ISA_DMA_MASK]);
+}
+
+static struct dma_ops isa_dma_ops = {
+	.type		= "ISA",
+	.enable		= isa_enable_dma,
+	.disable	= isa_disable_dma,
+	.residue	= isa_get_dma_residue,
+};
+
+static struct resource dma_resources[] = { {
+	.name	= "dma1",
+	.start	= 0x0000,
+	.end	= 0x000f
+}, {
+	.name	= "dma low page",
+	.start	= 0x0080,
+	.end 	= 0x008f
+}, {
+	.name	= "dma2",
+	.start	= 0x00c0,
+	.end	= 0x00df
+}, {
+	.name	= "dma high page",
+	.start	= 0x0480,
+	.end	= 0x048f
+} };
+
+static dma_t isa_dma[8];
+
+/*
+ * ISA DMA always starts at channel 0
+ */
+void __init isa_init_dma(void)
+{
+	/*
+	 * Try to autodetect presence of an ISA DMA controller.
+	 * We do some minimal initialisation, and check that
+	 * channel 0's DMA address registers are writeable.
+	 */
+	outb(0xff, 0x0d);
+	outb(0xff, 0xda);
+
+	/*
+	 * Write high and low address, and then read them back
+	 * in the same order.
+	 */
+	outb(0x55, 0x00);
+	outb(0xaa, 0x00);
+
+	if (inb(0) == 0x55 && inb(0) == 0xaa) {
+		unsigned int chan, i;
+
+		for (chan = 0; chan < 8; chan++) {
+			isa_dma[chan].d_ops = &isa_dma_ops;
+			isa_disable_dma(chan, NULL);
+		}
+
+		outb(0x40, 0x0b);
+		outb(0x41, 0x0b);
+		outb(0x42, 0x0b);
+		outb(0x43, 0x0b);
+
+		outb(0xc0, 0xd6);
+		outb(0x41, 0xd6);
+		outb(0x42, 0xd6);
+		outb(0x43, 0xd6);
+
+		outb(0, 0xd4);
+
+		outb(0x10, 0x08);
+		outb(0x10, 0xd0);
+
+		/*
+		 * Is this correct?  According to my documentation, it
+		 * doesn't appear to be.  It should be:
+		 *  outb(0x3f, 0x40b); outb(0x3f, 0x4d6);
+		 */
+		outb(0x30, 0x40b);
+		outb(0x31, 0x40b);
+		outb(0x32, 0x40b);
+		outb(0x33, 0x40b);
+		outb(0x31, 0x4d6);
+		outb(0x32, 0x4d6);
+		outb(0x33, 0x4d6);
+
+		for (i = 0; i < ARRAY_SIZE(dma_resources); i++)
+			request_resource(&ioport_resource, dma_resources + i);
+
+		for (chan = 0; chan < 8; chan++) {
+			int ret = isa_dma_add(chan, &isa_dma[chan]);
+			if (ret)
+				pr_err("ISADMA%u: unable to register: %d\n",
+				       chan, ret);
+		}
+
+		request_dma(DMA_ISA_CASCADE, "cascade");
+	}
+}
diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c
new file mode 100644
index 000000000..e651c4d0a
--- /dev/null
+++ b/arch/arm/kernel/dma.c
@@ -0,0 +1,298 @@
+/*
+ *  linux/arch/arm/kernel/dma.c
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Front-end to the DMA handling.  This handles the allocation/freeing
+ *  of DMA channels, and provides a unified interface to the machines
+ *  DMA facilities.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/scatterlist.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+
+#include <asm/dma.h>
+
+#include <asm/mach/dma.h>
+
+DEFINE_RAW_SPINLOCK(dma_spin_lock);
+EXPORT_SYMBOL(dma_spin_lock);
+
+static dma_t *dma_chan[MAX_DMA_CHANNELS];
+
+static inline dma_t *dma_channel(unsigned int chan)
+{
+	if (chan >= MAX_DMA_CHANNELS)
+		return NULL;
+
+	return dma_chan[chan];
+}
+
+int __init isa_dma_add(unsigned int chan, dma_t *dma)
+{
+	if (!dma->d_ops)
+		return -EINVAL;
+
+	sg_init_table(&dma->buf, 1);
+
+	if (dma_chan[chan])
+		return -EBUSY;
+	dma_chan[chan] = dma;
+	return 0;
+}
+
+/*
+ * Request DMA channel
+ *
+ * On certain platforms, we have to allocate an interrupt as well...
+ */
+int request_dma(unsigned int chan, const char *device_id)
+{
+	dma_t *dma = dma_channel(chan);
+	int ret;
+
+	if (!dma)
+		goto bad_dma;
+
+	if (xchg(&dma->lock, 1) != 0)
+		goto busy;
+
+	dma->device_id = device_id;
+	dma->active    = 0;
+	dma->invalid   = 1;
+
+	ret = 0;
+	if (dma->d_ops->request)
+		ret = dma->d_ops->request(chan, dma);
+
+	if (ret)
+		xchg(&dma->lock, 0);
+
+	return ret;
+
+bad_dma:
+	pr_err("dma: trying to allocate DMA%d\n", chan);
+	return -EINVAL;
+
+busy:
+	return -EBUSY;
+}
+EXPORT_SYMBOL(request_dma);
+
+/*
+ * Free DMA channel
+ *
+ * On certain platforms, we have to free interrupt as well...
+ */
+void free_dma(unsigned int chan)
+{
+	dma_t *dma = dma_channel(chan);
+
+	if (!dma)
+		goto bad_dma;
+
+	if (dma->active) {
+		pr_err("dma%d: freeing active DMA\n", chan);
+		dma->d_ops->disable(chan, dma);
+		dma->active = 0;
+	}
+
+	if (xchg(&dma->lock, 0) != 0) {
+		if (dma->d_ops->free)
+			dma->d_ops->free(chan, dma);
+		return;
+	}
+
+	pr_err("dma%d: trying to free free DMA\n", chan);
+	return;
+
+bad_dma:
+	pr_err("dma: trying to free DMA%d\n", chan);
+}
+EXPORT_SYMBOL(free_dma);
+
+/* Set DMA Scatter-Gather list
+ */
+void set_dma_sg (unsigned int chan, struct scatterlist *sg, int nr_sg)
+{
+	dma_t *dma = dma_channel(chan);
+
+	if (dma->active)
+		pr_err("dma%d: altering DMA SG while DMA active\n", chan);
+
+	dma->sg = sg;
+	dma->sgcount = nr_sg;
+	dma->invalid = 1;
+}
+EXPORT_SYMBOL(set_dma_sg);
+
+/* Set DMA address
+ *
+ * Copy address to the structure, and set the invalid bit
+ */
+void __set_dma_addr (unsigned int chan, void *addr)
+{
+	dma_t *dma = dma_channel(chan);
+
+	if (dma->active)
+		pr_err("dma%d: altering DMA address while DMA active\n", chan);
+
+	dma->sg = NULL;
+	dma->addr = addr;
+	dma->invalid = 1;
+}
+EXPORT_SYMBOL(__set_dma_addr);
+
+/* Set DMA byte count
+ *
+ * Copy address to the structure, and set the invalid bit
+ */
+void set_dma_count (unsigned int chan, unsigned long count)
+{
+	dma_t *dma = dma_channel(chan);
+
+	if (dma->active)
+		pr_err("dma%d: altering DMA count while DMA active\n", chan);
+
+	dma->sg = NULL;
+	dma->count = count;
+	dma->invalid = 1;
+}
+EXPORT_SYMBOL(set_dma_count);
+
+/* Set DMA direction mode
+ */
+void set_dma_mode (unsigned int chan, unsigned int mode)
+{
+	dma_t *dma = dma_channel(chan);
+
+	if (dma->active)
+		pr_err("dma%d: altering DMA mode while DMA active\n", chan);
+
+	dma->dma_mode = mode;
+	dma->invalid = 1;
+}
+EXPORT_SYMBOL(set_dma_mode);
+
+/* Enable DMA channel
+ */
+void enable_dma (unsigned int chan)
+{
+	dma_t *dma = dma_channel(chan);
+
+	if (!dma->lock)
+		goto free_dma;
+
+	if (dma->active == 0) {
+		dma->active = 1;
+		dma->d_ops->enable(chan, dma);
+	}
+	return;
+
+free_dma:
+	pr_err("dma%d: trying to enable free DMA\n", chan);
+	BUG();
+}
+EXPORT_SYMBOL(enable_dma);
+
+/* Disable DMA channel
+ */
+void disable_dma (unsigned int chan)
+{
+	dma_t *dma = dma_channel(chan);
+
+	if (!dma->lock)
+		goto free_dma;
+
+	if (dma->active == 1) {
+		dma->active = 0;
+		dma->d_ops->disable(chan, dma);
+	}
+	return;
+
+free_dma:
+	pr_err("dma%d: trying to disable free DMA\n", chan);
+	BUG();
+}
+EXPORT_SYMBOL(disable_dma);
+
+/*
+ * Is the specified DMA channel active?
+ */
+int dma_channel_active(unsigned int chan)
+{
+	dma_t *dma = dma_channel(chan);
+	return dma->active;
+}
+EXPORT_SYMBOL(dma_channel_active);
+
+void set_dma_page(unsigned int chan, char pagenr)
+{
+	pr_err("dma%d: trying to set_dma_page\n", chan);
+}
+EXPORT_SYMBOL(set_dma_page);
+
+void set_dma_speed(unsigned int chan, int cycle_ns)
+{
+	dma_t *dma = dma_channel(chan);
+	int ret = 0;
+
+	if (dma->d_ops->setspeed)
+		ret = dma->d_ops->setspeed(chan, dma, cycle_ns);
+	dma->speed = ret;
+}
+EXPORT_SYMBOL(set_dma_speed);
+
+int get_dma_residue(unsigned int chan)
+{
+	dma_t *dma = dma_channel(chan);
+	int ret = 0;
+
+	if (dma->d_ops->residue)
+		ret = dma->d_ops->residue(chan, dma);
+
+	return ret;
+}
+EXPORT_SYMBOL(get_dma_residue);
+
+#ifdef CONFIG_PROC_FS
+static int proc_dma_show(struct seq_file *m, void *v)
+{
+	int i;
+
+	for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) {
+		dma_t *dma = dma_channel(i);
+		if (dma && dma->lock)
+			seq_printf(m, "%2d: %s\n", i, dma->device_id);
+	}
+	return 0;
+}
+
+static int proc_dma_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_dma_show, NULL);
+}
+
+static const struct file_operations proc_dma_operations = {
+	.open		= proc_dma_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init proc_dma_init(void)
+{
+	proc_create("dma", 0, NULL, &proc_dma_operations);
+	return 0;
+}
+
+__initcall(proc_dma_init);
+#endif
diff --git a/arch/arm/kernel/early_printk.c b/arch/arm/kernel/early_printk.c
new file mode 100644
index 000000000..430765369
--- /dev/null
+++ b/arch/arm/kernel/early_printk.c
@@ -0,0 +1,46 @@
+/*
+ *  linux/arch/arm/kernel/early_printk.c
+ *
+ *  Copyright (C) 2009 Sascha Hauer <s.hauer@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/init.h>
+
+extern void printch(int);
+
+static void early_write(const char *s, unsigned n)
+{
+	while (n-- > 0) {
+		if (*s == '\n')
+			printch('\r');
+		printch(*s);
+		s++;
+	}
+}
+
+static void early_console_write(struct console *con, const char *s, unsigned n)
+{
+	early_write(s, n);
+}
+
+static struct console early_console_dev = {
+	.name =		"earlycon",
+	.write =	early_console_write,
+	.flags =	CON_PRINTBUFFER | CON_BOOT,
+	.index =	-1,
+};
+
+static int __init setup_early_printk(char *buf)
+{
+	early_console = &early_console_dev;
+	register_console(&early_console_dev);
+	return 0;
+}
+
+early_param("earlyprintk", setup_early_printk);
diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c
new file mode 100644
index 000000000..d0d1e8315
--- /dev/null
+++ b/arch/arm/kernel/elf.c
@@ -0,0 +1,91 @@
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/elf.h>
+#include <asm/system_info.h>
+
+int elf_check_arch(const struct elf32_hdr *x)
+{
+	unsigned int eflags;
+
+	/* Make sure it's an ARM executable */
+	if (x->e_machine != EM_ARM)
+		return 0;
+
+	/* Make sure the entry address is reasonable */
+	if (x->e_entry & 1) {
+		if (!(elf_hwcap & HWCAP_THUMB))
+			return 0;
+	} else if (x->e_entry & 3)
+		return 0;
+
+	eflags = x->e_flags;
+	if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN) {
+		unsigned int flt_fmt;
+
+		/* APCS26 is only allowed if the CPU supports it */
+		if ((eflags & EF_ARM_APCS_26) && !(elf_hwcap & HWCAP_26BIT))
+			return 0;
+
+		flt_fmt = eflags & (EF_ARM_VFP_FLOAT | EF_ARM_SOFT_FLOAT);
+
+		/* VFP requires the supporting code */
+		if (flt_fmt == EF_ARM_VFP_FLOAT && !(elf_hwcap & HWCAP_VFP))
+			return 0;
+	}
+	return 1;
+}
+EXPORT_SYMBOL(elf_check_arch);
+
+void elf_set_personality(const struct elf32_hdr *x)
+{
+	unsigned int eflags = x->e_flags;
+	unsigned int personality = current->personality & ~PER_MASK;
+
+	/*
+	 * We only support Linux ELF executables, so always set the
+	 * personality to LINUX.
+	 */
+	personality |= PER_LINUX;
+
+	/*
+	 * APCS-26 is only valid for OABI executables
+	 */
+	if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN &&
+	    (eflags & EF_ARM_APCS_26))
+		personality &= ~ADDR_LIMIT_32BIT;
+	else
+		personality |= ADDR_LIMIT_32BIT;
+
+	set_personality(personality);
+
+	/*
+	 * Since the FPA coprocessor uses CP1 and CP2, and iWMMXt uses CP0
+	 * and CP1, we only enable access to the iWMMXt coprocessor if the
+	 * binary is EABI or softfloat (and thus, guaranteed not to use
+	 * FPA instructions.)
+	 */
+	if (elf_hwcap & HWCAP_IWMMXT &&
+	    eflags & (EF_ARM_EABI_MASK | EF_ARM_SOFT_FLOAT)) {
+		set_thread_flag(TIF_USING_IWMMXT);
+	} else {
+		clear_thread_flag(TIF_USING_IWMMXT);
+	}
+}
+EXPORT_SYMBOL(elf_set_personality);
+
+/*
+ * Set READ_IMPLIES_EXEC if:
+ *  - the binary requires an executable stack
+ *  - we're running on a CPU which doesn't support NX.
+ */
+int arm_elf_read_implies_exec(const struct elf32_hdr *x, int executable_stack)
+{
+	if (executable_stack != EXSTACK_DISABLE_X)
+		return 1;
+	if (cpu_architecture() < CPU_ARCH_ARMv6)
+		return 1;
+	return 0;
+}
+EXPORT_SYMBOL(arm_elf_read_implies_exec);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
new file mode 100644
index 000000000..570306c49
--- /dev/null
+++ b/arch/arm/kernel/entry-armv.S
@@ -0,0 +1,1238 @@
+/*
+ *  linux/arch/arm/kernel/entry-armv.S
+ *
+ *  Copyright (C) 1996,1997,1998 Russell King.
+ *  ARM700 fix by Matthew Godbolt (linux-user@willothewisp.demon.co.uk)
+ *  nommu support by Hyok S. Choi (hyok.choi@samsung.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Low-level vector interface routines
+ *
+ *  Note:  there is a StrongARM bug in the STMIA rn, {regs}^ instruction
+ *  that causes it to save wrong values...  Be aware!
+ */
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/glue-df.h>
+#include <asm/glue-pf.h>
+#include <asm/vfpmacros.h>
+#ifndef CONFIG_MULTI_IRQ_HANDLER
+#include <mach/entry-macro.S>
+#endif
+#include <asm/thread_notify.h>
+#include <asm/unwind.h>
+#include <asm/unistd.h>
+#include <asm/tls.h>
+#include <asm/system_info.h>
+
+#include "entry-header.S"
+#include <asm/entry-macro-multi.S>
+#include <asm/probes.h>
+
+/*
+ * Interrupt handling.
+ */
+	.macro	irq_handler
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	ldr	r1, =handle_arch_irq
+	mov	r0, sp
+	adr	lr, BSYM(9997f)
+	ldr	pc, [r1]
+#else
+	arch_irq_handler_default
+#endif
+9997:
+	.endm
+
+	.macro	pabt_helper
+	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
+#ifdef MULTI_PABORT
+	ldr	ip, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]
+#else
+	bl	CPU_PABORT_HANDLER
+#endif
+	.endm
+
+	.macro	dabt_helper
+
+	@
+	@ Call the processor-specific abort handler:
+	@
+	@  r2 - pt_regs
+	@  r4 - aborted context pc
+	@  r5 - aborted context psr
+	@
+	@ The abort handler must return the aborted address in r0, and
+	@ the fault status register in r1.  r9 must be preserved.
+	@
+#ifdef MULTI_DABORT
+	ldr	ip, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]
+#else
+	bl	CPU_DABORT_HANDLER
+#endif
+	.endm
+
+#ifdef CONFIG_KPROBES
+	.section	.kprobes.text,"ax",%progbits
+#else
+	.text
+#endif
+
+/*
+ * Invalid mode handlers
+ */
+	.macro	inv_entry, reason
+	sub	sp, sp, #S_FRAME_SIZE
+ ARM(	stmib	sp, {r1 - lr}		)
+ THUMB(	stmia	sp, {r0 - r12}		)
+ THUMB(	str	sp, [sp, #S_SP]		)
+ THUMB(	str	lr, [sp, #S_LR]		)
+	mov	r1, #\reason
+	.endm
+
+__pabt_invalid:
+	inv_entry BAD_PREFETCH
+	b	common_invalid
+ENDPROC(__pabt_invalid)
+
+__dabt_invalid:
+	inv_entry BAD_DATA
+	b	common_invalid
+ENDPROC(__dabt_invalid)
+
+__irq_invalid:
+	inv_entry BAD_IRQ
+	b	common_invalid
+ENDPROC(__irq_invalid)
+
+__und_invalid:
+	inv_entry BAD_UNDEFINSTR
+
+	@
+	@ XXX fall through to common_invalid
+	@
+
+@
+@ common_invalid - generic code for failed exception (re-entrant version of handlers)
+@
+common_invalid:
+	zero_fp
+
+	ldmia	r0, {r4 - r6}
+	add	r0, sp, #S_PC		@ here for interlock avoidance
+	mov	r7, #-1			@  ""   ""    ""        ""
+	str	r4, [sp]		@ save preserved r0
+	stmia	r0, {r5 - r7}		@ lr_<exception>,
+					@ cpsr_<exception>, "old_r0"
+
+	mov	r0, sp
+	b	bad_mode
+ENDPROC(__und_invalid)
+
+/*
+ * SVC mode handlers
+ */
+
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
+#define SPFIX(code...) code
+#else
+#define SPFIX(code...)
+#endif
+
+	.macro	svc_entry, stack_hole=0, trace=1
+ UNWIND(.fnstart		)
+ UNWIND(.save {r0 - pc}		)
+	sub	sp, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+#ifdef CONFIG_THUMB2_KERNEL
+ SPFIX(	str	r0, [sp]	)	@ temporarily saved
+ SPFIX(	mov	r0, sp		)
+ SPFIX(	tst	r0, #4		)	@ test original stack alignment
+ SPFIX(	ldr	r0, [sp]	)	@ restored
+#else
+ SPFIX(	tst	sp, #4		)
+#endif
+ SPFIX(	subeq	sp, sp, #4	)
+	stmia	sp, {r1 - r12}
+
+	ldmia	r0, {r3 - r5}
+	add	r7, sp, #S_SP - 4	@ here for interlock avoidance
+	mov	r6, #-1			@  ""  ""      ""       ""
+	add	r2, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+ SPFIX(	addeq	r2, r2, #4	)
+	str	r3, [sp, #-4]!		@ save the "real" r0 copied
+					@ from the exception stack
+
+	mov	r3, lr
+
+	@
+	@ We are now ready to fill in the remaining blanks on the stack:
+	@
+	@  r2 - sp_svc
+	@  r3 - lr_svc
+	@  r4 - lr_<exception>, already fixed up for correct return/restart
+	@  r5 - spsr_<exception>
+	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
+	@
+	stmia	r7, {r2 - r6}
+
+	.if \trace
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off
+#endif
+	.endif
+	.endm
+
+	.align	5
+__dabt_svc:
+	svc_entry
+	mov	r2, sp
+	dabt_helper
+ THUMB(	ldr	r5, [sp, #S_PSR]	)	@ potentially updated CPSR
+	svc_exit r5				@ return from exception
+ UNWIND(.fnend		)
+ENDPROC(__dabt_svc)
+
+	.align	5
+__irq_svc:
+	svc_entry
+	irq_handler
+
+#ifdef CONFIG_PREEMPT
+	get_thread_info tsk
+	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
+	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
+	teq	r8, #0				@ if preempt count != 0
+	movne	r0, #0				@ force flags to 0
+	tst	r0, #_TIF_NEED_RESCHED
+	blne	svc_preempt
+#endif
+
+	svc_exit r5, irq = 1			@ return from exception
+ UNWIND(.fnend		)
+ENDPROC(__irq_svc)
+
+	.ltorg
+
+#ifdef CONFIG_PREEMPT
+svc_preempt:
+	mov	r8, lr
+1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
+	ldr	r0, [tsk, #TI_FLAGS]		@ get new tasks TI_FLAGS
+	tst	r0, #_TIF_NEED_RESCHED
+	reteq	r8				@ go again
+	b	1b
+#endif
+
+__und_fault:
+	@ Correct the PC such that it is pointing at the instruction
+	@ which caused the fault.  If the faulting instruction was ARM
+	@ the PC will be pointing at the next instruction, and have to
+	@ subtract 4.  Otherwise, it is Thumb, and the PC will be
+	@ pointing at the second half of the Thumb instruction.  We
+	@ have to subtract 2.
+	ldr	r2, [r0, #S_PC]
+	sub	r2, r2, r1
+	str	r2, [r0, #S_PC]
+	b	do_undefinstr
+ENDPROC(__und_fault)
+
+	.align	5
+__und_svc:
+#ifdef CONFIG_KPROBES
+	@ If a kprobe is about to simulate a "stmdb sp..." instruction,
+	@ it obviously needs free stack space which then will belong to
+	@ the saved context.
+	svc_entry MAX_STACK_SIZE
+#else
+	svc_entry
+#endif
+	@
+	@ call emulation code, which returns using r9 if it has emulated
+	@ the instruction, or the more conventional lr if we are to treat
+	@ this as a real undefined instruction
+	@
+	@  r0 - instruction
+	@
+#ifndef CONFIG_THUMB2_KERNEL
+	ldr	r0, [r4, #-4]
+#else
+	mov	r1, #2
+	ldrh	r0, [r4, #-2]			@ Thumb instruction at LR - 2
+	cmp	r0, #0xe800			@ 32-bit instruction if xx >= 0
+	blo	__und_svc_fault
+	ldrh	r9, [r4]			@ bottom 16 bits
+	add	r4, r4, #2
+	str	r4, [sp, #S_PC]
+	orr	r0, r9, r0, lsl #16
+#endif
+	adr	r9, BSYM(__und_svc_finish)
+	mov	r2, r4
+	bl	call_fpe
+
+	mov	r1, #4				@ PC correction to apply
+__und_svc_fault:
+	mov	r0, sp				@ struct pt_regs *regs
+	bl	__und_fault
+
+__und_svc_finish:
+	ldr	r5, [sp, #S_PSR]		@ Get SVC cpsr
+	svc_exit r5				@ return from exception
+ UNWIND(.fnend		)
+ENDPROC(__und_svc)
+
+	.align	5
+__pabt_svc:
+	svc_entry
+	mov	r2, sp				@ regs
+	pabt_helper
+	svc_exit r5				@ return from exception
+ UNWIND(.fnend		)
+ENDPROC(__pabt_svc)
+
+	.align	5
+__fiq_svc:
+	svc_entry trace=0
+	mov	r0, sp				@ struct pt_regs *regs
+	bl	handle_fiq_as_nmi
+	svc_exit_via_fiq
+ UNWIND(.fnend		)
+ENDPROC(__fiq_svc)
+
+	.align	5
+.LCcralign:
+	.word	cr_alignment
+#ifdef MULTI_DABORT
+.LCprocfns:
+	.word	processor
+#endif
+.LCfp:
+	.word	fp_enter
+
+/*
+ * Abort mode handlers
+ */
+
+@
+@ Taking a FIQ in abort mode is similar to taking a FIQ in SVC mode
+@ and reuses the same macros. However in abort mode we must also
+@ save/restore lr_abt and spsr_abt to make nested aborts safe.
+@
+	.align 5
+__fiq_abt:
+	svc_entry trace=0
+
+ ARM(	msr	cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov	r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr	cpsr_c, r0 )
+	mov	r1, lr		@ Save lr_abt
+	mrs	r2, spsr	@ Save spsr_abt, abort is now safe
+ ARM(	msr	cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov	r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr	cpsr_c, r0 )
+	stmfd	sp!, {r1 - r2}
+
+	add	r0, sp, #8			@ struct pt_regs *regs
+	bl	handle_fiq_as_nmi
+
+	ldmfd	sp!, {r1 - r2}
+ ARM(	msr	cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov	r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr	cpsr_c, r0 )
+	mov	lr, r1		@ Restore lr_abt, abort is unsafe
+	msr	spsr_cxsf, r2	@ Restore spsr_abt
+ ARM(	msr	cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov	r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr	cpsr_c, r0 )
+
+	svc_exit_via_fiq
+ UNWIND(.fnend		)
+ENDPROC(__fiq_abt)
+
+/*
+ * User mode handlers
+ *
+ * EABI note: sp_svc is always 64-bit aligned here, so should S_FRAME_SIZE
+ */
+
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) && (S_FRAME_SIZE & 7)
+#error "sizeof(struct pt_regs) must be a multiple of 8"
+#endif
+
+	.macro	usr_entry, trace=1
+ UNWIND(.fnstart	)
+ UNWIND(.cantunwind	)	@ don't unwind the user space
+	sub	sp, sp, #S_FRAME_SIZE
+ ARM(	stmib	sp, {r1 - r12}	)
+ THUMB(	stmia	sp, {r0 - r12}	)
+
+ ATRAP(	mrc	p15, 0, r7, c1, c0, 0)
+ ATRAP(	ldr	r8, .LCcralign)
+
+	ldmia	r0, {r3 - r5}
+	add	r0, sp, #S_PC		@ here for interlock avoidance
+	mov	r6, #-1			@  ""  ""     ""        ""
+
+	str	r3, [sp]		@ save the "real" r0 copied
+					@ from the exception stack
+
+ ATRAP(	ldr	r8, [r8, #0])
+
+	@
+	@ We are now ready to fill in the remaining blanks on the stack:
+	@
+	@  r4 - lr_<exception>, already fixed up for correct return/restart
+	@  r5 - spsr_<exception>
+	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
+	@
+	@ Also, separately save sp_usr and lr_usr
+	@
+	stmia	r0, {r4 - r6}
+ ARM(	stmdb	r0, {sp, lr}^			)
+ THUMB(	store_user_sp_lr r0, r1, S_SP - S_PC	)
+
+	@ Enable the alignment trap while in kernel mode
+ ATRAP(	teq	r8, r7)
+ ATRAP( mcrne	p15, 0, r8, c1, c0, 0)
+
+	@
+	@ Clear FP to mark the first stack frame
+	@
+	zero_fp
+
+	.if	\trace
+#ifdef CONFIG_IRQSOFF_TRACER
+	bl	trace_hardirqs_off
+#endif
+	ct_user_exit save = 0
+	.endif
+	.endm
+
+	.macro	kuser_cmpxchg_check
+#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) && \
+    !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+#ifndef CONFIG_MMU
+#warning "NPTL on non MMU needs fixing"
+#else
+	@ Make sure our user space atomic helper is restarted
+	@ if it was interrupted in a critical region.  Here we
+	@ perform a quick test inline since it should be false
+	@ 99.9999% of the time.  The rest is done out of line.
+	cmp	r4, #TASK_SIZE
+	blhs	kuser_cmpxchg64_fixup
+#endif
+#endif
+	.endm
+
+	.align	5
+__dabt_usr:
+	usr_entry
+	kuser_cmpxchg_check
+	mov	r2, sp
+	dabt_helper
+	b	ret_from_exception
+ UNWIND(.fnend		)
+ENDPROC(__dabt_usr)
+
+	.align	5
+__irq_usr:
+	usr_entry
+	kuser_cmpxchg_check
+	irq_handler
+	get_thread_info tsk
+	mov	why, #0
+	b	ret_to_user_from_irq
+ UNWIND(.fnend		)
+ENDPROC(__irq_usr)
+
+	.ltorg
+
+	.align	5
+__und_usr:
+	usr_entry
+
+	mov	r2, r4
+	mov	r3, r5
+
+	@ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the
+	@      faulting instruction depending on Thumb mode.
+	@ r3 = regs->ARM_cpsr
+	@
+	@ The emulation code returns using r9 if it has emulated the
+	@ instruction, or the more conventional lr if we are to treat
+	@ this as a real undefined instruction
+	@
+	adr	r9, BSYM(ret_from_exception)
+
+	@ IRQs must be enabled before attempting to read the instruction from
+	@ user space since that could cause a page/translation fault if the
+	@ page table was modified by another CPU.
+	enable_irq
+
+	tst	r3, #PSR_T_BIT			@ Thumb mode?
+	bne	__und_usr_thumb
+	sub	r4, r2, #4			@ ARM instr at LR - 4
+1:	ldrt	r0, [r4]
+ ARM_BE8(rev	r0, r0)				@ little endian instruction
+
+	@ r0 = 32-bit ARM instruction which caused the exception
+	@ r2 = PC value for the following instruction (:= regs->ARM_pc)
+	@ r4 = PC value for the faulting instruction
+	@ lr = 32-bit undefined instruction function
+	adr	lr, BSYM(__und_usr_fault_32)
+	b	call_fpe
+
+__und_usr_thumb:
+	@ Thumb instruction
+	sub	r4, r2, #2			@ First half of thumb instr at LR - 2
+#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
+/*
+ * Thumb-2 instruction handling.  Note that because pre-v6 and >= v6 platforms
+ * can never be supported in a single kernel, this code is not applicable at
+ * all when __LINUX_ARM_ARCH__ < 6.  This allows simplifying assumptions to be
+ * made about .arch directives.
+ */
+#if __LINUX_ARM_ARCH__ < 7
+/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
+#define NEED_CPU_ARCHITECTURE
+	ldr	r5, .LCcpu_architecture
+	ldr	r5, [r5]
+	cmp	r5, #CPU_ARCH_ARMv7
+	blo	__und_usr_fault_16		@ 16bit undefined instruction
+/*
+ * The following code won't get run unless the running CPU really is v7, so
+ * coding round the lack of ldrht on older arches is pointless.  Temporarily
+ * override the assembler target arch with the minimum required instead:
+ */
+	.arch	armv6t2
+#endif
+2:	ldrht	r5, [r4]
+ARM_BE8(rev16	r5, r5)				@ little endian instruction
+	cmp	r5, #0xe800			@ 32bit instruction if xx != 0
+	blo	__und_usr_fault_16		@ 16bit undefined instruction
+3:	ldrht	r0, [r2]
+ARM_BE8(rev16	r0, r0)				@ little endian instruction
+	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
+	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
+	orr	r0, r0, r5, lsl #16
+	adr	lr, BSYM(__und_usr_fault_32)
+	@ r0 = the two 16-bit Thumb instructions which caused the exception
+	@ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
+	@ r4 = PC value for the first 16-bit Thumb instruction
+	@ lr = 32bit undefined instruction function
+
+#if __LINUX_ARM_ARCH__ < 7
+/* If the target arch was overridden, change it back: */
+#ifdef CONFIG_CPU_32v6K
+	.arch	armv6k
+#else
+	.arch	armv6
+#endif
+#endif /* __LINUX_ARM_ARCH__ < 7 */
+#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */
+	b	__und_usr_fault_16
+#endif
+ UNWIND(.fnend)
+ENDPROC(__und_usr)
+
+/*
+ * The out of line fixup for the ldrt instructions above.
+ */
+	.pushsection .text.fixup, "ax"
+	.align	2
+4:	str     r4, [sp, #S_PC]			@ retry current instruction
+	ret	r9
+	.popsection
+	.pushsection __ex_table,"a"
+	.long	1b, 4b
+#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
+	.long	2b, 4b
+	.long	3b, 4b
+#endif
+	.popsection
+
+/*
+ * Check whether the instruction is a co-processor instruction.
+ * If yes, we need to call the relevant co-processor handler.
+ *
+ * Note that we don't do a full check here for the co-processor
+ * instructions; all instructions with bit 27 set are well
+ * defined.  The only instructions that should fault are the
+ * co-processor instructions.  However, we have to watch out
+ * for the ARM6/ARM7 SWI bug.
+ *
+ * NEON is a special case that has to be handled here. Not all
+ * NEON instructions are co-processor instructions, so we have
+ * to make a special case of checking for them. Plus, there's
+ * five groups of them, so we have a table of mask/opcode pairs
+ * to check against, and if any match then we branch off into the
+ * NEON handler code.
+ *
+ * Emulators may wish to make use of the following registers:
+ *  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
+ *  r2  = PC value to resume execution after successful emulation
+ *  r9  = normal "successful" return address
+ *  r10 = this threads thread_info structure
+ *  lr  = unrecognised instruction return address
+ * IRQs enabled, FIQs enabled.
+ */
+	@
+	@ Fall-through from Thumb-2 __und_usr
+	@
+#ifdef CONFIG_NEON
+	get_thread_info r10			@ get current thread
+	adr	r6, .LCneon_thumb_opcodes
+	b	2f
+#endif
+call_fpe:
+	get_thread_info r10			@ get current thread
+#ifdef CONFIG_NEON
+	adr	r6, .LCneon_arm_opcodes
+2:	ldr	r5, [r6], #4			@ mask value
+	ldr	r7, [r6], #4			@ opcode bits matching in mask
+	cmp	r5, #0				@ end mask?
+	beq	1f
+	and	r8, r0, r5
+	cmp	r8, r7				@ NEON instruction?
+	bne	2b
+	mov	r7, #1
+	strb	r7, [r10, #TI_USED_CP + 10]	@ mark CP#10 as used
+	strb	r7, [r10, #TI_USED_CP + 11]	@ mark CP#11 as used
+	b	do_vfp				@ let VFP handler handle this
+1:
+#endif
+	tst	r0, #0x08000000			@ only CDP/CPRT/LDC/STC have bit 27
+	tstne	r0, #0x04000000			@ bit 26 set on both ARM and Thumb-2
+	reteq	lr
+	and	r8, r0, #0x00000f00		@ mask out CP number
+ THUMB(	lsr	r8, r8, #8		)
+	mov	r7, #1
+	add	r6, r10, #TI_USED_CP
+ ARM(	strb	r7, [r6, r8, lsr #8]	)	@ set appropriate used_cp[]
+ THUMB(	strb	r7, [r6, r8]		)	@ set appropriate used_cp[]
+#ifdef CONFIG_IWMMXT
+	@ Test if we need to give access to iWMMXt coprocessors
+	ldr	r5, [r10, #TI_FLAGS]
+	rsbs	r7, r8, #(1 << 8)		@ CP 0 or 1 only
+	movcss	r7, r5, lsr #(TIF_USING_IWMMXT + 1)
+	bcs	iwmmxt_task_enable
+#endif
+ ARM(	add	pc, pc, r8, lsr #6	)
+ THUMB(	lsl	r8, r8, #2		)
+ THUMB(	add	pc, r8			)
+	nop
+
+	ret.w	lr				@ CP#0
+	W(b)	do_fpe				@ CP#1 (FPE)
+	W(b)	do_fpe				@ CP#2 (FPE)
+	ret.w	lr				@ CP#3
+#ifdef CONFIG_CRUNCH
+	b	crunch_task_enable		@ CP#4 (MaverickCrunch)
+	b	crunch_task_enable		@ CP#5 (MaverickCrunch)
+	b	crunch_task_enable		@ CP#6 (MaverickCrunch)
+#else
+	ret.w	lr				@ CP#4
+	ret.w	lr				@ CP#5
+	ret.w	lr				@ CP#6
+#endif
+	ret.w	lr				@ CP#7
+	ret.w	lr				@ CP#8
+	ret.w	lr				@ CP#9
+#ifdef CONFIG_VFP
+	W(b)	do_vfp				@ CP#10 (VFP)
+	W(b)	do_vfp				@ CP#11 (VFP)
+#else
+	ret.w	lr				@ CP#10 (VFP)
+	ret.w	lr				@ CP#11 (VFP)
+#endif
+	ret.w	lr				@ CP#12
+	ret.w	lr				@ CP#13
+	ret.w	lr				@ CP#14 (Debug)
+	ret.w	lr				@ CP#15 (Control)
+
+#ifdef NEED_CPU_ARCHITECTURE
+	.align	2
+.LCcpu_architecture:
+	.word	__cpu_architecture
+#endif
+
+#ifdef CONFIG_NEON
+	.align	6
+
+.LCneon_arm_opcodes:
+	.word	0xfe000000			@ mask
+	.word	0xf2000000			@ opcode
+
+	.word	0xff100000			@ mask
+	.word	0xf4000000			@ opcode
+
+	.word	0x00000000			@ mask
+	.word	0x00000000			@ opcode
+
+.LCneon_thumb_opcodes:
+	.word	0xef000000			@ mask
+	.word	0xef000000			@ opcode
+
+	.word	0xff100000			@ mask
+	.word	0xf9000000			@ opcode
+
+	.word	0x00000000			@ mask
+	.word	0x00000000			@ opcode
+#endif
+
+do_fpe:
+	ldr	r4, .LCfp
+	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
+	ldr	pc, [r4]			@ Call FP module USR entry point
+
+/*
+ * The FP module is called with these registers set:
+ *  r0  = instruction
+ *  r2  = PC+4
+ *  r9  = normal "successful" return address
+ *  r10 = FP workspace
+ *  lr  = unrecognised FP instruction return address
+ */
+
+	.pushsection .data
+ENTRY(fp_enter)
+	.word	no_fp
+	.popsection
+
+ENTRY(no_fp)
+	ret	lr
+ENDPROC(no_fp)
+
+__und_usr_fault_32:
+	mov	r1, #4
+	b	1f
+__und_usr_fault_16:
+	mov	r1, #2
+1:	mov	r0, sp
+	adr	lr, BSYM(ret_from_exception)
+	b	__und_fault
+ENDPROC(__und_usr_fault_32)
+ENDPROC(__und_usr_fault_16)
+
+	.align	5
+__pabt_usr:
+	usr_entry
+	mov	r2, sp				@ regs
+	pabt_helper
+ UNWIND(.fnend		)
+	/* fall through */
+/*
+ * This is the return code to user mode for abort handlers
+ */
+ENTRY(ret_from_exception)
+ UNWIND(.fnstart	)
+ UNWIND(.cantunwind	)
+	get_thread_info tsk
+	mov	why, #0
+	b	ret_to_user
+ UNWIND(.fnend		)
+ENDPROC(__pabt_usr)
+ENDPROC(ret_from_exception)
+
+	.align	5
+__fiq_usr:
+	usr_entry trace=0
+	kuser_cmpxchg_check
+	mov	r0, sp				@ struct pt_regs *regs
+	bl	handle_fiq_as_nmi
+	get_thread_info tsk
+	restore_user_regs fast = 0, offset = 0
+ UNWIND(.fnend		)
+ENDPROC(__fiq_usr)
+
+/*
+ * Register switch for ARMv3 and ARMv4 processors
+ * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
+ * previous and next are guaranteed not to be the same.
+ */
+ENTRY(__switch_to)
+ UNWIND(.fnstart	)
+ UNWIND(.cantunwind	)
+	add	ip, r1, #TI_CPU_SAVE
+ ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
+ THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
+ THUMB(	str	sp, [ip], #4		   )
+ THUMB(	str	lr, [ip], #4		   )
+	ldr	r4, [r2, #TI_TP_VALUE]
+	ldr	r5, [r2, #TI_TP_VALUE + 4]
+#ifdef CONFIG_CPU_USE_DOMAINS
+	ldr	r6, [r2, #TI_CPU_DOMAIN]
+#endif
+	switch_tls r1, r4, r5, r3, r7
+#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+	ldr	r7, [r2, #TI_TASK]
+	ldr	r8, =__stack_chk_guard
+	ldr	r7, [r7, #TSK_STACK_CANARY]
+#endif
+#ifdef CONFIG_CPU_USE_DOMAINS
+	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
+#endif
+	mov	r5, r0
+	add	r4, r2, #TI_CPU_SAVE
+	ldr	r0, =thread_notify_head
+	mov	r1, #THREAD_NOTIFY_SWITCH
+	bl	atomic_notifier_call_chain
+#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+	str	r7, [r8]
+#endif
+ THUMB(	mov	ip, r4			   )
+	mov	r0, r5
+ ARM(	ldmia	r4, {r4 - sl, fp, sp, pc}  )	@ Load all regs saved previously
+ THUMB(	ldmia	ip!, {r4 - sl, fp}	   )	@ Load all regs saved previously
+ THUMB(	ldr	sp, [ip], #4		   )
+ THUMB(	ldr	pc, [ip]		   )
+ UNWIND(.fnend		)
+ENDPROC(__switch_to)
+
+	__INIT
+
+/*
+ * User helpers.
+ *
+ * Each segment is 32-byte aligned and will be moved to the top of the high
+ * vector page.  New segments (if ever needed) must be added in front of
+ * existing ones.  This mechanism should be used only for things that are
+ * really small and justified, and not be abused freely.
+ *
+ * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
+ */
+ THUMB(	.arm	)
+
+	.macro	usr_ret, reg
+#ifdef CONFIG_ARM_THUMB
+	bx	\reg
+#else
+	ret	\reg
+#endif
+	.endm
+
+	.macro	kuser_pad, sym, size
+	.if	(. - \sym) & 3
+	.rept	4 - (. - \sym) & 3
+	.byte	0
+	.endr
+	.endif
+	.rept	(\size - (. - \sym)) / 4
+	.word	0xe7fddef1
+	.endr
+	.endm
+
+#ifdef CONFIG_KUSER_HELPERS
+	.align	5
+	.globl	__kuser_helper_start
+__kuser_helper_start:
+
+/*
+ * Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular
+ * kuser "slots", therefore 0xffff0f80 is not used as a valid entry point.
+ */
+
+__kuser_cmpxchg64:				@ 0xffff0f60
+
+#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+
+	/*
+	 * Poor you.  No fast solution possible...
+	 * The kernel itself must perform the operation.
+	 * A special ghost syscall is used for that (see traps.c).
+	 */
+	stmfd	sp!, {r7, lr}
+	ldr	r7, 1f			@ it's 20 bits
+	swi	__ARM_NR_cmpxchg64
+	ldmfd	sp!, {r7, pc}
+1:	.word	__ARM_NR_cmpxchg64
+
+#elif defined(CONFIG_CPU_32v6K)
+
+	stmfd	sp!, {r4, r5, r6, r7}
+	ldrd	r4, r5, [r0]			@ load old val
+	ldrd	r6, r7, [r1]			@ load new val
+	smp_dmb	arm
+1:	ldrexd	r0, r1, [r2]			@ load current val
+	eors	r3, r0, r4			@ compare with oldval (1)
+	eoreqs	r3, r1, r5			@ compare with oldval (2)
+	strexdeq r3, r6, r7, [r2]		@ store newval if eq
+	teqeq	r3, #1				@ success?
+	beq	1b				@ if no then retry
+	smp_dmb	arm
+	rsbs	r0, r3, #0			@ set returned val and C flag
+	ldmfd	sp!, {r4, r5, r6, r7}
+	usr_ret	lr
+
+#elif !defined(CONFIG_SMP)
+
+#ifdef CONFIG_MMU
+
+	/*
+	 * The only thing that can break atomicity in this cmpxchg64
+	 * implementation is either an IRQ or a data abort exception
+	 * causing another process/thread to be scheduled in the middle of
+	 * the critical sequence.  The same strategy as for cmpxchg is used.
+	 */
+	stmfd	sp!, {r4, r5, r6, lr}
+	ldmia	r0, {r4, r5}			@ load old val
+	ldmia	r1, {r6, lr}			@ load new val
+1:	ldmia	r2, {r0, r1}			@ load current val
+	eors	r3, r0, r4			@ compare with oldval (1)
+	eoreqs	r3, r1, r5			@ compare with oldval (2)
+2:	stmeqia	r2, {r6, lr}			@ store newval if eq
+	rsbs	r0, r3, #0			@ set return val and C flag
+	ldmfd	sp!, {r4, r5, r6, pc}
+
+	.text
+kuser_cmpxchg64_fixup:
+	@ Called from kuser_cmpxchg_fixup.
+	@ r4 = address of interrupted insn (must be preserved).
+	@ sp = saved regs. r7 and r8 are clobbered.
+	@ 1b = first critical insn, 2b = last critical insn.
+	@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
+	mov	r7, #0xffff0fff
+	sub	r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
+	subs	r8, r4, r7
+	rsbcss	r8, r8, #(2b - 1b)
+	strcs	r7, [sp, #S_PC]
+#if __LINUX_ARM_ARCH__ < 6
+	bcc	kuser_cmpxchg32_fixup
+#endif
+	ret	lr
+	.previous
+
+#else
+#warning "NPTL on non MMU needs fixing"
+	mov	r0, #-1
+	adds	r0, r0, #0
+	usr_ret	lr
+#endif
+
+#else
+#error "incoherent kernel configuration"
+#endif
+
+	kuser_pad __kuser_cmpxchg64, 64
+
+__kuser_memory_barrier:				@ 0xffff0fa0
+	smp_dmb	arm
+	usr_ret	lr
+
+	kuser_pad __kuser_memory_barrier, 32
+
+__kuser_cmpxchg:				@ 0xffff0fc0
+
+#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+
+	/*
+	 * Poor you.  No fast solution possible...
+	 * The kernel itself must perform the operation.
+	 * A special ghost syscall is used for that (see traps.c).
+	 */
+	stmfd	sp!, {r7, lr}
+	ldr	r7, 1f			@ it's 20 bits
+	swi	__ARM_NR_cmpxchg
+	ldmfd	sp!, {r7, pc}
+1:	.word	__ARM_NR_cmpxchg
+
+#elif __LINUX_ARM_ARCH__ < 6
+
+#ifdef CONFIG_MMU
+
+	/*
+	 * The only thing that can break atomicity in this cmpxchg
+	 * implementation is either an IRQ or a data abort exception
+	 * causing another process/thread to be scheduled in the middle
+	 * of the critical sequence.  To prevent this, code is added to
+	 * the IRQ and data abort exception handlers to set the pc back
+	 * to the beginning of the critical section if it is found to be
+	 * within that critical section (see kuser_cmpxchg_fixup).
+	 */
+1:	ldr	r3, [r2]			@ load current val
+	subs	r3, r3, r0			@ compare with oldval
+2:	streq	r1, [r2]			@ store newval if eq
+	rsbs	r0, r3, #0			@ set return val and C flag
+	usr_ret	lr
+
+	.text
+kuser_cmpxchg32_fixup:
+	@ Called from kuser_cmpxchg_check macro.
+	@ r4 = address of interrupted insn (must be preserved).
+	@ sp = saved regs. r7 and r8 are clobbered.
+	@ 1b = first critical insn, 2b = last critical insn.
+	@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
+	mov	r7, #0xffff0fff
+	sub	r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg)))
+	subs	r8, r4, r7
+	rsbcss	r8, r8, #(2b - 1b)
+	strcs	r7, [sp, #S_PC]
+	ret	lr
+	.previous
+
+#else
+#warning "NPTL on non MMU needs fixing"
+	mov	r0, #-1
+	adds	r0, r0, #0
+	usr_ret	lr
+#endif
+
+#else
+
+	smp_dmb	arm
+1:	ldrex	r3, [r2]
+	subs	r3, r3, r0
+	strexeq	r3, r1, [r2]
+	teqeq	r3, #1
+	beq	1b
+	rsbs	r0, r3, #0
+	/* beware -- each __kuser slot must be 8 instructions max */
+	ALT_SMP(b	__kuser_memory_barrier)
+	ALT_UP(usr_ret	lr)
+
+#endif
+
+	kuser_pad __kuser_cmpxchg, 32
+
+__kuser_get_tls:				@ 0xffff0fe0
+	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
+	usr_ret	lr
+	mrc	p15, 0, r0, c13, c0, 3	@ 0xffff0fe8 hardware TLS code
+	kuser_pad __kuser_get_tls, 16
+	.rep	3
+	.word	0			@ 0xffff0ff0 software TLS value, then
+	.endr				@ pad up to __kuser_helper_version
+
+__kuser_helper_version:				@ 0xffff0ffc
+	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)
+
+	.globl	__kuser_helper_end
+__kuser_helper_end:
+
+#endif
+
+ THUMB(	.thumb	)
+
+/*
+ * Vector stubs.
+ *
+ * This code is copied to 0xffff1000 so we can use branches in the
+ * vectors, rather than ldr's.  Note that this code must not exceed
+ * a page size.
+ *
+ * Common stub entry macro:
+ *   Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
+ *
+ * SP points to a minimal amount of processor-private memory, the address
+ * of which is copied into r0 for the mode specific abort handler.
+ */
+	.macro	vector_stub, name, mode, correction=0
+	.align	5
+
+vector_\name:
+	.if \correction
+	sub	lr, lr, #\correction
+	.endif
+
+	@
+	@ Save r0, lr_<exception> (parent PC) and spsr_<exception>
+	@ (parent CPSR)
+	@
+	stmia	sp, {r0, lr}		@ save r0, lr
+	mrs	lr, spsr
+	str	lr, [sp, #8]		@ save spsr
+
+	@
+	@ Prepare for SVC32 mode.  IRQs remain disabled.
+	@
+	mrs	r0, cpsr
+	eor	r0, r0, #(\mode ^ SVC_MODE | PSR_ISETSTATE)
+	msr	spsr_cxsf, r0
+
+	@
+	@ the branch table must immediately follow this code
+	@
+	and	lr, lr, #0x0f
+ THUMB(	adr	r0, 1f			)
+ THUMB(	ldr	lr, [r0, lr, lsl #2]	)
+	mov	r0, sp
+ ARM(	ldr	lr, [pc, lr, lsl #2]	)
+	movs	pc, lr			@ branch to handler in SVC mode
+ENDPROC(vector_\name)
+
+	.align	2
+	@ handler addresses follow this label
+1:
+	.endm
+
+	.section .stubs, "ax", %progbits
+__stubs_start:
+	@ This must be the first word
+	.word	vector_swi
+
+vector_rst:
+ ARM(	swi	SYS_ERROR0	)
+ THUMB(	svc	#0		)
+ THUMB(	nop			)
+	b	vector_und
+
+/*
+ * Interrupt dispatcher
+ */
+	vector_stub	irq, IRQ_MODE, 4
+
+	.long	__irq_usr			@  0  (USR_26 / USR_32)
+	.long	__irq_invalid			@  1  (FIQ_26 / FIQ_32)
+	.long	__irq_invalid			@  2  (IRQ_26 / IRQ_32)
+	.long	__irq_svc			@  3  (SVC_26 / SVC_32)
+	.long	__irq_invalid			@  4
+	.long	__irq_invalid			@  5
+	.long	__irq_invalid			@  6
+	.long	__irq_invalid			@  7
+	.long	__irq_invalid			@  8
+	.long	__irq_invalid			@  9
+	.long	__irq_invalid			@  a
+	.long	__irq_invalid			@  b
+	.long	__irq_invalid			@  c
+	.long	__irq_invalid			@  d
+	.long	__irq_invalid			@  e
+	.long	__irq_invalid			@  f
+
+/*
+ * Data abort dispatcher
+ * Enter in ABT mode, spsr = USR CPSR, lr = USR PC
+ */
+	vector_stub	dabt, ABT_MODE, 8
+
+	.long	__dabt_usr			@  0  (USR_26 / USR_32)
+	.long	__dabt_invalid			@  1  (FIQ_26 / FIQ_32)
+	.long	__dabt_invalid			@  2  (IRQ_26 / IRQ_32)
+	.long	__dabt_svc			@  3  (SVC_26 / SVC_32)
+	.long	__dabt_invalid			@  4
+	.long	__dabt_invalid			@  5
+	.long	__dabt_invalid			@  6
+	.long	__dabt_invalid			@  7
+	.long	__dabt_invalid			@  8
+	.long	__dabt_invalid			@  9
+	.long	__dabt_invalid			@  a
+	.long	__dabt_invalid			@  b
+	.long	__dabt_invalid			@  c
+	.long	__dabt_invalid			@  d
+	.long	__dabt_invalid			@  e
+	.long	__dabt_invalid			@  f
+
+/*
+ * Prefetch abort dispatcher
+ * Enter in ABT mode, spsr = USR CPSR, lr = USR PC
+ */
+	vector_stub	pabt, ABT_MODE, 4
+
+	.long	__pabt_usr			@  0 (USR_26 / USR_32)
+	.long	__pabt_invalid			@  1 (FIQ_26 / FIQ_32)
+	.long	__pabt_invalid			@  2 (IRQ_26 / IRQ_32)
+	.long	__pabt_svc			@  3 (SVC_26 / SVC_32)
+	.long	__pabt_invalid			@  4
+	.long	__pabt_invalid			@  5
+	.long	__pabt_invalid			@  6
+	.long	__pabt_invalid			@  7
+	.long	__pabt_invalid			@  8
+	.long	__pabt_invalid			@  9
+	.long	__pabt_invalid			@  a
+	.long	__pabt_invalid			@  b
+	.long	__pabt_invalid			@  c
+	.long	__pabt_invalid			@  d
+	.long	__pabt_invalid			@  e
+	.long	__pabt_invalid			@  f
+
+/*
+ * Undef instr entry dispatcher
+ * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
+ */
+	vector_stub	und, UND_MODE
+
+	.long	__und_usr			@  0 (USR_26 / USR_32)
+	.long	__und_invalid			@  1 (FIQ_26 / FIQ_32)
+	.long	__und_invalid			@  2 (IRQ_26 / IRQ_32)
+	.long	__und_svc			@  3 (SVC_26 / SVC_32)
+	.long	__und_invalid			@  4
+	.long	__und_invalid			@  5
+	.long	__und_invalid			@  6
+	.long	__und_invalid			@  7
+	.long	__und_invalid			@  8
+	.long	__und_invalid			@  9
+	.long	__und_invalid			@  a
+	.long	__und_invalid			@  b
+	.long	__und_invalid			@  c
+	.long	__und_invalid			@  d
+	.long	__und_invalid			@  e
+	.long	__und_invalid			@  f
+
+	.align	5
+
+/*=============================================================================
+ * Address exception handler
+ *-----------------------------------------------------------------------------
+ * These aren't too critical.
+ * (they're not supposed to happen, and won't happen in 32-bit data mode).
+ */
+
+vector_addrexcptn:
+	b	vector_addrexcptn
+
+/*=============================================================================
+ * FIQ "NMI" handler
+ *-----------------------------------------------------------------------------
+ * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86
+ * systems.
+ */
+	vector_stub	fiq, FIQ_MODE, 4
+
+	.long	__fiq_usr			@  0  (USR_26 / USR_32)
+	.long	__fiq_svc			@  1  (FIQ_26 / FIQ_32)
+	.long	__fiq_svc			@  2  (IRQ_26 / IRQ_32)
+	.long	__fiq_svc			@  3  (SVC_26 / SVC_32)
+	.long	__fiq_svc			@  4
+	.long	__fiq_svc			@  5
+	.long	__fiq_svc			@  6
+	.long	__fiq_abt			@  7
+	.long	__fiq_svc			@  8
+	.long	__fiq_svc			@  9
+	.long	__fiq_svc			@  a
+	.long	__fiq_svc			@  b
+	.long	__fiq_svc			@  c
+	.long	__fiq_svc			@  d
+	.long	__fiq_svc			@  e
+	.long	__fiq_svc			@  f
+
+	.globl	vector_fiq_offset
+	.equ	vector_fiq_offset, vector_fiq
+
+	.section .vectors, "ax", %progbits
+__vectors_start:
+	W(b)	vector_rst
+	W(b)	vector_und
+	W(ldr)	pc, __vectors_start + 0x1000
+	W(b)	vector_pabt
+	W(b)	vector_dabt
+	W(b)	vector_addrexcptn
+	W(b)	vector_irq
+	W(b)	vector_fiq
+
+	.data
+
+	.globl	cr_alignment
+cr_alignment:
+	.space	4
+
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	.globl	handle_arch_irq
+handle_arch_irq:
+	.space	4
+#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
new file mode 100644
index 000000000..4e7f40c57
--- /dev/null
+++ b/arch/arm/kernel/entry-common.S
@@ -0,0 +1,388 @@
+/*
+ *  linux/arch/arm/kernel/entry-common.S
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/assembler.h>
+#include <asm/unistd.h>
+#include <asm/ftrace.h>
+#include <asm/unwind.h>
+
+#ifdef CONFIG_NEED_RET_TO_USER
+#include <mach/entry-macro.S>
+#else
+	.macro  arch_ret_to_user, tmp1, tmp2
+	.endm
+#endif
+
+#include "entry-header.S"
+
+
+	.align	5
+/*
+ * This is the fast syscall return path.  We do as little as
+ * possible here, and this includes saving r0 back into the SVC
+ * stack.
+ */
+ret_fast_syscall:
+ UNWIND(.fnstart	)
+ UNWIND(.cantunwind	)
+	disable_irq				@ disable interrupts
+	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
+	tst	r1, #_TIF_SYSCALL_WORK
+	bne	__sys_trace_return
+	tst	r1, #_TIF_WORK_MASK
+	bne	fast_work_pending
+	asm_trace_hardirqs_on
+
+	/* perform architecture specific actions before user return */
+	arch_ret_to_user r1, lr
+	ct_user_enter
+
+	restore_user_regs fast = 1, offset = S_OFF
+ UNWIND(.fnend		)
+
+/*
+ * Ok, we need to do extra processing, enter the slow path.
+ */
+fast_work_pending:
+	str	r0, [sp, #S_R0+S_OFF]!		@ returned r0
+work_pending:
+	mov	r0, sp				@ 'regs'
+	mov	r2, why				@ 'syscall'
+	bl	do_work_pending
+	cmp	r0, #0
+	beq	no_work_pending
+	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
+	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
+	b	local_restart			@ ... and off we go
+
+/*
+ * "slow" syscall return path.  "why" tells us if this was a real syscall.
+ */
+ENTRY(ret_to_user)
+ret_slow_syscall:
+	disable_irq				@ disable interrupts
+ENTRY(ret_to_user_from_irq)
+	ldr	r1, [tsk, #TI_FLAGS]
+	tst	r1, #_TIF_WORK_MASK
+	bne	work_pending
+no_work_pending:
+	asm_trace_hardirqs_on
+
+	/* perform architecture specific actions before user return */
+	arch_ret_to_user r1, lr
+	ct_user_enter save = 0
+
+	restore_user_regs fast = 0, offset = 0
+ENDPROC(ret_to_user_from_irq)
+ENDPROC(ret_to_user)
+
+/*
+ * This is how we return from a fork.
+ */
+ENTRY(ret_from_fork)
+	bl	schedule_tail
+	cmp	r5, #0
+	movne	r0, r4
+	adrne	lr, BSYM(1f)
+	retne	r5
+1:	get_thread_info tsk
+	b	ret_slow_syscall
+ENDPROC(ret_from_fork)
+
+	.equ NR_syscalls,0
+#define CALL(x) .equ NR_syscalls,NR_syscalls+1
+#include "calls.S"
+
+/*
+ * Ensure that the system call table is equal to __NR_syscalls,
+ * which is the value the rest of the system sees
+ */
+.ifne NR_syscalls - __NR_syscalls
+.error "__NR_syscalls is not equal to the size of the syscall table"
+.endif
+
+#undef CALL
+#define CALL(x) .long x
+
+/*=============================================================================
+ * SWI handler
+ *-----------------------------------------------------------------------------
+ */
+
+	.align	5
+ENTRY(vector_swi)
+#ifdef CONFIG_CPU_V7M
+	v7m_exception_entry
+#else
+	sub	sp, sp, #S_FRAME_SIZE
+	stmia	sp, {r0 - r12}			@ Calling r0 - r12
+ ARM(	add	r8, sp, #S_PC		)
+ ARM(	stmdb	r8, {sp, lr}^		)	@ Calling sp, lr
+ THUMB(	mov	r8, sp			)
+ THUMB(	store_user_sp_lr r8, r10, S_SP	)	@ calling sp, lr
+	mrs	r8, spsr			@ called from non-FIQ mode, so ok.
+	str	lr, [sp, #S_PC]			@ Save calling PC
+	str	r8, [sp, #S_PSR]		@ Save CPSR
+	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
+#endif
+	zero_fp
+	alignment_trap r10, ip, __cr_alignment
+	enable_irq
+	ct_user_exit
+	get_thread_info tsk
+
+	/*
+	 * Get the system call number.
+	 */
+
+#if defined(CONFIG_OABI_COMPAT)
+
+	/*
+	 * If we have CONFIG_OABI_COMPAT then we need to look at the swi
+	 * value to determine if it is an EABI or an old ABI call.
+	 */
+#ifdef CONFIG_ARM_THUMB
+	tst	r8, #PSR_T_BIT
+	movne	r10, #0				@ no thumb OABI emulation
+ USER(	ldreq	r10, [lr, #-4]		)	@ get SWI instruction
+#else
+ USER(	ldr	r10, [lr, #-4]		)	@ get SWI instruction
+#endif
+ ARM_BE8(rev	r10, r10)			@ little endian instruction
+
+#elif defined(CONFIG_AEABI)
+
+	/*
+	 * Pure EABI user space always put syscall number into scno (r7).
+	 */
+#elif defined(CONFIG_ARM_THUMB)
+	/* Legacy ABI only, possibly thumb mode. */
+	tst	r8, #PSR_T_BIT			@ this is SPSR from save_user_regs
+	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
+ USER(	ldreq	scno, [lr, #-4]		)
+
+#else
+	/* Legacy ABI only. */
+ USER(	ldr	scno, [lr, #-4]		)	@ get SWI instruction
+#endif
+
+	adr	tbl, sys_call_table		@ load syscall table pointer
+
+#if defined(CONFIG_OABI_COMPAT)
+	/*
+	 * If the swi argument is zero, this is an EABI call and we do nothing.
+	 *
+	 * If this is an old ABI call, get the syscall number into scno and
+	 * get the old ABI syscall table address.
+	 */
+	bics	r10, r10, #0xff000000
+	eorne	scno, r10, #__NR_OABI_SYSCALL_BASE
+	ldrne	tbl, =sys_oabi_call_table
+#elif !defined(CONFIG_AEABI)
+	bic	scno, scno, #0xff000000		@ mask off SWI op-code
+	eor	scno, scno, #__NR_SYSCALL_BASE	@ check OS number
+#endif
+
+local_restart:
+	ldr	r10, [tsk, #TI_FLAGS]		@ check for syscall tracing
+	stmdb	sp!, {r4, r5}			@ push fifth and sixth args
+
+	tst	r10, #_TIF_SYSCALL_WORK		@ are we tracing syscalls?
+	bne	__sys_trace
+
+	cmp	scno, #NR_syscalls		@ check upper syscall limit
+	adr	lr, BSYM(ret_fast_syscall)	@ return address
+	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
+
+	add	r1, sp, #S_OFF
+2:	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
+	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
+	bcs	arm_syscall
+	mov	why, #0				@ no longer a real syscall
+	b	sys_ni_syscall			@ not private func
+
+#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
+	/*
+	 * We failed to handle a fault trying to access the page
+	 * containing the swi instruction, but we're not really in a
+	 * position to return -EFAULT. Instead, return back to the
+	 * instruction and re-enter the user fault handling path trying
+	 * to page it in. This will likely result in sending SEGV to the
+	 * current task.
+	 */
+9001:
+	sub	lr, lr, #4
+	str	lr, [sp, #S_PC]
+	b	ret_fast_syscall
+#endif
+ENDPROC(vector_swi)
+
+	/*
+	 * This is the really slow path.  We're going to be doing
+	 * context switches, and waiting for our parent to respond.
+	 */
+__sys_trace:
+	mov	r1, scno
+	add	r0, sp, #S_OFF
+	bl	syscall_trace_enter
+
+	adr	lr, BSYM(__sys_trace_return)	@ return address
+	mov	scno, r0			@ syscall number (possibly new)
+	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
+	cmp	scno, #NR_syscalls		@ check upper syscall limit
+	ldmccia	r1, {r0 - r6}			@ have to reload r0 - r6
+	stmccia	sp, {r4, r5}			@ and update the stack args
+	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
+	cmp	scno, #-1			@ skip the syscall?
+	bne	2b
+	add	sp, sp, #S_OFF			@ restore stack
+	b	ret_slow_syscall
+
+__sys_trace_return:
+	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
+	mov	r0, sp
+	bl	syscall_trace_exit
+	b	ret_slow_syscall
+
+	.align	5
+#ifdef CONFIG_ALIGNMENT_TRAP
+	.type	__cr_alignment, #object
+__cr_alignment:
+	.word	cr_alignment
+#endif
+	.ltorg
+
+/*
+ * This is the syscall table declaration for native ABI syscalls.
+ * With EABI a couple syscalls are obsolete and defined as sys_ni_syscall.
+ */
+#define ABI(native, compat) native
+#ifdef CONFIG_AEABI
+#define OBSOLETE(syscall) sys_ni_syscall
+#else
+#define OBSOLETE(syscall) syscall
+#endif
+
+	.type	sys_call_table, #object
+ENTRY(sys_call_table)
+#include "calls.S"
+#undef ABI
+#undef OBSOLETE
+
+/*============================================================================
+ * Special system call wrappers
+ */
+@ r0 = syscall number
+@ r8 = syscall table
+sys_syscall:
+		bic	scno, r0, #__NR_OABI_SYSCALL_BASE
+		cmp	scno, #__NR_syscall - __NR_SYSCALL_BASE
+		cmpne	scno, #NR_syscalls	@ check range
+		stmloia	sp, {r5, r6}		@ shuffle args
+		movlo	r0, r1
+		movlo	r1, r2
+		movlo	r2, r3
+		movlo	r3, r4
+		ldrlo	pc, [tbl, scno, lsl #2]
+		b	sys_ni_syscall
+ENDPROC(sys_syscall)
+
+sys_sigreturn_wrapper:
+		add	r0, sp, #S_OFF
+		mov	why, #0		@ prevent syscall restart handling
+		b	sys_sigreturn
+ENDPROC(sys_sigreturn_wrapper)
+
+sys_rt_sigreturn_wrapper:
+		add	r0, sp, #S_OFF
+		mov	why, #0		@ prevent syscall restart handling
+		b	sys_rt_sigreturn
+ENDPROC(sys_rt_sigreturn_wrapper)
+
+sys_statfs64_wrapper:
+		teq	r1, #88
+		moveq	r1, #84
+		b	sys_statfs64
+ENDPROC(sys_statfs64_wrapper)
+
+sys_fstatfs64_wrapper:
+		teq	r1, #88
+		moveq	r1, #84
+		b	sys_fstatfs64
+ENDPROC(sys_fstatfs64_wrapper)
+
+/*
+ * Note: off_4k (r5) is always units of 4K.  If we can't do the requested
+ * offset, we return EINVAL.
+ */
+sys_mmap2:
+#if PAGE_SHIFT > 12
+		tst	r5, #PGOFF_MASK
+		moveq	r5, r5, lsr #PAGE_SHIFT - 12
+		streq	r5, [sp, #4]
+		beq	sys_mmap_pgoff
+		mov	r0, #-EINVAL
+		ret	lr
+#else
+		str	r5, [sp, #4]
+		b	sys_mmap_pgoff
+#endif
+ENDPROC(sys_mmap2)
+
+#ifdef CONFIG_OABI_COMPAT
+
+/*
+ * These are syscalls with argument register differences
+ */
+
+sys_oabi_pread64:
+		stmia	sp, {r3, r4}
+		b	sys_pread64
+ENDPROC(sys_oabi_pread64)
+
+sys_oabi_pwrite64:
+		stmia	sp, {r3, r4}
+		b	sys_pwrite64
+ENDPROC(sys_oabi_pwrite64)
+
+sys_oabi_truncate64:
+		mov	r3, r2
+		mov	r2, r1
+		b	sys_truncate64
+ENDPROC(sys_oabi_truncate64)
+
+sys_oabi_ftruncate64:
+		mov	r3, r2
+		mov	r2, r1
+		b	sys_ftruncate64
+ENDPROC(sys_oabi_ftruncate64)
+
+sys_oabi_readahead:
+		str	r3, [sp]
+		mov	r3, r2
+		mov	r2, r1
+		b	sys_readahead
+ENDPROC(sys_oabi_readahead)
+
+/*
+ * Let's declare a second syscall table for old ABI binaries
+ * using the compatibility syscall entries.
+ */
+#define ABI(native, compat) compat
+#define OBSOLETE(syscall) syscall
+
+	.type	sys_oabi_call_table, #object
+ENTRY(sys_oabi_call_table)
+#include "calls.S"
+#undef ABI
+#undef OBSOLETE
+
+#endif
+
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
new file mode 100644
index 000000000..fe57c73e7
--- /dev/null
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -0,0 +1,243 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/assembler.h>
+#include <asm/ftrace.h>
+#include <asm/unwind.h>
+
+#include "entry-header.S"
+
+/*
+ * When compiling with -pg, gcc inserts a call to the mcount routine at the
+ * start of every function.  In mcount, apart from the function's address (in
+ * lr), we need to get hold of the function's caller's address.
+ *
+ * Older GCCs (pre-4.4) inserted a call to a routine called mcount like this:
+ *
+ *	bl	mcount
+ *
+ * These versions have the limitation that in order for the mcount routine to
+ * be able to determine the function's caller's address, an APCS-style frame
+ * pointer (which is set up with something like the code below) is required.
+ *
+ *	mov     ip, sp
+ *	push    {fp, ip, lr, pc}
+ *	sub     fp, ip, #4
+ *
+ * With EABI, these frame pointers are not available unless -mapcs-frame is
+ * specified, and if building as Thumb-2, not even then.
+ *
+ * Newer GCCs (4.4+) solve this problem by introducing a new version of mcount,
+ * with call sites like:
+ *
+ *	push	{lr}
+ *	bl	__gnu_mcount_nc
+ *
+ * With these compilers, frame pointers are not necessary.
+ *
+ * mcount can be thought of as a function called in the middle of a subroutine
+ * call.  As such, it needs to be transparent for both the caller and the
+ * callee: the original lr needs to be restored when leaving mcount, and no
+ * registers should be clobbered.  (In the __gnu_mcount_nc implementation, we
+ * clobber the ip register.  This is OK because the ARM calling convention
+ * allows it to be clobbered in subroutines and doesn't use it to hold
+ * parameters.)
+ *
+ * When using dynamic ftrace, we patch out the mcount call by a "mov r0, r0"
+ * for the mcount case, and a "pop {lr}" for the __gnu_mcount_nc case (see
+ * arch/arm/kernel/ftrace.c).
+ */
+
+#ifndef CONFIG_OLD_MCOUNT
+#if (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
+#error Ftrace requires CONFIG_FRAME_POINTER=y with GCC older than 4.4.0.
+#endif
+#endif
+
+.macro mcount_adjust_addr rd, rn
+	bic	\rd, \rn, #1		@ clear the Thumb bit if present
+	sub	\rd, \rd, #MCOUNT_INSN_SIZE
+.endm
+
+.macro __mcount suffix
+	mcount_enter
+	ldr	r0, =ftrace_trace_function
+	ldr	r2, [r0]
+	adr	r0, .Lftrace_stub
+	cmp	r0, r2
+	bne	1f
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ldr     r1, =ftrace_graph_return
+	ldr     r2, [r1]
+	cmp     r0, r2
+	bne     ftrace_graph_caller\suffix
+
+	ldr     r1, =ftrace_graph_entry
+	ldr     r2, [r1]
+	ldr     r0, =ftrace_graph_entry_stub
+	cmp     r0, r2
+	bne     ftrace_graph_caller\suffix
+#endif
+
+	mcount_exit
+
+1: 	mcount_get_lr	r1			@ lr of instrumented func
+	mcount_adjust_addr	r0, lr		@ instrumented function
+	adr	lr, BSYM(2f)
+	mov	pc, r2
+2:	mcount_exit
+.endm
+
+.macro __ftrace_caller suffix
+	mcount_enter
+
+	mcount_get_lr	r1			@ lr of instrumented func
+	mcount_adjust_addr	r0, lr		@ instrumented function
+
+	.globl ftrace_call\suffix
+ftrace_call\suffix:
+	bl	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl ftrace_graph_call\suffix
+ftrace_graph_call\suffix:
+	mov	r0, r0
+#endif
+
+	mcount_exit
+.endm
+
+.macro __ftrace_graph_caller
+	sub	r0, fp, #4		@ &lr of instrumented routine (&parent)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	@ called from __ftrace_caller, saved in mcount_enter
+	ldr	r1, [sp, #16]		@ instrumented routine (func)
+	mcount_adjust_addr	r1, r1
+#else
+	@ called from __mcount, untouched in lr
+	mcount_adjust_addr	r1, lr	@ instrumented routine (func)
+#endif
+	mov	r2, fp			@ frame pointer
+	bl	prepare_ftrace_return
+	mcount_exit
+.endm
+
+#ifdef CONFIG_OLD_MCOUNT
+/*
+ * mcount
+ */
+
+.macro mcount_enter
+	stmdb	sp!, {r0-r3, lr}
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [fp, #-4]
+.endm
+
+.macro mcount_exit
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {r0-r3, pc}
+.endm
+
+ENTRY(mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	stmdb	sp!, {lr}
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {pc}
+#else
+	__mcount _old
+#endif
+ENDPROC(mcount)
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller_old)
+	__ftrace_caller _old
+ENDPROC(ftrace_caller_old)
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller_old)
+	__ftrace_graph_caller
+ENDPROC(ftrace_graph_caller_old)
+#endif
+
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+#endif
+
+/*
+ * __gnu_mcount_nc
+ */
+
+.macro mcount_enter
+/*
+ * This pad compensates for the push {lr} at the call site.  Note that we are
+ * unable to unwind through a function which does not otherwise save its lr.
+ */
+ UNWIND(.pad	#4)
+	stmdb	sp!, {r0-r3, lr}
+ UNWIND(.save	{r0-r3, lr})
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [sp, #20]
+.endm
+
+.macro mcount_exit
+	ldmia	sp!, {r0-r3, ip, lr}
+	ret	ip
+.endm
+
+ENTRY(__gnu_mcount_nc)
+UNWIND(.fnstart)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	mov	ip, lr
+	ldmia	sp!, {lr}
+	ret	ip
+#else
+	__mcount
+#endif
+UNWIND(.fnend)
+ENDPROC(__gnu_mcount_nc)
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller)
+UNWIND(.fnstart)
+	__ftrace_caller
+UNWIND(.fnend)
+ENDPROC(ftrace_caller)
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+UNWIND(.fnstart)
+	__ftrace_graph_caller
+UNWIND(.fnend)
+ENDPROC(ftrace_graph_caller)
+#endif
+
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl return_to_handler
+return_to_handler:
+	stmdb	sp!, {r0-r3}
+	mov	r0, fp			@ frame pointer
+	bl	ftrace_return_to_handler
+	mov	lr, r0			@ r0 has real ret addr
+	ldmia	sp!, {r0-r3}
+	ret	lr
+#endif
+
+ENTRY(ftrace_stub)
+.Lftrace_stub:
+	ret	lr
+ENDPROC(ftrace_stub)
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
new file mode 100644
index 000000000..1a0045abe
--- /dev/null
+++ b/arch/arm/kernel/entry-header.S
@@ -0,0 +1,401 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/thread_info.h>
+#include <asm/v7m.h>
+
+@ Bad Abort numbers
+@ -----------------
+@
+#define BAD_PREFETCH	0
+#define BAD_DATA	1
+#define BAD_ADDREXCPTN	2
+#define BAD_IRQ		3
+#define BAD_UNDEFINSTR	4
+
+@
+@ Most of the stack format comes from struct pt_regs, but with
+@ the addition of 8 bytes for storing syscall args 5 and 6.
+@ This _must_ remain a multiple of 8 for EABI.
+@
+#define S_OFF		8
+
+/* 
+ * The SWI code relies on the fact that R0 is at the bottom of the stack
+ * (due to slow/fast restore user regs).
+ */
+#if S_R0 != 0
+#error "Please fix"
+#endif
+
+	.macro	zero_fp
+#ifdef CONFIG_FRAME_POINTER
+	mov	fp, #0
+#endif
+	.endm
+
+#ifdef CONFIG_ALIGNMENT_TRAP
+#define ATRAP(x...) x
+#else
+#define ATRAP(x...)
+#endif
+
+	.macro	alignment_trap, rtmp1, rtmp2, label
+#ifdef CONFIG_ALIGNMENT_TRAP
+	mrc	p15, 0, \rtmp2, c1, c0, 0
+	ldr	\rtmp1, \label
+	ldr	\rtmp1, [\rtmp1]
+	teq	\rtmp1, \rtmp2
+	mcrne	p15, 0, \rtmp1, c1, c0, 0
+#endif
+	.endm
+
+#ifdef CONFIG_CPU_V7M
+/*
+ * ARMv7-M exception entry/exit macros.
+ *
+ * xPSR, ReturnAddress(), LR (R14), R12, R3, R2, R1, and R0 are
+ * automatically saved on the current stack (32 words) before
+ * switching to the exception stack (SP_main).
+ *
+ * If exception is taken while in user mode, SP_main is
+ * empty. Otherwise, SP_main is aligned to 64 bit automatically
+ * (CCR.STKALIGN set).
+ *
+ * Linux assumes that the interrupts are disabled when entering an
+ * exception handler and it may BUG if this is not the case. Interrupts
+ * are disabled during entry and reenabled in the exit macro.
+ *
+ * v7m_exception_slow_exit is used when returning from SVC or PendSV.
+ * When returning to kernel mode, we don't return from exception.
+ */
+	.macro	v7m_exception_entry
+	@ determine the location of the registers saved by the core during
+	@ exception entry. Depending on the mode the cpu was in when the
+	@ exception happend that is either on the main or the process stack.
+	@ Bit 2 of EXC_RETURN stored in the lr register specifies which stack
+	@ was used.
+	tst	lr, #EXC_RET_STACK_MASK
+	mrsne	r12, psp
+	moveq	r12, sp
+
+	@ we cannot rely on r0-r3 and r12 matching the value saved in the
+	@ exception frame because of tail-chaining. So these have to be
+	@ reloaded.
+	ldmia	r12!, {r0-r3}
+
+	@ Linux expects to have irqs off. Do it here before taking stack space
+	cpsid	i
+
+	sub	sp, #S_FRAME_SIZE-S_IP
+	stmdb	sp!, {r0-r11}
+
+	@ load saved r12, lr, return address and xPSR.
+	@ r0-r7 are used for signals and never touched from now on. Clobbering
+	@ r8-r12 is OK.
+	mov	r9, r12
+	ldmia	r9!, {r8, r10-r12}
+
+	@ calculate the original stack pointer value.
+	@ r9 currently points to the memory location just above the auto saved
+	@ xPSR.
+	@ The cpu might automatically 8-byte align the stack. Bit 9
+	@ of the saved xPSR specifies if stack aligning took place. In this case
+	@ another 32-bit value is included in the stack.
+
+	tst	r12, V7M_xPSR_FRAMEPTRALIGN
+	addne	r9, r9, #4
+
+	@ store saved r12 using str to have a register to hold the base for stm
+	str	r8, [sp, #S_IP]
+	add	r8, sp, #S_SP
+	@ store r13-r15, xPSR
+	stmia	r8!, {r9-r12}
+	@ store old_r0
+	str	r0, [r8]
+	.endm
+
+        /*
+	 * PENDSV and SVCALL are configured to have the same exception
+	 * priorities. As a kernel thread runs at SVCALL execution priority it
+	 * can never be preempted and so we will never have to return to a
+	 * kernel thread here.
+         */
+	.macro	v7m_exception_slow_exit ret_r0
+	cpsid	i
+	ldr	lr, =EXC_RET_THREADMODE_PROCESSSTACK
+
+	@ read original r12, sp, lr, pc and xPSR
+	add	r12, sp, #S_IP
+	ldmia	r12, {r1-r5}
+
+	@ an exception frame is always 8-byte aligned. To tell the hardware if
+	@ the sp to be restored is aligned or not set bit 9 of the saved xPSR
+	@ accordingly.
+	tst	r2, #4
+	subne	r2, r2, #4
+	orrne	r5, V7M_xPSR_FRAMEPTRALIGN
+	biceq	r5, V7M_xPSR_FRAMEPTRALIGN
+
+	@ ensure bit 0 is cleared in the PC, otherwise behaviour is
+	@ unpredictable
+	bic	r4, #1
+
+	@ write basic exception frame
+	stmdb	r2!, {r1, r3-r5}
+	ldmia	sp, {r1, r3-r5}
+	.if	\ret_r0
+	stmdb	r2!, {r0, r3-r5}
+	.else
+	stmdb	r2!, {r1, r3-r5}
+	.endif
+
+	@ restore process sp
+	msr	psp, r2
+
+	@ restore original r4-r11
+	ldmia	sp!, {r0-r11}
+
+	@ restore main sp
+	add	sp, sp, #S_FRAME_SIZE-S_IP
+
+	cpsie	i
+	bx	lr
+	.endm
+#endif	/* CONFIG_CPU_V7M */
+
+	@
+	@ Store/load the USER SP and LR registers by switching to the SYS
+	@ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not
+	@ available. Should only be called from SVC mode
+	@
+	.macro	store_user_sp_lr, rd, rtemp, offset = 0
+	mrs	\rtemp, cpsr
+	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
+	msr	cpsr_c, \rtemp			@ switch to the SYS mode
+
+	str	sp, [\rd, #\offset]		@ save sp_usr
+	str	lr, [\rd, #\offset + 4]		@ save lr_usr
+
+	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
+	msr	cpsr_c, \rtemp			@ switch back to the SVC mode
+	.endm
+
+	.macro	load_user_sp_lr, rd, rtemp, offset = 0
+	mrs	\rtemp, cpsr
+	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
+	msr	cpsr_c, \rtemp			@ switch to the SYS mode
+
+	ldr	sp, [\rd, #\offset]		@ load sp_usr
+	ldr	lr, [\rd, #\offset + 4]		@ load lr_usr
+
+	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
+	msr	cpsr_c, \rtemp			@ switch back to the SVC mode
+	.endm
+
+#ifndef CONFIG_THUMB2_KERNEL
+	.macro	svc_exit, rpsr, irq = 0
+	.if	\irq != 0
+	@ IRQs already off
+#ifdef CONFIG_TRACE_IRQFLAGS
+	@ The parent context IRQs must have been enabled to get here in
+	@ the first place, so there's no point checking the PSR I bit.
+	bl	trace_hardirqs_on
+#endif
+	.else
+	@ IRQs off again before pulling preserved data off the stack
+	disable_irq_notrace
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	\rpsr, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+	tst	\rpsr, #PSR_I_BIT
+	blne	trace_hardirqs_off
+#endif
+	.endif
+	msr	spsr_cxsf, \rpsr
+#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
+	@ We must avoid clrex due to Cortex-A15 erratum #830321
+	sub	r0, sp, #4			@ uninhabited address
+	strex	r1, r2, [r0]			@ clear the exclusive monitor
+#endif
+	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+	.endm
+
+	@
+	@ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
+	@
+	@ This macro acts in a similar manner to svc_exit but switches to FIQ
+	@ mode to restore the final part of the register state.
+	@
+	@ We cannot use the normal svc_exit procedure because that would
+	@ clobber spsr_svc (FIQ could be delivered during the first few
+	@ instructions of vector_swi meaning its contents have not been
+	@ saved anywhere).
+	@
+	@ Note that, unlike svc_exit, this macro also does not allow a caller
+	@ supplied rpsr. This is because the FIQ exceptions are not re-entrant
+	@ and the handlers cannot call into the scheduler (meaning the value
+	@ on the stack remains correct).
+	@
+	.macro  svc_exit_via_fiq
+	mov	r0, sp
+	ldmib	r0, {r1 - r14}	@ abort is deadly from here onward (it will
+				@ clobber state restored below)
+	msr	cpsr_c, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
+	add	r8, r0, #S_PC
+	ldr	r9, [r0, #S_PSR]
+	msr	spsr_cxsf, r9
+	ldr	r0, [r0, #S_R0]
+	ldmia	r8, {pc}^
+	.endm
+
+	.macro	restore_user_regs, fast = 0, offset = 0
+	mov	r2, sp
+	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
+	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
+	msr	spsr_cxsf, r1			@ save in spsr_svc
+#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
+	@ We must avoid clrex due to Cortex-A15 erratum #830321
+	strex	r1, r2, [r2]			@ clear the exclusive monitor
+#endif
+	.if	\fast
+	ldmdb	r2, {r1 - lr}^			@ get calling r1 - lr
+	.else
+	ldmdb	r2, {r0 - lr}^			@ get calling r0 - lr
+	.endif
+	mov	r0, r0				@ ARMv5T and earlier require a nop
+						@ after ldm {}^
+	add	sp, sp, #\offset + S_FRAME_SIZE
+	movs	pc, lr				@ return & move spsr_svc into cpsr
+	.endm
+
+#else	/* CONFIG_THUMB2_KERNEL */
+	.macro	svc_exit, rpsr, irq = 0
+	.if	\irq != 0
+	@ IRQs already off
+#ifdef CONFIG_TRACE_IRQFLAGS
+	@ The parent context IRQs must have been enabled to get here in
+	@ the first place, so there's no point checking the PSR I bit.
+	bl	trace_hardirqs_on
+#endif
+	.else
+	@ IRQs off again before pulling preserved data off the stack
+	disable_irq_notrace
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	\rpsr, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+	tst	\rpsr, #PSR_I_BIT
+	blne	trace_hardirqs_off
+#endif
+	.endif
+	ldr	lr, [sp, #S_SP]			@ top of the stack
+	ldrd	r0, r1, [sp, #S_LR]		@ calling lr and pc
+
+	@ We must avoid clrex due to Cortex-A15 erratum #830321
+	strex	r2, r1, [sp, #S_LR]		@ clear the exclusive monitor
+
+	stmdb	lr!, {r0, r1, \rpsr}		@ calling lr and rfe context
+	ldmia	sp, {r0 - r12}
+	mov	sp, lr
+	ldr	lr, [sp], #4
+	rfeia	sp!
+	.endm
+
+	@
+	@ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
+	@
+	@ For full details see non-Thumb implementation above.
+	@
+	.macro  svc_exit_via_fiq
+	add	r0, sp, #S_R2
+	ldr	lr, [sp, #S_LR]
+	ldr	sp, [sp, #S_SP] @ abort is deadly from here onward (it will
+			        @ clobber state restored below)
+	ldmia	r0, {r2 - r12}
+	mov	r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
+	msr	cpsr_c, r1
+	sub	r0, #S_R2
+	add	r8, r0, #S_PC
+	ldmia	r0, {r0 - r1}
+	rfeia	r8
+	.endm
+
+#ifdef CONFIG_CPU_V7M
+	/*
+	 * Note we don't need to do clrex here as clearing the local monitor is
+	 * part of each exception entry and exit sequence.
+	 */
+	.macro	restore_user_regs, fast = 0, offset = 0
+	.if	\offset
+	add	sp, #\offset
+	.endif
+	v7m_exception_slow_exit ret_r0 = \fast
+	.endm
+#else	/* ifdef CONFIG_CPU_V7M */
+	.macro	restore_user_regs, fast = 0, offset = 0
+	mov	r2, sp
+	load_user_sp_lr r2, r3, \offset + S_SP	@ calling sp, lr
+	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
+	ldr	lr, [sp, #\offset + S_PC]	@ get pc
+	add	sp, sp, #\offset + S_SP
+	msr	spsr_cxsf, r1			@ save in spsr_svc
+
+	@ We must avoid clrex due to Cortex-A15 erratum #830321
+	strex	r1, r2, [sp]			@ clear the exclusive monitor
+
+	.if	\fast
+	ldmdb	sp, {r1 - r12}			@ get calling r1 - r12
+	.else
+	ldmdb	sp, {r0 - r12}			@ get calling r0 - r12
+	.endif
+	add	sp, sp, #S_FRAME_SIZE - S_SP
+	movs	pc, lr				@ return & move spsr_svc into cpsr
+	.endm
+#endif	/* ifdef CONFIG_CPU_V7M / else */
+#endif	/* !CONFIG_THUMB2_KERNEL */
+
+/*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+	.macro ct_user_exit, save = 1
+#ifdef CONFIG_CONTEXT_TRACKING
+	.if	\save
+	stmdb   sp!, {r0-r3, ip, lr}
+	bl	context_tracking_user_exit
+	ldmia	sp!, {r0-r3, ip, lr}
+	.else
+	bl	context_tracking_user_exit
+	.endif
+#endif
+	.endm
+
+	.macro ct_user_enter, save = 1
+#ifdef CONFIG_CONTEXT_TRACKING
+	.if	\save
+	stmdb   sp!, {r0-r3, ip, lr}
+	bl	context_tracking_user_enter
+	ldmia	sp!, {r0-r3, ip, lr}
+	.else
+	bl	context_tracking_user_enter
+	.endif
+#endif
+	.endm
+
+/*
+ * These are the registers used in the syscall handler, and allow us to
+ * have in theory up to 7 arguments to a function - r0 to r6.
+ *
+ * r7 is reserved for the system call number for thumb mode.
+ *
+ * Note that tbl == why is intentional.
+ *
+ * We must set at least "tsk" and "why" when calling ret_with_reschedule.
+ */
+scno	.req	r7		@ syscall number
+tbl	.req	r8		@ syscall table pointer
+why	.req	r8		@ Linux syscall (!= 0)
+tsk	.req	r9		@ current thread_info
diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
new file mode 100644
index 000000000..8944f4991
--- /dev/null
+++ b/arch/arm/kernel/entry-v7m.S
@@ -0,0 +1,143 @@
+/*
+ * linux/arch/arm/kernel/entry-v7m.S
+ *
+ * Copyright (C) 2008 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Low-level vector interface routines for the ARMv7-M architecture
+ */
+#include <asm/memory.h>
+#include <asm/glue.h>
+#include <asm/thread_notify.h>
+#include <asm/v7m.h>
+
+#include "entry-header.S"
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#error "CONFIG_TRACE_IRQFLAGS not supported on the current ARMv7M implementation"
+#endif
+
+__invalid_entry:
+	v7m_exception_entry
+#ifdef CONFIG_PRINTK
+	adr	r0, strerr
+	mrs	r1, ipsr
+	mov	r2, lr
+	bl	printk
+#endif
+	mov	r0, sp
+	bl	show_regs
+1:	b	1b
+ENDPROC(__invalid_entry)
+
+strerr:	.asciz	"\nUnhandled exception: IPSR = %08lx LR = %08lx\n"
+
+	.align	2
+__irq_entry:
+	v7m_exception_entry
+
+	@
+	@ Invoke the IRQ handler
+	@
+	mrs	r0, ipsr
+	ldr	r1, =V7M_xPSR_EXCEPTIONNO
+	and	r0, r1
+	sub	r0, #16
+	mov	r1, sp
+	stmdb	sp!, {lr}
+	@ routine called with r0 = irq number, r1 = struct pt_regs *
+	bl	nvic_handle_irq
+
+	pop	{lr}
+	@
+	@ Check for any pending work if returning to user
+	@
+	ldr	r1, =BASEADDR_V7M_SCB
+	ldr	r0, [r1, V7M_SCB_ICSR]
+	tst	r0, V7M_SCB_ICSR_RETTOBASE
+	beq	2f
+
+	get_thread_info tsk
+	ldr	r2, [tsk, #TI_FLAGS]
+	tst	r2, #_TIF_WORK_MASK
+	beq	2f			@ no work pending
+	mov	r0, #V7M_SCB_ICSR_PENDSVSET
+	str	r0, [r1, V7M_SCB_ICSR]	@ raise PendSV
+
+2:
+	@ registers r0-r3 and r12 are automatically restored on exception
+	@ return. r4-r7 were not clobbered in v7m_exception_entry so for
+	@ correctness they don't need to be restored. So only r8-r11 must be
+	@ restored here. The easiest way to do so is to restore r0-r7, too.
+	ldmia	sp!, {r0-r11}
+	add	sp, #S_FRAME_SIZE-S_IP
+	cpsie	i
+	bx	lr
+ENDPROC(__irq_entry)
+
+__pendsv_entry:
+	v7m_exception_entry
+
+	ldr	r1, =BASEADDR_V7M_SCB
+	mov	r0, #V7M_SCB_ICSR_PENDSVCLR
+	str	r0, [r1, V7M_SCB_ICSR]	@ clear PendSV
+
+	@ execute the pending work, including reschedule
+	get_thread_info tsk
+	mov	why, #0
+	b	ret_to_user
+ENDPROC(__pendsv_entry)
+
+/*
+ * Register switch for ARMv7-M processors.
+ * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
+ * previous and next are guaranteed not to be the same.
+ */
+ENTRY(__switch_to)
+	.fnstart
+	.cantunwind
+	add	ip, r1, #TI_CPU_SAVE
+	stmia	ip!, {r4 - r11}		@ Store most regs on stack
+	str	sp, [ip], #4
+	str	lr, [ip], #4
+	mov	r5, r0
+	add	r4, r2, #TI_CPU_SAVE
+	ldr	r0, =thread_notify_head
+	mov	r1, #THREAD_NOTIFY_SWITCH
+	bl	atomic_notifier_call_chain
+	mov	ip, r4
+	mov	r0, r5
+	ldmia	ip!, {r4 - r11}		@ Load all regs saved previously
+	ldr	sp, [ip]
+	ldr	pc, [ip, #4]!
+	.fnend
+ENDPROC(__switch_to)
+
+	.data
+	.align	8
+/*
+ * Vector table (64 words => 256 bytes natural alignment)
+ */
+ENTRY(vector_table)
+	.long	0			@ 0 - Reset stack pointer
+	.long	__invalid_entry		@ 1 - Reset
+	.long	__invalid_entry		@ 2 - NMI
+	.long	__invalid_entry		@ 3 - HardFault
+	.long	__invalid_entry		@ 4 - MemManage
+	.long	__invalid_entry		@ 5 - BusFault
+	.long	__invalid_entry		@ 6 - UsageFault
+	.long	__invalid_entry		@ 7 - Reserved
+	.long	__invalid_entry		@ 8 - Reserved
+	.long	__invalid_entry		@ 9 - Reserved
+	.long	__invalid_entry		@ 10 - Reserved
+	.long	vector_swi		@ 11 - SVCall
+	.long	__invalid_entry		@ 12 - Debug Monitor
+	.long	__invalid_entry		@ 13 - Reserved
+	.long	__pendsv_entry		@ 14 - PendSV
+	.long	__invalid_entry		@ 15 - SysTick
+	.rept	64 - 16
+	.long	__irq_entry		@ 16..64 - External Interrupts
+	.endr
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
new file mode 100644
index 000000000..059c3da0f
--- /dev/null
+++ b/arch/arm/kernel/fiq.c
@@ -0,0 +1,164 @@
+/*
+ *  linux/arch/arm/kernel/fiq.c
+ *
+ *  Copyright (C) 1998 Russell King
+ *  Copyright (C) 1998, 1999 Phil Blundell
+ *
+ *  FIQ support written by Philip Blundell <philb@gnu.org>, 1998.
+ *
+ *  FIQ support re-written by Russell King to be more generic
+ *
+ * We now properly support a method by which the FIQ handlers can
+ * be stacked onto the vector.  We still do not support sharing
+ * the FIQ vector itself.
+ *
+ * Operation is as follows:
+ *  1. Owner A claims FIQ:
+ *     - default_fiq relinquishes control.
+ *  2. Owner A:
+ *     - inserts code.
+ *     - sets any registers,
+ *     - enables FIQ.
+ *  3. Owner B claims FIQ:
+ *     - if owner A has a relinquish function.
+ *       - disable FIQs.
+ *       - saves any registers.
+ *       - returns zero.
+ *  4. Owner B:
+ *     - inserts code.
+ *     - sets any registers,
+ *     - enables FIQ.
+ *  5. Owner B releases FIQ:
+ *     - Owner A is asked to reacquire FIQ:
+ *	 - inserts code.
+ *	 - restores saved registers.
+ *	 - enables FIQ.
+ *  6. Goto 3
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/fiq.h>
+#include <asm/irq.h>
+#include <asm/traps.h>
+
+#define FIQ_OFFSET ({					\
+		extern void *vector_fiq_offset;		\
+		(unsigned)&vector_fiq_offset;		\
+	})
+
+static unsigned long dfl_fiq_insn;
+static struct pt_regs dfl_fiq_regs;
+
+/* Default reacquire function
+ * - we always relinquish FIQ control
+ * - we always reacquire FIQ control
+ */
+static int fiq_def_op(void *ref, int relinquish)
+{
+	if (!relinquish) {
+		/* Restore default handler and registers */
+		local_fiq_disable();
+		set_fiq_regs(&dfl_fiq_regs);
+		set_fiq_handler(&dfl_fiq_insn, sizeof(dfl_fiq_insn));
+		local_fiq_enable();
+
+		/* FIXME: notify irq controller to standard enable FIQs */
+	}
+
+	return 0;
+}
+
+static struct fiq_handler default_owner = {
+	.name	= "default",
+	.fiq_op = fiq_def_op,
+};
+
+static struct fiq_handler *current_fiq = &default_owner;
+
+int show_fiq_list(struct seq_file *p, int prec)
+{
+	if (current_fiq != &default_owner)
+		seq_printf(p, "%*s:              %s\n", prec, "FIQ",
+			current_fiq->name);
+
+	return 0;
+}
+
+void set_fiq_handler(void *start, unsigned int length)
+{
+	void *base = vectors_page;
+	unsigned offset = FIQ_OFFSET;
+
+	memcpy(base + offset, start, length);
+	if (!cache_is_vipt_nonaliasing())
+		flush_icache_range((unsigned long)base + offset, offset +
+				   length);
+	flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length);
+}
+
+int claim_fiq(struct fiq_handler *f)
+{
+	int ret = 0;
+
+	if (current_fiq) {
+		ret = -EBUSY;
+
+		if (current_fiq->fiq_op != NULL)
+			ret = current_fiq->fiq_op(current_fiq->dev_id, 1);
+	}
+
+	if (!ret) {
+		f->next = current_fiq;
+		current_fiq = f;
+	}
+
+	return ret;
+}
+
+void release_fiq(struct fiq_handler *f)
+{
+	if (current_fiq != f) {
+		pr_err("%s FIQ trying to release %s FIQ\n",
+		       f->name, current_fiq->name);
+		dump_stack();
+		return;
+	}
+
+	do
+		current_fiq = current_fiq->next;
+	while (current_fiq->fiq_op(current_fiq->dev_id, 0));
+}
+
+static int fiq_start;
+
+void enable_fiq(int fiq)
+{
+	enable_irq(fiq + fiq_start);
+}
+
+void disable_fiq(int fiq)
+{
+	disable_irq(fiq + fiq_start);
+}
+
+EXPORT_SYMBOL(set_fiq_handler);
+EXPORT_SYMBOL(__set_fiq_regs);	/* defined in fiqasm.S */
+EXPORT_SYMBOL(__get_fiq_regs);	/* defined in fiqasm.S */
+EXPORT_SYMBOL(claim_fiq);
+EXPORT_SYMBOL(release_fiq);
+EXPORT_SYMBOL(enable_fiq);
+EXPORT_SYMBOL(disable_fiq);
+
+void __init init_FIQ(int start)
+{
+	unsigned offset = FIQ_OFFSET;
+	dfl_fiq_insn = *(unsigned long *)(0xffff0000 + offset);
+	get_fiq_regs(&dfl_fiq_regs);
+	fiq_start = start;
+}
diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S
new file mode 100644
index 000000000..8dd26e1a9
--- /dev/null
+++ b/arch/arm/kernel/fiqasm.S
@@ -0,0 +1,49 @@
+/*
+ *  linux/arch/arm/kernel/fiqasm.S
+ *
+ *  Derived from code originally in linux/arch/arm/kernel/fiq.c:
+ *
+ *  Copyright (C) 1998 Russell King
+ *  Copyright (C) 1998, 1999 Phil Blundell
+ *  Copyright (C) 2011, Linaro Limited
+ *
+ *  FIQ support written by Philip Blundell <philb@gnu.org>, 1998.
+ *
+ *  FIQ support re-written by Russell King to be more generic
+ *
+ *  v7/Thumb-2 compatibility modifications by Linaro Limited, 2011.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Taking an interrupt in FIQ mode is death, so both these functions
+ * disable irqs for the duration.
+ */
+
+ENTRY(__set_fiq_regs)
+	mov	r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE
+	mrs	r1, cpsr
+	msr	cpsr_c, r2	@ select FIQ mode
+	mov	r0, r0		@ avoid hazard prior to ARMv4
+	ldmia	r0!, {r8 - r12}
+	ldr	sp, [r0], #4
+	ldr	lr, [r0]
+	msr	cpsr_c, r1	@ return to SVC mode
+	mov	r0, r0		@ avoid hazard prior to ARMv4
+	ret	lr
+ENDPROC(__set_fiq_regs)
+
+ENTRY(__get_fiq_regs)
+	mov	r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE
+	mrs	r1, cpsr
+	msr	cpsr_c, r2	@ select FIQ mode
+	mov	r0, r0		@ avoid hazard prior to ARMv4
+	stmia	r0!, {r8 - r12}
+	str	sp, [r0], #4
+	str	lr, [r0]
+	msr	cpsr_c, r1	@ return to SVC mode
+	mov	r0, r0		@ avoid hazard prior to ARMv4
+	ret	lr
+ENDPROC(__get_fiq_regs)
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
new file mode 100644
index 000000000..709ee1d6d
--- /dev/null
+++ b/arch/arm/kernel/ftrace.c
@@ -0,0 +1,274 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Abhishek Sagar <sagar.abhishek@gmail.com>
+ * Copyright (C) 2010 Rabin Vincent <rabin@rab.in>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched with NOP till they are enabled.
+ * All code mutation routines here are called under stop_machine().
+ */
+
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cacheflush.h>
+#include <asm/opcodes.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define	NOP		0xf85deb04	/* pop.w {lr} */
+#else
+#define	NOP		0xe8bd4000	/* pop {lr} */
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_OLD_MCOUNT
+#define OLD_MCOUNT_ADDR	((unsigned long) mcount)
+#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
+
+#define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
+
+static int __ftrace_modify_code(void *data)
+{
+	int *command = data;
+
+	set_kernel_text_rw();
+	ftrace_modify_all_code(*command);
+	set_kernel_text_ro();
+
+	return 0;
+}
+
+void arch_ftrace_update_code(int command)
+{
+	stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
+{
+	return rec->arch.old_mcount ? OLD_NOP : NOP;
+}
+
+static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
+{
+	if (!rec->arch.old_mcount)
+		return addr;
+
+	if (addr == MCOUNT_ADDR)
+		addr = OLD_MCOUNT_ADDR;
+	else if (addr == FTRACE_ADDR)
+		addr = OLD_FTRACE_ADDR;
+
+	return addr;
+}
+#else
+static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
+{
+	return NOP;
+}
+
+static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
+{
+	return addr;
+}
+#endif
+
+int ftrace_arch_code_modify_prepare(void)
+{
+	set_all_modules_text_rw();
+	return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+	set_all_modules_text_ro();
+	/* Make sure any TLB misses during machine stop are cleared. */
+	flush_tlb_all();
+	return 0;
+}
+
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+	return arm_gen_branch_link(pc, addr);
+}
+
+static int ftrace_modify_code(unsigned long pc, unsigned long old,
+			      unsigned long new, bool validate)
+{
+	unsigned long replaced;
+
+	if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+		old = __opcode_to_mem_thumb32(old);
+		new = __opcode_to_mem_thumb32(new);
+	} else {
+		old = __opcode_to_mem_arm(old);
+		new = __opcode_to_mem_arm(new);
+	}
+
+	if (validate) {
+		if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))
+			return -EFAULT;
+
+		if (replaced != old)
+			return -EINVAL;
+	}
+
+	if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long pc;
+	unsigned long new;
+	int ret;
+
+	pc = (unsigned long)&ftrace_call;
+	new = ftrace_call_replace(pc, (unsigned long)func);
+
+	ret = ftrace_modify_code(pc, 0, new, false);
+
+#ifdef CONFIG_OLD_MCOUNT
+	if (!ret) {
+		pc = (unsigned long)&ftrace_call_old;
+		new = ftrace_call_replace(pc, (unsigned long)func);
+
+		ret = ftrace_modify_code(pc, 0, new, false);
+	}
+#endif
+
+	return ret;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long new, old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_nop_replace(rec);
+	new = ftrace_call_replace(ip, adjust_address(rec, addr));
+
+	return ftrace_modify_code(rec->ip, old, new, true);
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned long old;
+	unsigned long new;
+	int ret;
+
+	old = ftrace_call_replace(ip, adjust_address(rec, addr));
+	new = ftrace_nop_replace(rec);
+	ret = ftrace_modify_code(ip, old, new, true);
+
+#ifdef CONFIG_OLD_MCOUNT
+	if (ret == -EINVAL && addr == MCOUNT_ADDR) {
+		rec->arch.old_mcount = true;
+
+		old = ftrace_call_replace(ip, adjust_address(rec, addr));
+		new = ftrace_nop_replace(rec);
+		ret = ftrace_modify_code(ip, old, new, true);
+	}
+#endif
+
+	return ret;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long) &return_to_handler;
+	struct ftrace_graph_ent trace;
+	unsigned long old;
+	int err;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+	*parent = return_hooker;
+
+	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		*parent = old;
+		return;
+	}
+
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+				       frame_pointer);
+	if (err == -EBUSY) {
+		*parent = old;
+		return;
+	}
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_graph_call;
+extern unsigned long ftrace_graph_call_old;
+extern void ftrace_graph_caller_old(void);
+
+static int __ftrace_modify_caller(unsigned long *callsite,
+				  void (*func) (void), bool enable)
+{
+	unsigned long caller_fn = (unsigned long) func;
+	unsigned long pc = (unsigned long) callsite;
+	unsigned long branch = arm_gen_branch(pc, caller_fn);
+	unsigned long nop = 0xe1a00000;	/* mov r0, r0 */
+	unsigned long old = enable ? nop : branch;
+	unsigned long new = enable ? branch : nop;
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+	int ret;
+
+	ret = __ftrace_modify_caller(&ftrace_graph_call,
+				     ftrace_graph_caller,
+				     enable);
+
+#ifdef CONFIG_OLD_MCOUNT
+	if (!ret)
+		ret = __ftrace_modify_caller(&ftrace_graph_call_old,
+					     ftrace_graph_caller_old,
+					     enable);
+#endif
+
+	return ret;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
new file mode 100644
index 000000000..8733012d2
--- /dev/null
+++ b/arch/arm/kernel/head-common.S
@@ -0,0 +1,223 @@
+/*
+ *  linux/arch/arm/kernel/head-common.S
+ *
+ *  Copyright (C) 1994-2002 Russell King
+ *  Copyright (c) 2003 ARM Limited
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <asm/assembler.h>
+
+#define ATAG_CORE 0x54410001
+#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
+#define ATAG_CORE_SIZE_EMPTY ((2*4) >> 2)
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define OF_DT_MAGIC 0xd00dfeed
+#else
+#define OF_DT_MAGIC 0xedfe0dd0 /* 0xd00dfeed in big-endian */
+#endif
+
+/*
+ * Exception handling.  Something went wrong and we can't proceed.  We
+ * ought to tell the user, but since we don't have any guarantee that
+ * we're even running on the right architecture, we do virtually nothing.
+ *
+ * If CONFIG_DEBUG_LL is set we try to print out something about the error
+ * and hope for the best (useful if bootloader fails to pass a proper
+ * machine ID for example).
+ */
+	__HEAD
+
+/* Determine validity of the r2 atags pointer.  The heuristic requires
+ * that the pointer be aligned, in the first 16k of physical RAM and
+ * that the ATAG_CORE marker is first and present.  If CONFIG_OF_FLATTREE
+ * is selected, then it will also accept a dtb pointer.  Future revisions
+ * of this function may be more lenient with the physical address and
+ * may also be able to move the ATAGS block if necessary.
+ *
+ * Returns:
+ *  r2 either valid atags pointer, valid dtb pointer, or zero
+ *  r5, r6 corrupted
+ */
+__vet_atags:
+	tst	r2, #0x3			@ aligned?
+	bne	1f
+
+	ldr	r5, [r2, #0]
+#ifdef CONFIG_OF_FLATTREE
+	ldr	r6, =OF_DT_MAGIC		@ is it a DTB?
+	cmp	r5, r6
+	beq	2f
+#endif
+	cmp	r5, #ATAG_CORE_SIZE		@ is first tag ATAG_CORE?
+	cmpne	r5, #ATAG_CORE_SIZE_EMPTY
+	bne	1f
+	ldr	r5, [r2, #4]
+	ldr	r6, =ATAG_CORE
+	cmp	r5, r6
+	bne	1f
+
+2:	ret	lr				@ atag/dtb pointer is ok
+
+1:	mov	r2, #0
+	ret	lr
+ENDPROC(__vet_atags)
+
+/*
+ * The following fragment of code is executed with the MMU on in MMU mode,
+ * and uses absolute addresses; this is not position independent.
+ *
+ *  r0  = cp#15 control register
+ *  r1  = machine ID
+ *  r2  = atags/dtb pointer
+ *  r9  = processor ID
+ */
+	__INIT
+__mmap_switched:
+	adr	r3, __mmap_switched_data
+
+	ldmia	r3!, {r4, r5, r6, r7}
+	cmp	r4, r5				@ Copy data segment if needed
+1:	cmpne	r5, r6
+	ldrne	fp, [r4], #4
+	strne	fp, [r5], #4
+	bne	1b
+
+	mov	fp, #0				@ Clear BSS (and zero fp)
+1:	cmp	r6, r7
+	strcc	fp, [r6],#4
+	bcc	1b
+
+ ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
+ THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
+ THUMB(	ldr	sp, [r3, #16]		)
+	str	r9, [r4]			@ Save processor ID
+	str	r1, [r5]			@ Save machine type
+	str	r2, [r6]			@ Save atags pointer
+	cmp	r7, #0
+	strne	r0, [r7]			@ Save control register values
+	b	start_kernel
+ENDPROC(__mmap_switched)
+
+	.align	2
+	.type	__mmap_switched_data, %object
+__mmap_switched_data:
+	.long	__data_loc			@ r4
+	.long	_sdata				@ r5
+	.long	__bss_start			@ r6
+	.long	_end				@ r7
+	.long	processor_id			@ r4
+	.long	__machine_arch_type		@ r5
+	.long	__atags_pointer			@ r6
+#ifdef CONFIG_CPU_CP15
+	.long	cr_alignment			@ r7
+#else
+	.long	0				@ r7
+#endif
+	.long	init_thread_union + THREAD_START_SP @ sp
+	.size	__mmap_switched_data, . - __mmap_switched_data
+
+/*
+ * This provides a C-API version of __lookup_processor_type
+ */
+ENTRY(lookup_processor_type)
+	stmfd	sp!, {r4 - r6, r9, lr}
+	mov	r9, r0
+	bl	__lookup_processor_type
+	mov	r0, r5
+	ldmfd	sp!, {r4 - r6, r9, pc}
+ENDPROC(lookup_processor_type)
+
+	__FINIT
+	.text
+
+/*
+ * Read processor ID register (CP#15, CR0), and look up in the linker-built
+ * supported processor list.  Note that we can't use the absolute addresses
+ * for the __proc_info lists since we aren't running with the MMU on
+ * (and therefore, we are not in the correct address space).  We have to
+ * calculate the offset.
+ *
+ *	r9 = cpuid
+ * Returns:
+ *	r3, r4, r6 corrupted
+ *	r5 = proc_info pointer in physical address space
+ *	r9 = cpuid (preserved)
+ */
+__lookup_processor_type:
+	adr	r3, __lookup_processor_type_data
+	ldmia	r3, {r4 - r6}
+	sub	r3, r3, r4			@ get offset between virt&phys
+	add	r5, r5, r3			@ convert virt addresses to
+	add	r6, r6, r3			@ physical address space
+1:	ldmia	r5, {r3, r4}			@ value, mask
+	and	r4, r4, r9			@ mask wanted bits
+	teq	r3, r4
+	beq	2f
+	add	r5, r5, #PROC_INFO_SZ		@ sizeof(proc_info_list)
+	cmp	r5, r6
+	blo	1b
+	mov	r5, #0				@ unknown processor
+2:	ret	lr
+ENDPROC(__lookup_processor_type)
+
+/*
+ * Look in <asm/procinfo.h> for information about the __proc_info structure.
+ */
+	.align	2
+	.type	__lookup_processor_type_data, %object
+__lookup_processor_type_data:
+	.long	.
+	.long	__proc_info_begin
+	.long	__proc_info_end
+	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
+
+__error_lpae:
+#ifdef CONFIG_DEBUG_LL
+	adr	r0, str_lpae
+	bl 	printascii
+	b	__error
+str_lpae: .asciz "\nError: Kernel with LPAE support, but CPU does not support LPAE.\n"
+#else
+	b	__error
+#endif
+	.align
+ENDPROC(__error_lpae)
+
+__error_p:
+#ifdef CONFIG_DEBUG_LL
+	adr	r0, str_p1
+	bl	printascii
+	mov	r0, r9
+	bl	printhex8
+	adr	r0, str_p2
+	bl	printascii
+	b	__error
+str_p1:	.asciz	"\nError: unrecognized/unsupported processor variant (0x"
+str_p2:	.asciz	").\n"
+	.align
+#endif
+ENDPROC(__error_p)
+
+__error:
+#ifdef CONFIG_ARCH_RPC
+/*
+ * Turn the screen red on a error - RiscPC only.
+ */
+	mov	r0, #0x02000000
+	mov	r3, #0x11
+	orr	r3, r3, r3, lsl #8
+	orr	r3, r3, r3, lsl #16
+	str	r3, [r0], #4
+	str	r3, [r0], #4
+	str	r3, [r0], #4
+	str	r3, [r0], #4
+#endif
+1:	mov	r0, r0
+	b	1b
+ENDPROC(__error)
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
new file mode 100644
index 000000000..aebfbf79a
--- /dev/null
+++ b/arch/arm/kernel/head-nommu.S
@@ -0,0 +1,260 @@
+/*
+ *  linux/arch/arm/kernel/head-nommu.S
+ *
+ *  Copyright (C) 1994-2002 Russell King
+ *  Copyright (C) 2003-2006 Hyok S. Choi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Common kernel startup code (non-paged MM)
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+#include <asm/assembler.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <asm/memory.h>
+#include <asm/cp15.h>
+#include <asm/thread_info.h>
+#include <asm/v7m.h>
+#include <asm/mpu.h>
+#include <asm/page.h>
+
+/*
+ * Kernel startup entry point.
+ * ---------------------------
+ *
+ * This is normally called from the decompressor code.  The requirements
+ * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
+ * r1 = machine nr.
+ *
+ * See linux/arch/arm/tools/mach-types for the complete list of machine
+ * numbers for r1.
+ *
+ */
+
+	__HEAD
+
+#ifdef CONFIG_CPU_THUMBONLY
+	.thumb
+ENTRY(stext)
+#else
+	.arm
+ENTRY(stext)
+
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
+#endif
+
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
+						@ and irqs disabled
+#if defined(CONFIG_CPU_CP15)
+	mrc	p15, 0, r9, c0, c0		@ get processor id
+#elif defined(CONFIG_CPU_V7M)
+	ldr	r9, =BASEADDR_V7M_SCB
+	ldr	r9, [r9, V7M_SCB_CPUID]
+#else
+	ldr	r9, =CONFIG_PROCESSOR_ID
+#endif
+	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
+	movs	r10, r5				@ invalid processor (r5=0)?
+	beq	__error_p				@ yes, error 'p'
+
+#ifdef CONFIG_ARM_MPU
+	/* Calculate the size of a region covering just the kernel */
+	ldr	r5, =PLAT_PHYS_OFFSET		@ Region start: PHYS_OFFSET
+	ldr     r6, =(_end)			@ Cover whole kernel
+	sub	r6, r6, r5			@ Minimum size of region to map
+	clz	r6, r6				@ Region size must be 2^N...
+	rsb	r6, r6, #31			@ ...so round up region size
+	lsl	r6, r6, #MPU_RSR_SZ		@ Put size in right field
+	orr	r6, r6, #(1 << MPU_RSR_EN)	@ Set region enabled bit
+	bl	__setup_mpu
+#endif
+	ldr	r13, =__mmap_switched		@ address to jump to after
+						@ initialising sctlr
+	adr	lr, BSYM(1f)			@ return (PIC) address
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10
+	ret	r12
+ 1:	b	__after_proc_init
+ENDPROC(stext)
+
+#ifdef CONFIG_SMP
+	.text
+ENTRY(secondary_startup)
+	/*
+	 * Common entry point for secondary CPUs.
+	 *
+	 * Ensure that we're in SVC mode, and IRQs are disabled.  Lookup
+	 * the processor type - there is no need to check the machine type
+	 * as it has already been validated by the primary processor.
+	 */
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
+#ifndef CONFIG_CPU_CP15
+	ldr	r9, =CONFIG_PROCESSOR_ID
+#else
+	mrc	p15, 0, r9, c0, c0		@ get processor id
+#endif
+	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
+	movs	r10, r5				@ invalid processor?
+	beq	__error_p			@ yes, error 'p'
+
+	adr	r4, __secondary_data
+	ldmia	r4, {r7, r12}
+
+#ifdef CONFIG_ARM_MPU
+	/* Use MPU region info supplied by __cpu_up */
+	ldr	r6, [r7]			@ get secondary_data.mpu_szr
+	bl      __setup_mpu			@ Initialize the MPU
+#endif
+
+	adr	lr, BSYM(__after_proc_init)	@ return address
+	mov	r13, r12			@ __secondary_switched address
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10
+	ret	r12
+ENDPROC(secondary_startup)
+
+ENTRY(__secondary_switched)
+	ldr	sp, [r7, #8]			@ set up the stack pointer
+	mov	fp, #0
+	b	secondary_start_kernel
+ENDPROC(__secondary_switched)
+
+	.type	__secondary_data, %object
+__secondary_data:
+	.long	secondary_data
+	.long	__secondary_switched
+#endif /* CONFIG_SMP */
+
+/*
+ * Set the Control Register and Read the process ID.
+ */
+__after_proc_init:
+#ifdef CONFIG_CPU_CP15
+	/*
+	 * CP15 system control register value returned in r0 from
+	 * the CPU init function.
+	 */
+#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
+	orr	r0, r0, #CR_A
+#else
+	bic	r0, r0, #CR_A
+#endif
+#ifdef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CR_C
+#endif
+#ifdef CONFIG_CPU_BPREDICT_DISABLE
+	bic	r0, r0, #CR_Z
+#endif
+#ifdef CONFIG_CPU_ICACHE_DISABLE
+	bic	r0, r0, #CR_I
+#endif
+#ifdef CONFIG_CPU_HIGH_VECTOR
+	orr	r0, r0, #CR_V
+#else
+	bic	r0, r0, #CR_V
+#endif
+	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
+#endif /* CONFIG_CPU_CP15 */
+	ret	r13
+ENDPROC(__after_proc_init)
+	.ltorg
+
+#ifdef CONFIG_ARM_MPU
+
+
+/* Set which MPU region should be programmed */
+.macro set_region_nr tmp, rgnr
+	mov	\tmp, \rgnr			@ Use static region numbers
+	mcr	p15, 0, \tmp, c6, c2, 0		@ Write RGNR
+.endm
+
+/* Setup a single MPU region, either D or I side (D-side for unified) */
+.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE
+	mcr	p15, 0, \bar, c6, c1, (0 + \side)	@ I/DRBAR
+	mcr	p15, 0, \acr, c6, c1, (4 + \side)	@ I/DRACR
+	mcr	p15, 0, \sr, c6, c1, (2 + \side)		@ I/DRSR
+.endm
+
+/*
+ * Setup the MPU and initial MPU Regions. We create the following regions:
+ * Region 0: Use this for probing the MPU details, so leave disabled.
+ * Region 1: Background region - covers the whole of RAM as strongly ordered
+ * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6
+ * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page
+ *
+ * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION
+*/
+
+ENTRY(__setup_mpu)
+
+	/* Probe for v7 PMSA compliance */
+	mrc	p15, 0, r0, c0, c1, 4		@ Read ID_MMFR0
+	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
+	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
+	bne	__error_p			@ Fail: ARM_MPU on NOT v7 PMSA
+
+	/* Determine whether the D/I-side memory map is unified. We set the
+	 * flags here and continue to use them for the rest of this function */
+	mrc	p15, 0, r0, c0, c0, 4		@ MPUIR
+	ands	r5, r0, #MPUIR_DREGION_SZMASK	@ 0 size d region => No MPU
+	beq	__error_p			@ Fail: ARM_MPU and no MPU
+	tst	r0, #MPUIR_nU			@ MPUIR_nU = 0 for unified
+
+	/* Setup second region first to free up r6 */
+	set_region_nr r0, #MPU_RAM_REGION
+	isb
+	/* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */
+	ldr	r0, =PLAT_PHYS_OFFSET		@ RAM starts at PHYS_OFFSET
+	ldr	r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL)
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ PHYS_OFFSET, shared, enabled
+	beq	1f				@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled
+1:	isb
+
+	/* First/background region */
+	set_region_nr r0, #MPU_BG_REGION
+	isb
+	/* Execute Never,  strongly ordered, inaccessible to PL0, rw PL1  */
+	mov	r0, #0				@ BG region starts at 0x0
+	ldr	r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA)
+	mov	r6, #MPU_RSR_ALL_MEM		@ 4GB region, enabled
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ 0x0, BG region, enabled
+	beq	2f				@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled
+2:	isb
+
+	/* Vectors region */
+	set_region_nr r0, #MPU_VECTORS_REGION
+	isb
+	/* Shared, inaccessible to PL0, rw PL1 */
+	mov	r0, #CONFIG_VECTORS_BASE	@ Cover from VECTORS_BASE
+	ldr	r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL)
+	/* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */
+	mov	r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN)
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ VECTORS_BASE, PL0 NA, enabled
+	beq	3f				@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE	@ VECTORS_BASE, PL0 NA, enabled
+3:	isb
+
+	/* Enable the MPU */
+	mrc	p15, 0, r0, c1, c0, 0		@ Read SCTLR
+	bic     r0, r0, #CR_BR			@ Disable the 'default mem-map'
+	orr	r0, r0, #CR_M			@ Set SCTRL.M (MPU on)
+	mcr	p15, 0, r0, c1, c0, 0		@ Enable MPU
+	isb
+	ret	lr
+ENDPROC(__setup_mpu)
+#endif
+#include "head-common.S"
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
new file mode 100644
index 000000000..3637973a9
--- /dev/null
+++ b/arch/arm/kernel/head.S
@@ -0,0 +1,712 @@
+/*
+ *  linux/arch/arm/kernel/head.S
+ *
+ *  Copyright (C) 1994-2002 Russell King
+ *  Copyright (c) 2003 ARM Limited
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Kernel startup code for all 32-bit CPUs
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+#include <asm/assembler.h>
+#include <asm/cp15.h>
+#include <asm/domain.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <asm/memory.h>
+#include <asm/thread_info.h>
+#include <asm/pgtable.h>
+
+#if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_SEMIHOSTING)
+#include CONFIG_DEBUG_LL_INCLUDE
+#endif
+
+/*
+ * swapper_pg_dir is the virtual address of the initial page table.
+ * We place the page tables 16K below KERNEL_RAM_VADDR.  Therefore, we must
+ * make sure that KERNEL_RAM_VADDR is correctly set.  Currently, we expect
+ * the least significant 16 bits to be 0x8000, but we could probably
+ * relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000.
+ */
+#define KERNEL_RAM_VADDR	(PAGE_OFFSET + TEXT_OFFSET)
+#if (KERNEL_RAM_VADDR & 0xffff) != 0x8000
+#error KERNEL_RAM_VADDR must start at 0xXXXX8000
+#endif
+
+#ifdef CONFIG_ARM_LPAE
+	/* LPAE requires an additional page for the PGD */
+#define PG_DIR_SIZE	0x5000
+#define PMD_ORDER	3
+#else
+#define PG_DIR_SIZE	0x4000
+#define PMD_ORDER	2
+#endif
+
+	.globl	swapper_pg_dir
+	.equ	swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE
+
+	.macro	pgtbl, rd, phys
+	add	\rd, \phys, #TEXT_OFFSET
+	sub	\rd, \rd, #PG_DIR_SIZE
+	.endm
+
+/*
+ * Kernel startup entry point.
+ * ---------------------------
+ *
+ * This is normally called from the decompressor code.  The requirements
+ * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
+ * r1 = machine nr, r2 = atags or dtb pointer.
+ *
+ * This code is mostly position independent, so if you link the kernel at
+ * 0xc0008000, you call this at __pa(0xc0008000).
+ *
+ * See linux/arch/arm/tools/mach-types for the complete list of machine
+ * numbers for r1.
+ *
+ * We're trying to keep crap to a minimum; DO NOT add any machine specific
+ * crap here - that's what the boot loader (or in extreme, well justified
+ * circumstances, zImage) is for.
+ */
+	.arm
+
+	__HEAD
+ENTRY(stext)
+ ARM_BE8(setend	be )			@ ensure we are in BE8 mode
+
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
+
+#ifdef CONFIG_ARM_VIRT_EXT
+	bl	__hyp_stub_install
+#endif
+	@ ensure svc mode and all interrupts masked
+	safe_svcmode_maskall r9
+
+	mrc	p15, 0, r9, c0, c0		@ get processor id
+	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
+	movs	r10, r5				@ invalid processor (r5=0)?
+ THUMB( it	eq )		@ force fixup-able long branch encoding
+	beq	__error_p			@ yes, error 'p'
+
+#ifdef CONFIG_ARM_LPAE
+	mrc	p15, 0, r3, c0, c1, 4		@ read ID_MMFR0
+	and	r3, r3, #0xf			@ extract VMSA support
+	cmp	r3, #5				@ long-descriptor translation table format?
+ THUMB( it	lo )				@ force fixup-able long branch encoding
+	blo	__error_lpae			@ only classic page table format
+#endif
+
+#ifndef CONFIG_XIP_KERNEL
+	adr	r3, 2f
+	ldmia	r3, {r4, r8}
+	sub	r4, r3, r4			@ (PHYS_OFFSET - PAGE_OFFSET)
+	add	r8, r8, r4			@ PHYS_OFFSET
+#else
+	ldr	r8, =PLAT_PHYS_OFFSET		@ always constant in this case
+#endif
+
+	/*
+	 * r1 = machine no, r2 = atags or dtb,
+	 * r8 = phys_offset, r9 = cpuid, r10 = procinfo
+	 */
+	bl	__vet_atags
+#ifdef CONFIG_SMP_ON_UP
+	bl	__fixup_smp
+#endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+	bl	__fixup_pv_table
+#endif
+	bl	__create_page_tables
+
+	/*
+	 * The following calls CPU specific code in a position independent
+	 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
+	 * xxx_proc_info structure selected by __lookup_processor_type
+	 * above.  On return, the CPU will be ready for the MMU to be
+	 * turned on, and r0 will hold the CPU control register value.
+	 */
+	ldr	r13, =__mmap_switched		@ address to jump to after
+						@ mmu has been enabled
+	adr	lr, BSYM(1f)			@ return (PIC) address
+	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10
+	ret	r12
+1:	b	__enable_mmu
+ENDPROC(stext)
+	.ltorg
+#ifndef CONFIG_XIP_KERNEL
+2:	.long	.
+	.long	PAGE_OFFSET
+#endif
+
+/*
+ * Setup the initial page tables.  We only setup the barest
+ * amount which are required to get the kernel running, which
+ * generally means mapping in the kernel code.
+ *
+ * r8 = phys_offset, r9 = cpuid, r10 = procinfo
+ *
+ * Returns:
+ *  r0, r3, r5-r7 corrupted
+ *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ */
+__create_page_tables:
+	pgtbl	r4, r8				@ page table address
+
+	/*
+	 * Clear the swapper page table
+	 */
+	mov	r0, r4
+	mov	r3, #0
+	add	r6, r0, #PG_DIR_SIZE
+1:	str	r3, [r0], #4
+	str	r3, [r0], #4
+	str	r3, [r0], #4
+	str	r3, [r0], #4
+	teq	r0, r6
+	bne	1b
+
+#ifdef CONFIG_ARM_LPAE
+	/*
+	 * Build the PGD table (first level) to point to the PMD table. A PGD
+	 * entry is 64-bit wide.
+	 */
+	mov	r0, r4
+	add	r3, r4, #0x1000			@ first PMD table address
+	orr	r3, r3, #3			@ PGD block type
+	mov	r6, #4				@ PTRS_PER_PGD
+	mov	r7, #1 << (55 - 32)		@ L_PGD_SWAPPER
+1:
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	str	r7, [r0], #4			@ set top PGD entry bits
+	str	r3, [r0], #4			@ set bottom PGD entry bits
+#else
+	str	r3, [r0], #4			@ set bottom PGD entry bits
+	str	r7, [r0], #4			@ set top PGD entry bits
+#endif
+	add	r3, r3, #0x1000			@ next PMD table
+	subs	r6, r6, #1
+	bne	1b
+
+	add	r4, r4, #0x1000			@ point to the PMD tables
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	add	r4, r4, #4			@ we only write the bottom word
+#endif
+#endif
+
+	ldr	r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags
+
+	/*
+	 * Create identity mapping to cater for __enable_mmu.
+	 * This identity mapping will be removed by paging_init().
+	 */
+	adr	r0, __turn_mmu_on_loc
+	ldmia	r0, {r3, r5, r6}
+	sub	r0, r0, r3			@ virt->phys offset
+	add	r5, r5, r0			@ phys __turn_mmu_on
+	add	r6, r6, r0			@ phys __turn_mmu_on_end
+	mov	r5, r5, lsr #SECTION_SHIFT
+	mov	r6, r6, lsr #SECTION_SHIFT
+
+1:	orr	r3, r7, r5, lsl #SECTION_SHIFT	@ flags + kernel base
+	str	r3, [r4, r5, lsl #PMD_ORDER]	@ identity mapping
+	cmp	r5, r6
+	addlo	r5, r5, #1			@ next section
+	blo	1b
+
+	/*
+	 * Map our RAM from the start to the end of the kernel .bss section.
+	 */
+	add	r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
+	ldr	r6, =(_end - 1)
+	orr	r3, r8, r7
+	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
+1:	str	r3, [r0], #1 << PMD_ORDER
+	add	r3, r3, #1 << SECTION_SHIFT
+	cmp	r0, r6
+	bls	1b
+
+#ifdef CONFIG_XIP_KERNEL
+	/*
+	 * Map the kernel image separately as it is not located in RAM.
+	 */
+#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
+	mov	r3, pc
+	mov	r3, r3, lsr #SECTION_SHIFT
+	orr	r3, r7, r3, lsl #SECTION_SHIFT
+	add	r0, r4,  #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
+	str	r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
+	ldr	r6, =(_edata_loc - 1)
+	add	r0, r0, #1 << PMD_ORDER
+	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
+1:	cmp	r0, r6
+	add	r3, r3, #1 << SECTION_SHIFT
+	strls	r3, [r0], #1 << PMD_ORDER
+	bls	1b
+#endif
+
+	/*
+	 * Then map boot params address in r2 if specified.
+	 * We map 2 sections in case the ATAGs/DTB crosses a section boundary.
+	 */
+	mov	r0, r2, lsr #SECTION_SHIFT
+	movs	r0, r0, lsl #SECTION_SHIFT
+	subne	r3, r0, r8
+	addne	r3, r3, #PAGE_OFFSET
+	addne	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
+	orrne	r6, r7, r0
+	strne	r6, [r3], #1 << PMD_ORDER
+	addne	r6, r6, #1 << SECTION_SHIFT
+	strne	r6, [r3]
+
+#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
+	sub	r4, r4, #4			@ Fixup page table pointer
+						@ for 64-bit descriptors
+#endif
+
+#ifdef CONFIG_DEBUG_LL
+#if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
+	/*
+	 * Map in IO space for serial debugging.
+	 * This allows debug messages to be output
+	 * via a serial console before paging_init.
+	 */
+	addruart r7, r3, r0
+
+	mov	r3, r3, lsr #SECTION_SHIFT
+	mov	r3, r3, lsl #PMD_ORDER
+
+	add	r0, r4, r3
+	mov	r3, r7, lsr #SECTION_SHIFT
+	ldr	r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
+	orr	r3, r7, r3, lsl #SECTION_SHIFT
+#ifdef CONFIG_ARM_LPAE
+	mov	r7, #1 << (54 - 32)		@ XN
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	str	r7, [r0], #4
+	str	r3, [r0], #4
+#else
+	str	r3, [r0], #4
+	str	r7, [r0], #4
+#endif
+#else
+	orr	r3, r3, #PMD_SECT_XN
+	str	r3, [r0], #4
+#endif
+
+#else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */
+	/* we don't need any serial debugging mappings */
+	ldr	r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
+#endif
+
+#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
+	/*
+	 * If we're using the NetWinder or CATS, we also need to map
+	 * in the 16550-type serial port for the debug messages
+	 */
+	add	r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER)
+	orr	r3, r7, #0x7c000000
+	str	r3, [r0]
+#endif
+#ifdef CONFIG_ARCH_RPC
+	/*
+	 * Map in screen at 0x02000000 & SCREEN2_BASE
+	 * Similar reasons here - for debug.  This is
+	 * only for Acorn RiscPC architectures.
+	 */
+	add	r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER)
+	orr	r3, r7, #0x02000000
+	str	r3, [r0]
+	add	r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER)
+	str	r3, [r0]
+#endif
+#endif
+#ifdef CONFIG_ARM_LPAE
+	sub	r4, r4, #0x1000		@ point to the PGD table
+	mov	r4, r4, lsr #ARCH_PGD_SHIFT
+#endif
+	ret	lr
+ENDPROC(__create_page_tables)
+	.ltorg
+	.align
+__turn_mmu_on_loc:
+	.long	.
+	.long	__turn_mmu_on
+	.long	__turn_mmu_on_end
+
+#if defined(CONFIG_SMP)
+	.text
+ENTRY(secondary_startup_arm)
+	.arm
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
+ENTRY(secondary_startup)
+	/*
+	 * Common entry point for secondary CPUs.
+	 *
+	 * Ensure that we're in SVC mode, and IRQs are disabled.  Lookup
+	 * the processor type - there is no need to check the machine type
+	 * as it has already been validated by the primary processor.
+	 */
+
+ ARM_BE8(setend	be)				@ ensure we are in BE8 mode
+
+#ifdef CONFIG_ARM_VIRT_EXT
+	bl	__hyp_stub_install_secondary
+#endif
+	safe_svcmode_maskall r9
+
+	mrc	p15, 0, r9, c0, c0		@ get processor id
+	bl	__lookup_processor_type
+	movs	r10, r5				@ invalid processor?
+	moveq	r0, #'p'			@ yes, error 'p'
+ THUMB( it	eq )		@ force fixup-able long branch encoding
+	beq	__error_p
+
+	/*
+	 * Use the page tables supplied from  __cpu_up.
+	 */
+	adr	r4, __secondary_data
+	ldmia	r4, {r5, r7, r12}		@ address to jump to after
+	sub	lr, r4, r5			@ mmu has been enabled
+	ldr	r4, [r7, lr]			@ get secondary_data.pgdir
+	add	r7, r7, #4
+	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
+	adr	lr, BSYM(__enable_mmu)		@ return address
+	mov	r13, r12			@ __secondary_switched address
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10			@ initialise processor
+						@ (return control reg)
+	ret	r12
+ENDPROC(secondary_startup)
+ENDPROC(secondary_startup_arm)
+
+	/*
+	 * r6  = &secondary_data
+	 */
+ENTRY(__secondary_switched)
+	ldr	sp, [r7, #4]			@ get secondary_data.stack
+	mov	fp, #0
+	b	secondary_start_kernel
+ENDPROC(__secondary_switched)
+
+	.align
+
+	.type	__secondary_data, %object
+__secondary_data:
+	.long	.
+	.long	secondary_data
+	.long	__secondary_switched
+#endif /* defined(CONFIG_SMP) */
+
+
+
+/*
+ * Setup common bits before finally enabling the MMU.  Essentially
+ * this is just loading the page table pointer and domain access
+ * registers.
+ *
+ *  r0  = cp#15 control register
+ *  r1  = machine ID
+ *  r2  = atags or dtb pointer
+ *  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ *  r9  = processor ID
+ *  r13 = *virtual* address to jump to upon completion
+ */
+__enable_mmu:
+#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
+	orr	r0, r0, #CR_A
+#else
+	bic	r0, r0, #CR_A
+#endif
+#ifdef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CR_C
+#endif
+#ifdef CONFIG_CPU_BPREDICT_DISABLE
+	bic	r0, r0, #CR_Z
+#endif
+#ifdef CONFIG_CPU_ICACHE_DISABLE
+	bic	r0, r0, #CR_I
+#endif
+#ifndef CONFIG_ARM_LPAE
+	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
+		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
+		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
+		      domain_val(DOMAIN_IO, DOMAIN_CLIENT))
+	mcr	p15, 0, r5, c3, c0, 0		@ load domain access register
+	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
+#endif
+	b	__turn_mmu_on
+ENDPROC(__enable_mmu)
+
+/*
+ * Enable the MMU.  This completely changes the structure of the visible
+ * memory space.  You will not be able to trace execution through this.
+ * If you have an enquiry about this, *please* check the linux-arm-kernel
+ * mailing list archives BEFORE sending another post to the list.
+ *
+ *  r0  = cp#15 control register
+ *  r1  = machine ID
+ *  r2  = atags or dtb pointer
+ *  r9  = processor ID
+ *  r13 = *virtual* address to jump to upon completion
+ *
+ * other registers depend on the function called upon completion
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(__turn_mmu_on)
+	mov	r0, r0
+	instr_sync
+	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
+	mrc	p15, 0, r3, c0, c0, 0		@ read id reg
+	instr_sync
+	mov	r3, r3
+	mov	r3, r13
+	ret	r3
+__turn_mmu_on_end:
+ENDPROC(__turn_mmu_on)
+	.popsection
+
+
+#ifdef CONFIG_SMP_ON_UP
+	__HEAD
+__fixup_smp:
+	and	r3, r9, #0x000f0000	@ architecture version
+	teq	r3, #0x000f0000		@ CPU ID supported?
+	bne	__fixup_smp_on_up	@ no, assume UP
+
+	bic	r3, r9, #0x00ff0000
+	bic	r3, r3, #0x0000000f	@ mask 0xff00fff0
+	mov	r4, #0x41000000
+	orr	r4, r4, #0x0000b000
+	orr	r4, r4, #0x00000020	@ val 0x4100b020
+	teq	r3, r4			@ ARM 11MPCore?
+	reteq	lr			@ yes, assume SMP
+
+	mrc	p15, 0, r0, c0, c0, 5	@ read MPIDR
+	and	r0, r0, #0xc0000000	@ multiprocessing extensions and
+	teq	r0, #0x80000000		@ not part of a uniprocessor system?
+	bne    __fixup_smp_on_up	@ no, assume UP
+
+	@ Core indicates it is SMP. Check for Aegis SOC where a single
+	@ Cortex-A9 CPU is present but SMP operations fault.
+	mov	r4, #0x41000000
+	orr	r4, r4, #0x0000c000
+	orr	r4, r4, #0x00000090
+	teq	r3, r4			@ Check for ARM Cortex-A9
+	retne	lr			@ Not ARM Cortex-A9,
+
+	@ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the
+	@ below address check will need to be #ifdef'd or equivalent
+	@ for the Aegis platform.
+	mrc	p15, 4, r0, c15, c0	@ get SCU base address
+	teq	r0, #0x0		@ '0' on actual UP A9 hardware
+	beq	__fixup_smp_on_up	@ So its an A9 UP
+	ldr	r0, [r0, #4]		@ read SCU Config
+ARM_BE8(rev	r0, r0)			@ byteswap if big endian
+	and	r0, r0, #0x3		@ number of CPUs
+	teq	r0, #0x0		@ is 1?
+	retne	lr
+
+__fixup_smp_on_up:
+	adr	r0, 1f
+	ldmia	r0, {r3 - r5}
+	sub	r3, r0, r3
+	add	r4, r4, r3
+	add	r5, r5, r3
+	b	__do_fixup_smp_on_up
+ENDPROC(__fixup_smp)
+
+	.align
+1:	.word	.
+	.word	__smpalt_begin
+	.word	__smpalt_end
+
+	.pushsection .data
+	.globl	smp_on_up
+smp_on_up:
+	ALT_SMP(.long	1)
+	ALT_UP(.long	0)
+	.popsection
+#endif
+
+	.text
+__do_fixup_smp_on_up:
+	cmp	r4, r5
+	reths	lr
+	ldmia	r4!, {r0, r6}
+ ARM(	str	r6, [r0, r3]	)
+ THUMB(	add	r0, r0, r3	)
+#ifdef __ARMEB__
+ THUMB(	mov	r6, r6, ror #16	)	@ Convert word order for big-endian.
+#endif
+ THUMB(	strh	r6, [r0], #2	)	@ For Thumb-2, store as two halfwords
+ THUMB(	mov	r6, r6, lsr #16	)	@ to be robust against misaligned r3.
+ THUMB(	strh	r6, [r0]	)
+	b	__do_fixup_smp_on_up
+ENDPROC(__do_fixup_smp_on_up)
+
+ENTRY(fixup_smp)
+	stmfd	sp!, {r4 - r6, lr}
+	mov	r4, r0
+	add	r5, r0, r1
+	mov	r3, #0
+	bl	__do_fixup_smp_on_up
+	ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(fixup_smp)
+
+#ifdef __ARMEB__
+#define LOW_OFFSET	0x4
+#define HIGH_OFFSET	0x0
+#else
+#define LOW_OFFSET	0x0
+#define HIGH_OFFSET	0x4
+#endif
+
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+/* __fixup_pv_table - patch the stub instructions with the delta between
+ * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
+ * can be expressed by an immediate shifter operand. The stub instruction
+ * has a form of '(add|sub) rd, rn, #imm'.
+ */
+	__HEAD
+__fixup_pv_table:
+	adr	r0, 1f
+	ldmia	r0, {r3-r7}
+	mvn	ip, #0
+	subs	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
+	add	r4, r4, r3	@ adjust table start address
+	add	r5, r5, r3	@ adjust table end address
+	add	r6, r6, r3	@ adjust __pv_phys_pfn_offset address
+	add	r7, r7, r3	@ adjust __pv_offset address
+	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
+	str	r0, [r6]	@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
+	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
+	mov	r6, r3, lsr #24	@ constant for add/sub instructions
+	teq	r3, r6, lsl #24 @ must be 16MiB aligned
+THUMB(	it	ne		@ cross section branch )
+	bne	__error
+	str	r3, [r7, #LOW_OFFSET]	@ save to __pv_offset low bits
+	b	__fixup_a_pv_table
+ENDPROC(__fixup_pv_table)
+
+	.align
+1:	.long	.
+	.long	__pv_table_begin
+	.long	__pv_table_end
+2:	.long	__pv_phys_pfn_offset
+	.long	__pv_offset
+
+	.text
+__fixup_a_pv_table:
+	adr	r0, 3f
+	ldr	r6, [r0]
+	add	r6, r6, r3
+	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
+	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
+	mov	r6, r6, lsr #24
+	cmn	r0, #1
+#ifdef CONFIG_THUMB2_KERNEL
+	moveq	r0, #0x200000	@ set bit 21, mov to mvn instruction
+	lsls	r6, #24
+	beq	2f
+	clz	r7, r6
+	lsr	r6, #24
+	lsl	r6, r7
+	bic	r6, #0x0080
+	lsrs	r7, #1
+	orrcs	r6, #0x0080
+	orr	r6, r6, r7, lsl #12
+	orr	r6, #0x4000
+	b	2f
+1:	add     r7, r3
+	ldrh	ip, [r7, #2]
+ARM_BE8(rev16	ip, ip)
+	tst	ip, #0x4000
+	and	ip, #0x8f00
+	orrne	ip, r6	@ mask in offset bits 31-24
+	orreq	ip, r0	@ mask in offset bits 7-0
+ARM_BE8(rev16	ip, ip)
+	strh	ip, [r7, #2]
+	bne	2f
+	ldrh	ip, [r7]
+ARM_BE8(rev16	ip, ip)
+	bic	ip, #0x20
+	orr	ip, ip, r0, lsr #16
+ARM_BE8(rev16	ip, ip)
+	strh	ip, [r7]
+2:	cmp	r4, r5
+	ldrcc	r7, [r4], #4	@ use branch for delay slot
+	bcc	1b
+	bx	lr
+#else
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	moveq	r0, #0x00004000	@ set bit 22, mov to mvn instruction
+#else
+	moveq	r0, #0x400000	@ set bit 22, mov to mvn instruction
+#endif
+	b	2f
+1:	ldr	ip, [r7, r3]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	@ in BE8, we load data in BE, but instructions still in LE
+	bic	ip, ip, #0xff000000
+	tst	ip, #0x000f0000	@ check the rotation field
+	orrne	ip, ip, r6, lsl #24 @ mask in offset bits 31-24
+	biceq	ip, ip, #0x00004000 @ clear bit 22
+	orreq	ip, ip, r0      @ mask in offset bits 7-0
+#else
+	bic	ip, ip, #0x000000ff
+	tst	ip, #0xf00	@ check the rotation field
+	orrne	ip, ip, r6	@ mask in offset bits 31-24
+	biceq	ip, ip, #0x400000	@ clear bit 22
+	orreq	ip, ip, r0	@ mask in offset bits 7-0
+#endif
+	str	ip, [r7, r3]
+2:	cmp	r4, r5
+	ldrcc	r7, [r4], #4	@ use branch for delay slot
+	bcc	1b
+	ret	lr
+#endif
+ENDPROC(__fixup_a_pv_table)
+
+	.align
+3:	.long __pv_offset
+
+ENTRY(fixup_pv_table)
+	stmfd	sp!, {r4 - r7, lr}
+	mov	r3, #0			@ no offset
+	mov	r4, r0			@ r0 = table start
+	add	r5, r0, r1		@ r1 = table size
+	bl	__fixup_a_pv_table
+	ldmfd	sp!, {r4 - r7, pc}
+ENDPROC(fixup_pv_table)
+
+	.data
+	.globl	__pv_phys_pfn_offset
+	.type	__pv_phys_pfn_offset, %object
+__pv_phys_pfn_offset:
+	.word	0
+	.size	__pv_phys_pfn_offset, . -__pv_phys_pfn_offset
+
+	.globl	__pv_offset
+	.type	__pv_offset, %object
+__pv_offset:
+	.quad	0
+	.size	__pv_offset, . -__pv_offset
+#endif
+
+#include "head-common.S"
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
new file mode 100644
index 000000000..a71501ff6
--- /dev/null
+++ b/arch/arm/kernel/hibernate.c
@@ -0,0 +1,106 @@
+/*
+ * Hibernation support specific for ARM
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ *  https://lkml.org/lkml/2010/6/18/4
+ *  https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ *  https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <asm/system_misc.h>
+#include <asm/idmap.h>
+#include <asm/suspend.h>
+#include <asm/memory.h>
+#include <asm/sections.h>
+#include "reboot.h"
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+	unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+	return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	local_fiq_disable();
+}
+
+void notrace restore_processor_state(void)
+{
+	local_fiq_enable();
+}
+
+/*
+ * Snapshot kernel memory and reset the system.
+ *
+ * swsusp_save() is executed in the suspend finisher so that the CPU
+ * context pointer and memory are part of the saved image, which is
+ * required by the resume kernel image to restart execution from
+ * swsusp_arch_suspend().
+ *
+ * soft_restart is not technically needed, but is used to get success
+ * returned from cpu_suspend.
+ *
+ * When soft reboot completes, the hibernation snapshot is written out.
+ */
+static int notrace arch_save_image(unsigned long unused)
+{
+	int ret;
+
+	ret = swsusp_save();
+	if (ret == 0)
+		_soft_restart(virt_to_phys(cpu_resume), false);
+	return ret;
+}
+
+/*
+ * Save the current CPU state before suspend / poweroff.
+ */
+int notrace swsusp_arch_suspend(void)
+{
+	return cpu_suspend(0, arch_save_image);
+}
+
+/*
+ * Restore page contents for physical pages that were in use during loading
+ * hibernation image.  Switch to idmap_pgd so the physical page tables
+ * are overwritten with the same contents.
+ */
+static void notrace arch_restore_image(void *unused)
+{
+	struct pbe *pbe;
+
+	cpu_switch_mm(idmap_pgd, &init_mm);
+	for (pbe = restore_pblist; pbe; pbe = pbe->next)
+		copy_page(pbe->orig_address, pbe->address);
+
+	_soft_restart(virt_to_phys(cpu_resume), false);
+}
+
+static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
+
+/*
+ * Resume from the hibernation image.
+ * Due to the kernel heap / data restore, stack contents change underneath
+ * and that would make function calls impossible; switch to a temporary
+ * stack within the nosave region to avoid that problem.
+ */
+int swsusp_arch_resume(void)
+{
+	call_with_stack(arch_restore_image, 0,
+		resume_stack + ARRAY_SIZE(resume_stack));
+	return 0;
+}
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
new file mode 100644
index 000000000..dc7d0a95b
--- /dev/null
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -0,0 +1,1134 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2009, 2010 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+#define pr_fmt(fmt) "hw-breakpoint: " fmt
+
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/smp.h>
+#include <linux/cpu_pm.h>
+#include <linux/coresight.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/current.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/kdebug.h>
+#include <asm/traps.h>
+
+/* Breakpoint currently in use for each BRP. */
+static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
+
+/* Watchpoint currently in use for each WRP. */
+static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
+
+/* Number of BRP/WRP registers on this CPU. */
+static int core_num_brps;
+static int core_num_wrps;
+
+/* Debug architecture version. */
+static u8 debug_arch;
+
+/* Does debug architecture support OS Save and Restore? */
+static bool has_ossr;
+
+/* Maximum supported watchpoint length. */
+static u8 max_watchpoint_len;
+
+#define READ_WB_REG_CASE(OP2, M, VAL)			\
+	case ((OP2 << 4) + M):				\
+		ARM_DBG_READ(c0, c ## M, OP2, VAL);	\
+		break
+
+#define WRITE_WB_REG_CASE(OP2, M, VAL)			\
+	case ((OP2 << 4) + M):				\
+		ARM_DBG_WRITE(c0, c ## M, OP2, VAL);	\
+		break
+
+#define GEN_READ_WB_REG_CASES(OP2, VAL)		\
+	READ_WB_REG_CASE(OP2, 0, VAL);		\
+	READ_WB_REG_CASE(OP2, 1, VAL);		\
+	READ_WB_REG_CASE(OP2, 2, VAL);		\
+	READ_WB_REG_CASE(OP2, 3, VAL);		\
+	READ_WB_REG_CASE(OP2, 4, VAL);		\
+	READ_WB_REG_CASE(OP2, 5, VAL);		\
+	READ_WB_REG_CASE(OP2, 6, VAL);		\
+	READ_WB_REG_CASE(OP2, 7, VAL);		\
+	READ_WB_REG_CASE(OP2, 8, VAL);		\
+	READ_WB_REG_CASE(OP2, 9, VAL);		\
+	READ_WB_REG_CASE(OP2, 10, VAL);		\
+	READ_WB_REG_CASE(OP2, 11, VAL);		\
+	READ_WB_REG_CASE(OP2, 12, VAL);		\
+	READ_WB_REG_CASE(OP2, 13, VAL);		\
+	READ_WB_REG_CASE(OP2, 14, VAL);		\
+	READ_WB_REG_CASE(OP2, 15, VAL)
+
+#define GEN_WRITE_WB_REG_CASES(OP2, VAL)	\
+	WRITE_WB_REG_CASE(OP2, 0, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 1, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 2, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 3, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 4, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 5, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 6, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 7, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 8, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 9, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 10, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 11, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 12, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 13, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 14, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 15, VAL)
+
+static u32 read_wb_reg(int n)
+{
+	u32 val = 0;
+
+	switch (n) {
+	GEN_READ_WB_REG_CASES(ARM_OP2_BVR, val);
+	GEN_READ_WB_REG_CASES(ARM_OP2_BCR, val);
+	GEN_READ_WB_REG_CASES(ARM_OP2_WVR, val);
+	GEN_READ_WB_REG_CASES(ARM_OP2_WCR, val);
+	default:
+		pr_warn("attempt to read from unknown breakpoint register %d\n",
+			n);
+	}
+
+	return val;
+}
+
+static void write_wb_reg(int n, u32 val)
+{
+	switch (n) {
+	GEN_WRITE_WB_REG_CASES(ARM_OP2_BVR, val);
+	GEN_WRITE_WB_REG_CASES(ARM_OP2_BCR, val);
+	GEN_WRITE_WB_REG_CASES(ARM_OP2_WVR, val);
+	GEN_WRITE_WB_REG_CASES(ARM_OP2_WCR, val);
+	default:
+		pr_warn("attempt to write to unknown breakpoint register %d\n",
+			n);
+	}
+	isb();
+}
+
+/* Determine debug architecture. */
+static u8 get_debug_arch(void)
+{
+	u32 didr;
+
+	/* Do we implement the extended CPUID interface? */
+	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
+		pr_warn_once("CPUID feature registers not supported. "
+			     "Assuming v6 debug is present.\n");
+		return ARM_DEBUG_ARCH_V6;
+	}
+
+	ARM_DBG_READ(c0, c0, 0, didr);
+	return (didr >> 16) & 0xf;
+}
+
+u8 arch_get_debug_arch(void)
+{
+	return debug_arch;
+}
+
+static int debug_arch_supported(void)
+{
+	u8 arch = get_debug_arch();
+
+	/* We don't support the memory-mapped interface. */
+	return (arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14) ||
+		arch >= ARM_DEBUG_ARCH_V7_1;
+}
+
+/* Can we determine the watchpoint access type from the fsr? */
+static int debug_exception_updates_fsr(void)
+{
+	return get_debug_arch() >= ARM_DEBUG_ARCH_V8;
+}
+
+/* Determine number of WRP registers available. */
+static int get_num_wrp_resources(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, c0, 0, didr);
+	return ((didr >> 28) & 0xf) + 1;
+}
+
+/* Determine number of BRP registers available. */
+static int get_num_brp_resources(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, c0, 0, didr);
+	return ((didr >> 24) & 0xf) + 1;
+}
+
+/* Does this core support mismatch breakpoints? */
+static int core_has_mismatch_brps(void)
+{
+	return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 &&
+		get_num_brp_resources() > 1);
+}
+
+/* Determine number of usable WRPs available. */
+static int get_num_wrps(void)
+{
+	/*
+	 * On debug architectures prior to 7.1, when a watchpoint fires, the
+	 * only way to work out which watchpoint it was is by disassembling
+	 * the faulting instruction and working out the address of the memory
+	 * access.
+	 *
+	 * Furthermore, we can only do this if the watchpoint was precise
+	 * since imprecise watchpoints prevent us from calculating register
+	 * based addresses.
+	 *
+	 * Providing we have more than 1 breakpoint register, we only report
+	 * a single watchpoint register for the time being. This way, we always
+	 * know which watchpoint fired. In the future we can either add a
+	 * disassembler and address generation emulator, or we can insert a
+	 * check to see if the DFAR is set on watchpoint exception entry
+	 * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows
+	 * that it is set on some implementations].
+	 */
+	if (get_debug_arch() < ARM_DEBUG_ARCH_V7_1)
+		return 1;
+
+	return get_num_wrp_resources();
+}
+
+/* Determine number of usable BRPs available. */
+static int get_num_brps(void)
+{
+	int brps = get_num_brp_resources();
+	return core_has_mismatch_brps() ? brps - 1 : brps;
+}
+
+/*
+ * In order to access the breakpoint/watchpoint control registers,
+ * we must be running in debug monitor mode. Unfortunately, we can
+ * be put into halting debug mode at any time by an external debugger
+ * but there is nothing we can do to prevent that.
+ */
+static int monitor_mode_enabled(void)
+{
+	u32 dscr;
+	ARM_DBG_READ(c0, c1, 0, dscr);
+	return !!(dscr & ARM_DSCR_MDBGEN);
+}
+
+static int enable_monitor_mode(void)
+{
+	u32 dscr;
+	ARM_DBG_READ(c0, c1, 0, dscr);
+
+	/* If monitor mode is already enabled, just return. */
+	if (dscr & ARM_DSCR_MDBGEN)
+		goto out;
+
+	/* Write to the corresponding DSCR. */
+	switch (get_debug_arch()) {
+	case ARM_DEBUG_ARCH_V6:
+	case ARM_DEBUG_ARCH_V6_1:
+		ARM_DBG_WRITE(c0, c1, 0, (dscr | ARM_DSCR_MDBGEN));
+		break;
+	case ARM_DEBUG_ARCH_V7_ECP14:
+	case ARM_DEBUG_ARCH_V7_1:
+	case ARM_DEBUG_ARCH_V8:
+		ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN));
+		isb();
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	/* Check that the write made it through. */
+	ARM_DBG_READ(c0, c1, 0, dscr);
+	if (!(dscr & ARM_DSCR_MDBGEN)) {
+		pr_warn_once("Failed to enable monitor mode on CPU %d.\n",
+				smp_processor_id());
+		return -EPERM;
+	}
+
+out:
+	return 0;
+}
+
+int hw_breakpoint_slots(int type)
+{
+	if (!debug_arch_supported())
+		return 0;
+
+	/*
+	 * We can be called early, so don't rely on
+	 * our static variables being initialised.
+	 */
+	switch (type) {
+	case TYPE_INST:
+		return get_num_brps();
+	case TYPE_DATA:
+		return get_num_wrps();
+	default:
+		pr_warn("unknown slot type: %d\n", type);
+		return 0;
+	}
+}
+
+/*
+ * Check if 8-bit byte-address select is available.
+ * This clobbers WRP 0.
+ */
+static u8 get_max_wp_len(void)
+{
+	u32 ctrl_reg;
+	struct arch_hw_breakpoint_ctrl ctrl;
+	u8 size = 4;
+
+	if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14)
+		goto out;
+
+	memset(&ctrl, 0, sizeof(ctrl));
+	ctrl.len = ARM_BREAKPOINT_LEN_8;
+	ctrl_reg = encode_ctrl_reg(ctrl);
+
+	write_wb_reg(ARM_BASE_WVR, 0);
+	write_wb_reg(ARM_BASE_WCR, ctrl_reg);
+	if ((read_wb_reg(ARM_BASE_WCR) & ctrl_reg) == ctrl_reg)
+		size = 8;
+
+out:
+	return size;
+}
+
+u8 arch_get_max_wp_len(void)
+{
+	return max_watchpoint_len;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	struct perf_event **slot, **slots;
+	int i, max_slots, ctrl_base, val_base;
+	u32 addr, ctrl;
+
+	addr = info->address;
+	ctrl = encode_ctrl_reg(info->ctrl) | 0x1;
+
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
+		/* Breakpoint */
+		ctrl_base = ARM_BASE_BCR;
+		val_base = ARM_BASE_BVR;
+		slots = this_cpu_ptr(bp_on_reg);
+		max_slots = core_num_brps;
+	} else {
+		/* Watchpoint */
+		ctrl_base = ARM_BASE_WCR;
+		val_base = ARM_BASE_WVR;
+		slots = this_cpu_ptr(wp_on_reg);
+		max_slots = core_num_wrps;
+	}
+
+	for (i = 0; i < max_slots; ++i) {
+		slot = &slots[i];
+
+		if (!*slot) {
+			*slot = bp;
+			break;
+		}
+	}
+
+	if (i == max_slots) {
+		pr_warn("Can't find any breakpoint slot\n");
+		return -EBUSY;
+	}
+
+	/* Override the breakpoint data with the step data. */
+	if (info->step_ctrl.enabled) {
+		addr = info->trigger & ~0x3;
+		ctrl = encode_ctrl_reg(info->step_ctrl);
+		if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE) {
+			i = 0;
+			ctrl_base = ARM_BASE_BCR + core_num_brps;
+			val_base = ARM_BASE_BVR + core_num_brps;
+		}
+	}
+
+	/* Setup the address register. */
+	write_wb_reg(val_base + i, addr);
+
+	/* Setup the control register. */
+	write_wb_reg(ctrl_base + i, ctrl);
+	return 0;
+}
+
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	struct perf_event **slot, **slots;
+	int i, max_slots, base;
+
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
+		/* Breakpoint */
+		base = ARM_BASE_BCR;
+		slots = this_cpu_ptr(bp_on_reg);
+		max_slots = core_num_brps;
+	} else {
+		/* Watchpoint */
+		base = ARM_BASE_WCR;
+		slots = this_cpu_ptr(wp_on_reg);
+		max_slots = core_num_wrps;
+	}
+
+	/* Remove the breakpoint. */
+	for (i = 0; i < max_slots; ++i) {
+		slot = &slots[i];
+
+		if (*slot == bp) {
+			*slot = NULL;
+			break;
+		}
+	}
+
+	if (i == max_slots) {
+		pr_warn("Can't find any breakpoint slot\n");
+		return;
+	}
+
+	/* Ensure that we disable the mismatch breakpoint. */
+	if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE &&
+	    info->step_ctrl.enabled) {
+		i = 0;
+		base = ARM_BASE_BCR + core_num_brps;
+	}
+
+	/* Reset the control register. */
+	write_wb_reg(base + i, 0);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+	unsigned int len_in_bytes = 0;
+
+	switch (hbp_len) {
+	case ARM_BREAKPOINT_LEN_1:
+		len_in_bytes = 1;
+		break;
+	case ARM_BREAKPOINT_LEN_2:
+		len_in_bytes = 2;
+		break;
+	case ARM_BREAKPOINT_LEN_4:
+		len_in_bytes = 4;
+		break;
+	case ARM_BREAKPOINT_LEN_8:
+		len_in_bytes = 8;
+		break;
+	}
+
+	return len_in_bytes;
+}
+
+/*
+ * Check whether bp virtual address is in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
+{
+	unsigned int len;
+	unsigned long va;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	va = info->address;
+	len = get_hbp_len(info->ctrl.len);
+
+	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl.
+ * Hopefully this will disappear when ptrace can bypass the conversion
+ * to generic breakpoint descriptions.
+ */
+int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+			   int *gen_len, int *gen_type)
+{
+	/* Type */
+	switch (ctrl.type) {
+	case ARM_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
+		break;
+	case ARM_BREAKPOINT_LOAD:
+		*gen_type = HW_BREAKPOINT_R;
+		break;
+	case ARM_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_W;
+		break;
+	case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_RW;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (ctrl.len) {
+	case ARM_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case ARM_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case ARM_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+	case ARM_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Construct an arch_hw_breakpoint from a perf_event.
+ */
+static int arch_build_bp_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_X:
+		info->ctrl.type = ARM_BREAKPOINT_EXECUTE;
+		break;
+	case HW_BREAKPOINT_R:
+		info->ctrl.type = ARM_BREAKPOINT_LOAD;
+		break;
+	case HW_BREAKPOINT_W:
+		info->ctrl.type = ARM_BREAKPOINT_STORE;
+		break;
+	case HW_BREAKPOINT_RW:
+		info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (bp->attr.bp_len) {
+	case HW_BREAKPOINT_LEN_1:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_1;
+		break;
+	case HW_BREAKPOINT_LEN_2:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_2;
+		break;
+	case HW_BREAKPOINT_LEN_4:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_4;
+		break;
+	case HW_BREAKPOINT_LEN_8:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_8;
+		if ((info->ctrl.type != ARM_BREAKPOINT_EXECUTE)
+			&& max_watchpoint_len >= 8)
+			break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes.
+	 * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported
+	 * by the hardware and must be aligned to the appropriate number of
+	 * bytes.
+	 */
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE &&
+	    info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
+	    info->ctrl.len != ARM_BREAKPOINT_LEN_4)
+		return -EINVAL;
+
+	/* Address */
+	info->address = bp->attr.bp_addr;
+
+	/* Privilege */
+	info->ctrl.privilege = ARM_BREAKPOINT_USER;
+	if (arch_check_bp_in_kernelspace(bp))
+		info->ctrl.privilege |= ARM_BREAKPOINT_PRIV;
+
+	/* Enabled? */
+	info->ctrl.enabled = !bp->attr.disabled;
+
+	/* Mismatch */
+	info->ctrl.mismatch = 0;
+
+	return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings.
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	int ret = 0;
+	u32 offset, alignment_mask = 0x3;
+
+	/* Ensure that we are in monitor debug mode. */
+	if (!monitor_mode_enabled())
+		return -ENODEV;
+
+	/* Build the arch_hw_breakpoint. */
+	ret = arch_build_bp_info(bp);
+	if (ret)
+		goto out;
+
+	/* Check address alignment. */
+	if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+		alignment_mask = 0x7;
+	offset = info->address & alignment_mask;
+	switch (offset) {
+	case 0:
+		/* Aligned */
+		break;
+	case 1:
+	case 2:
+		/* Allow halfword watchpoints and breakpoints. */
+		if (info->ctrl.len == ARM_BREAKPOINT_LEN_2)
+			break;
+	case 3:
+		/* Allow single byte watchpoint. */
+		if (info->ctrl.len == ARM_BREAKPOINT_LEN_1)
+			break;
+	default:
+		ret = -EINVAL;
+		goto out;
+	}
+
+	info->address &= ~alignment_mask;
+	info->ctrl.len <<= offset;
+
+	if (!bp->overflow_handler) {
+		/*
+		 * Mismatch breakpoints are required for single-stepping
+		 * breakpoints.
+		 */
+		if (!core_has_mismatch_brps())
+			return -EINVAL;
+
+		/* We don't allow mismatch breakpoints in kernel space. */
+		if (arch_check_bp_in_kernelspace(bp))
+			return -EPERM;
+
+		/*
+		 * Per-cpu breakpoints are not supported by our stepping
+		 * mechanism.
+		 */
+		if (!bp->hw.target)
+			return -EINVAL;
+
+		/*
+		 * We only support specific access types if the fsr
+		 * reports them.
+		 */
+		if (!debug_exception_updates_fsr() &&
+		    (info->ctrl.type == ARM_BREAKPOINT_LOAD ||
+		     info->ctrl.type == ARM_BREAKPOINT_STORE))
+			return -EINVAL;
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Enable/disable single-stepping over the breakpoint bp at address addr.
+ */
+static void enable_single_step(struct perf_event *bp, u32 addr)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	arch_uninstall_hw_breakpoint(bp);
+	info->step_ctrl.mismatch  = 1;
+	info->step_ctrl.len	  = ARM_BREAKPOINT_LEN_4;
+	info->step_ctrl.type	  = ARM_BREAKPOINT_EXECUTE;
+	info->step_ctrl.privilege = info->ctrl.privilege;
+	info->step_ctrl.enabled	  = 1;
+	info->trigger		  = addr;
+	arch_install_hw_breakpoint(bp);
+}
+
+static void disable_single_step(struct perf_event *bp)
+{
+	arch_uninstall_hw_breakpoint(bp);
+	counter_arch_bp(bp)->step_ctrl.enabled = 0;
+	arch_install_hw_breakpoint(bp);
+}
+
+static void watchpoint_handler(unsigned long addr, unsigned int fsr,
+			       struct pt_regs *regs)
+{
+	int i, access;
+	u32 val, ctrl_reg, alignment_mask;
+	struct perf_event *wp, **slots;
+	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	slots = this_cpu_ptr(wp_on_reg);
+
+	for (i = 0; i < core_num_wrps; ++i) {
+		rcu_read_lock();
+
+		wp = slots[i];
+
+		if (wp == NULL)
+			goto unlock;
+
+		info = counter_arch_bp(wp);
+		/*
+		 * The DFAR is an unknown value on debug architectures prior
+		 * to 7.1. Since we only allow a single watchpoint on these
+		 * older CPUs, we can set the trigger to the lowest possible
+		 * faulting address.
+		 */
+		if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
+			BUG_ON(i > 0);
+			info->trigger = wp->attr.bp_addr;
+		} else {
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+				alignment_mask = 0x7;
+			else
+				alignment_mask = 0x3;
+
+			/* Check if the watchpoint value matches. */
+			val = read_wb_reg(ARM_BASE_WVR + i);
+			if (val != (addr & ~alignment_mask))
+				goto unlock;
+
+			/* Possible match, check the byte address select. */
+			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
+			decode_ctrl_reg(ctrl_reg, &ctrl);
+			if (!((1 << (addr & alignment_mask)) & ctrl.len))
+				goto unlock;
+
+			/* Check that the access type matches. */
+			if (debug_exception_updates_fsr()) {
+				access = (fsr & ARM_FSR_ACCESS_MASK) ?
+					  HW_BREAKPOINT_W : HW_BREAKPOINT_R;
+				if (!(access & hw_breakpoint_type(wp)))
+					goto unlock;
+			}
+
+			/* We have a winner. */
+			info->trigger = addr;
+		}
+
+		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
+		perf_bp_event(wp, regs);
+
+		/*
+		 * If no overflow handler is present, insert a temporary
+		 * mismatch breakpoint so we can single-step over the
+		 * watchpoint trigger.
+		 */
+		if (!wp->overflow_handler)
+			enable_single_step(wp, instruction_pointer(regs));
+
+unlock:
+		rcu_read_unlock();
+	}
+}
+
+static void watchpoint_single_step_handler(unsigned long pc)
+{
+	int i;
+	struct perf_event *wp, **slots;
+	struct arch_hw_breakpoint *info;
+
+	slots = this_cpu_ptr(wp_on_reg);
+
+	for (i = 0; i < core_num_wrps; ++i) {
+		rcu_read_lock();
+
+		wp = slots[i];
+
+		if (wp == NULL)
+			goto unlock;
+
+		info = counter_arch_bp(wp);
+		if (!info->step_ctrl.enabled)
+			goto unlock;
+
+		/*
+		 * Restore the original watchpoint if we've completed the
+		 * single-step.
+		 */
+		if (info->trigger != pc)
+			disable_single_step(wp);
+
+unlock:
+		rcu_read_unlock();
+	}
+}
+
+static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
+{
+	int i;
+	u32 ctrl_reg, val, addr;
+	struct perf_event *bp, **slots;
+	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	slots = this_cpu_ptr(bp_on_reg);
+
+	/* The exception entry code places the amended lr in the PC. */
+	addr = regs->ARM_pc;
+
+	/* Check the currently installed breakpoints first. */
+	for (i = 0; i < core_num_brps; ++i) {
+		rcu_read_lock();
+
+		bp = slots[i];
+
+		if (bp == NULL)
+			goto unlock;
+
+		info = counter_arch_bp(bp);
+
+		/* Check if the breakpoint value matches. */
+		val = read_wb_reg(ARM_BASE_BVR + i);
+		if (val != (addr & ~0x3))
+			goto mismatch;
+
+		/* Possible match, check the byte address select to confirm. */
+		ctrl_reg = read_wb_reg(ARM_BASE_BCR + i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		if ((1 << (addr & 0x3)) & ctrl.len) {
+			info->trigger = addr;
+			pr_debug("breakpoint fired: address = 0x%x\n", addr);
+			perf_bp_event(bp, regs);
+			if (!bp->overflow_handler)
+				enable_single_step(bp, addr);
+			goto unlock;
+		}
+
+mismatch:
+		/* If we're stepping a breakpoint, it can now be restored. */
+		if (info->step_ctrl.enabled)
+			disable_single_step(bp);
+unlock:
+		rcu_read_unlock();
+	}
+
+	/* Handle any pending watchpoint single-step breakpoints. */
+	watchpoint_single_step_handler(addr);
+}
+
+/*
+ * Called from either the Data Abort Handler [watchpoint] or the
+ * Prefetch Abort Handler [breakpoint] with interrupts disabled.
+ */
+static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
+				 struct pt_regs *regs)
+{
+	int ret = 0;
+	u32 dscr;
+
+	preempt_disable();
+
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
+	/* We only handle watchpoints and hardware breakpoints. */
+	ARM_DBG_READ(c0, c1, 0, dscr);
+
+	/* Perform perf callbacks. */
+	switch (ARM_DSCR_MOE(dscr)) {
+	case ARM_ENTRY_BREAKPOINT:
+		breakpoint_handler(addr, regs);
+		break;
+	case ARM_ENTRY_ASYNC_WATCHPOINT:
+		WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n");
+	case ARM_ENTRY_SYNC_WATCHPOINT:
+		watchpoint_handler(addr, fsr, regs);
+		break;
+	default:
+		ret = 1; /* Unhandled fault. */
+	}
+
+	preempt_enable();
+
+	return ret;
+}
+
+/*
+ * One-time initialisation.
+ */
+static cpumask_t debug_err_mask;
+
+static int debug_reg_trap(struct pt_regs *regs, unsigned int instr)
+{
+	int cpu = smp_processor_id();
+
+	pr_warn("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
+		instr, cpu);
+
+	/* Set the error flag for this CPU and skip the faulting instruction. */
+	cpumask_set_cpu(cpu, &debug_err_mask);
+	instruction_pointer(regs) += 4;
+	return 0;
+}
+
+static struct undef_hook debug_reg_hook = {
+	.instr_mask	= 0x0fe80f10,
+	.instr_val	= 0x0e000e10,
+	.fn		= debug_reg_trap,
+};
+
+/* Does this core support OS Save and Restore? */
+static bool core_has_os_save_restore(void)
+{
+	u32 oslsr;
+
+	switch (get_debug_arch()) {
+	case ARM_DEBUG_ARCH_V7_1:
+		return true;
+	case ARM_DEBUG_ARCH_V7_ECP14:
+		ARM_DBG_READ(c1, c1, 4, oslsr);
+		if (oslsr & ARM_OSLSR_OSLM0)
+			return true;
+	default:
+		return false;
+	}
+}
+
+static void reset_ctrl_regs(void *unused)
+{
+	int i, raw_num_brps, err = 0, cpu = smp_processor_id();
+	u32 val;
+
+	/*
+	 * v7 debug contains save and restore registers so that debug state
+	 * can be maintained across low-power modes without leaving the debug
+	 * logic powered up. It is IMPLEMENTATION DEFINED whether we can access
+	 * the debug registers out of reset, so we must unlock the OS Lock
+	 * Access Register to avoid taking undefined instruction exceptions
+	 * later on.
+	 */
+	switch (debug_arch) {
+	case ARM_DEBUG_ARCH_V6:
+	case ARM_DEBUG_ARCH_V6_1:
+		/* ARMv6 cores clear the registers out of reset. */
+		goto out_mdbgen;
+	case ARM_DEBUG_ARCH_V7_ECP14:
+		/*
+		 * Ensure sticky power-down is clear (i.e. debug logic is
+		 * powered up).
+		 */
+		ARM_DBG_READ(c1, c5, 4, val);
+		if ((val & 0x1) == 0)
+			err = -EPERM;
+
+		if (!has_ossr)
+			goto clear_vcr;
+		break;
+	case ARM_DEBUG_ARCH_V7_1:
+		/*
+		 * Ensure the OS double lock is clear.
+		 */
+		ARM_DBG_READ(c1, c3, 4, val);
+		if ((val & 0x1) == 1)
+			err = -EPERM;
+		break;
+	}
+
+	if (err) {
+		pr_warn_once("CPU %d debug is powered down!\n", cpu);
+		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
+		return;
+	}
+
+	/*
+	 * Unconditionally clear the OS lock by writing a value
+	 * other than CS_LAR_KEY to the access register.
+	 */
+	ARM_DBG_WRITE(c1, c0, 4, ~CORESIGHT_UNLOCK);
+	isb();
+
+	/*
+	 * Clear any configured vector-catch events before
+	 * enabling monitor mode.
+	 */
+clear_vcr:
+	ARM_DBG_WRITE(c0, c7, 0, 0);
+	isb();
+
+	if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) {
+		pr_warn_once("CPU %d failed to disable vector catch\n", cpu);
+		return;
+	}
+
+	/*
+	 * The control/value register pairs are UNKNOWN out of reset so
+	 * clear them to avoid spurious debug events.
+	 */
+	raw_num_brps = get_num_brp_resources();
+	for (i = 0; i < raw_num_brps; ++i) {
+		write_wb_reg(ARM_BASE_BCR + i, 0UL);
+		write_wb_reg(ARM_BASE_BVR + i, 0UL);
+	}
+
+	for (i = 0; i < core_num_wrps; ++i) {
+		write_wb_reg(ARM_BASE_WCR + i, 0UL);
+		write_wb_reg(ARM_BASE_WVR + i, 0UL);
+	}
+
+	if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) {
+		pr_warn_once("CPU %d failed to clear debug register pairs\n", cpu);
+		return;
+	}
+
+	/*
+	 * Have a crack at enabling monitor mode. We don't actually need
+	 * it yet, but reporting an error early is useful if it fails.
+	 */
+out_mdbgen:
+	if (enable_monitor_mode())
+		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
+}
+
+static int dbg_reset_notify(struct notifier_block *self,
+				      unsigned long action, void *cpu)
+{
+	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
+		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block dbg_reset_nb = {
+	.notifier_call = dbg_reset_notify,
+};
+
+#ifdef CONFIG_CPU_PM
+static int dbg_cpu_pm_notify(struct notifier_block *self, unsigned long action,
+			     void *v)
+{
+	if (action == CPU_PM_EXIT)
+		reset_ctrl_regs(NULL);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block dbg_cpu_pm_nb = {
+	.notifier_call = dbg_cpu_pm_notify,
+};
+
+static void __init pm_init(void)
+{
+	cpu_pm_register_notifier(&dbg_cpu_pm_nb);
+}
+#else
+static inline void pm_init(void)
+{
+}
+#endif
+
+static int __init arch_hw_breakpoint_init(void)
+{
+	debug_arch = get_debug_arch();
+
+	if (!debug_arch_supported()) {
+		pr_info("debug architecture 0x%x unsupported.\n", debug_arch);
+		return 0;
+	}
+
+	has_ossr = core_has_os_save_restore();
+
+	/* Determine how many BRPs/WRPs are available. */
+	core_num_brps = get_num_brps();
+	core_num_wrps = get_num_wrps();
+
+	cpu_notifier_register_begin();
+
+	/*
+	 * We need to tread carefully here because DBGSWENABLE may be
+	 * driven low on this core and there isn't an architected way to
+	 * determine that.
+	 */
+	register_undef_hook(&debug_reg_hook);
+
+	/*
+	 * Reset the breakpoint resources. We assume that a halting
+	 * debugger will leave the world in a nice state for us.
+	 */
+	on_each_cpu(reset_ctrl_regs, NULL, 1);
+	unregister_undef_hook(&debug_reg_hook);
+	if (!cpumask_empty(&debug_err_mask)) {
+		core_num_brps = 0;
+		core_num_wrps = 0;
+		cpu_notifier_register_done();
+		return 0;
+	}
+
+	pr_info("found %d " "%s" "breakpoint and %d watchpoint registers.\n",
+		core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " :
+		"", core_num_wrps);
+
+	/* Work out the maximum supported watchpoint length. */
+	max_watchpoint_len = get_max_wp_len();
+	pr_info("maximum watchpoint size is %u bytes.\n",
+			max_watchpoint_len);
+
+	/* Register debug fault handler. */
+	hook_fault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
+			TRAP_HWBKPT, "watchpoint debug exception");
+	hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
+			TRAP_HWBKPT, "breakpoint debug exception");
+
+	/* Register hotplug and PM notifiers. */
+	__register_cpu_notifier(&dbg_reset_nb);
+
+	cpu_notifier_register_done();
+
+	pm_init();
+	return 0;
+}
+arch_initcall(arch_hw_breakpoint_init);
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+}
+
+/*
+ * Dummy function to register with die_notifier.
+ */
+int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+					unsigned long val, void *data)
+{
+	return NOTIFY_DONE;
+}
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
new file mode 100644
index 000000000..2a55373f4
--- /dev/null
+++ b/arch/arm/kernel/hyp-stub.S
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2012 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/virt.h>
+
+#ifndef ZIMAGE
+/*
+ * For the kernel proper, we need to find out the CPU boot mode long after
+ * boot, so we need to store it in a writable variable.
+ *
+ * This is not in .bss, because we set it sufficiently early that the boot-time
+ * zeroing of .bss would clobber it.
+ */
+.data
+ENTRY(__boot_cpu_mode)
+	.long	0
+.text
+
+	/*
+	 * Save the primary CPU boot mode. Requires 3 scratch registers.
+	 */
+	.macro	store_primary_cpu_mode	reg1, reg2, reg3
+	mrs	\reg1, cpsr
+	and	\reg1, \reg1, #MODE_MASK
+	adr	\reg2, .L__boot_cpu_mode_offset
+	ldr	\reg3, [\reg2]
+	str	\reg1, [\reg2, \reg3]
+	.endm
+
+	/*
+	 * Compare the current mode with the one saved on the primary CPU.
+	 * If they don't match, record that fact. The Z bit indicates
+	 * if there's a match or not.
+	 * Requires 3 additionnal scratch registers.
+	 */
+	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
+	adr	\reg2, .L__boot_cpu_mode_offset
+	ldr	\reg3, [\reg2]
+	ldr	\reg1, [\reg2, \reg3]
+	cmp	\mode, \reg1		@ matches primary CPU boot mode?
+	orrne	\reg1, \reg1, #BOOT_CPU_MODE_MISMATCH
+	strne	\reg1, [\reg2, \reg3]	@ record what happened and give up
+	.endm
+
+#else	/* ZIMAGE */
+
+	.macro	store_primary_cpu_mode	reg1:req, reg2:req, reg3:req
+	.endm
+
+/*
+ * The zImage loader only runs on one CPU, so we don't bother with mult-CPU
+ * consistency checking:
+ */
+	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
+	cmp	\mode, \mode
+	.endm
+
+#endif /* ZIMAGE */
+
+/*
+ * Hypervisor stub installation functions.
+ *
+ * These must be called with the MMU and D-cache off.
+ * They are not ABI compliant and are only intended to be called from the kernel
+ * entry points in head.S.
+ */
+@ Call this from the primary CPU
+ENTRY(__hyp_stub_install)
+	store_primary_cpu_mode	r4, r5, r6
+ENDPROC(__hyp_stub_install)
+
+	@ fall through...
+
+@ Secondary CPUs should call here
+ENTRY(__hyp_stub_install_secondary)
+	mrs	r4, cpsr
+	and	r4, r4, #MODE_MASK
+
+	/*
+	 * If the secondary has booted with a different mode, give up
+	 * immediately.
+	 */
+	compare_cpu_mode_with_primary	r4, r5, r6, r7
+	retne	lr
+
+	/*
+	 * Once we have given up on one CPU, we do not try to install the
+	 * stub hypervisor on the remaining ones: because the saved boot mode
+	 * is modified, it can't compare equal to the CPSR mode field any
+	 * more.
+	 *
+	 * Otherwise...
+	 */
+
+	cmp	r4, #HYP_MODE
+	retne	lr			@ give up if the CPU is not in HYP mode
+
+/*
+ * Configure HSCTLR to set correct exception endianness/instruction set
+ * state etc.
+ * Turn off all traps
+ * Eventually, CPU-specific code might be needed -- assume not for now
+ *
+ * This code relies on the "eret" instruction to synchronize the
+ * various coprocessor accesses. This is done when we switch to SVC
+ * (see safe_svcmode_maskall).
+ */
+	@ Now install the hypervisor stub:
+	adr	r7, __hyp_stub_vectors
+	mcr	p15, 4, r7, c12, c0, 0	@ set hypervisor vector base (HVBAR)
+
+	@ Disable all traps, so we don't get any nasty surprise
+	mov	r7, #0
+	mcr	p15, 4, r7, c1, c1, 0	@ HCR
+	mcr	p15, 4, r7, c1, c1, 2	@ HCPTR
+	mcr	p15, 4, r7, c1, c1, 3	@ HSTR
+
+THUMB(	orr	r7, #(1 << 30)	)	@ HSCTLR.TE
+ARM_BE8(orr	r7, r7, #(1 << 25))     @ HSCTLR.EE
+	mcr	p15, 4, r7, c1, c0, 0	@ HSCTLR
+
+	mrc	p15, 4, r7, c1, c1, 1	@ HDCR
+	and	r7, #0x1f		@ Preserve HPMN
+	mcr	p15, 4, r7, c1, c1, 1	@ HDCR
+
+#if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER)
+	@ make CNTP_* and CNTPCT accessible from PL1
+	mrc	p15, 0, r7, c0, c1, 1	@ ID_PFR1
+	lsr	r7, #16
+	and	r7, #0xf
+	cmp	r7, #1
+	bne	1f
+	mrc	p15, 4, r7, c14, c1, 0	@ CNTHCTL
+	orr	r7, r7, #3		@ PL1PCEN | PL1PCTEN
+	mcr	p15, 4, r7, c14, c1, 0	@ CNTHCTL
+	mov	r7, #0
+	mcrr	p15, 4, r7, r7, c14	@ CNTVOFF
+
+	@ Disable virtual timer in case it was counting
+	mrc	p15, 0, r7, c14, c3, 1	@ CNTV_CTL
+	bic	r7, #1			@ Clear ENABLE
+	mcr	p15, 0, r7, c14, c3, 1	@ CNTV_CTL
+1:
+#endif
+
+	bx	lr			@ The boot CPU mode is left in r4.
+ENDPROC(__hyp_stub_install_secondary)
+
+__hyp_stub_do_trap:
+	cmp	r0, #-1
+	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR
+	mcrne	p15, 4, r0, c12, c0, 0	@ set HVBAR
+	__ERET
+ENDPROC(__hyp_stub_do_trap)
+
+/*
+ * __hyp_set_vectors: Call this after boot to set the initial hypervisor
+ * vectors as part of hypervisor installation.  On an SMP system, this should
+ * be called on each CPU.
+ *
+ * r0 must be the physical address of the new vector table (which must lie in
+ * the bottom 4GB of physical address space.
+ *
+ * r0 must be 32-byte aligned.
+ *
+ * Before calling this, you must check that the stub hypervisor is installed
+ * everywhere, by waiting for any secondary CPUs to be brought up and then
+ * checking that BOOT_CPU_MODE_HAVE_HYP(__boot_cpu_mode) is true.
+ *
+ * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or
+ * something else went wrong... in such cases, trying to install a new
+ * hypervisor is unlikely to work as desired.
+ *
+ * When you call into your shiny new hypervisor, sp_hyp will contain junk,
+ * so you will need to set that to something sensible at the new hypervisor's
+ * initialisation entry point.
+ */
+ENTRY(__hyp_get_vectors)
+	mov	r0, #-1
+ENDPROC(__hyp_get_vectors)
+	@ fall through
+ENTRY(__hyp_set_vectors)
+	__HVC(0)
+	ret	lr
+ENDPROC(__hyp_set_vectors)
+
+#ifndef ZIMAGE
+.align 2
+.L__boot_cpu_mode_offset:
+	.long	__boot_cpu_mode - .
+#endif
+
+.align 5
+__hyp_stub_vectors:
+__hyp_stub_reset:	W(b)	.
+__hyp_stub_und:		W(b)	.
+__hyp_stub_svc:		W(b)	.
+__hyp_stub_pabort:	W(b)	.
+__hyp_stub_dabort:	W(b)	.
+__hyp_stub_trap:	W(b)	__hyp_stub_do_trap
+__hyp_stub_irq:		W(b)	.
+__hyp_stub_fiq:		W(b)	.
+ENDPROC(__hyp_stub_vectors)
+
diff --git a/arch/arm/kernel/insn.c b/arch/arm/kernel/insn.c
new file mode 100644
index 000000000..b760340b7
--- /dev/null
+++ b/arch/arm/kernel/insn.c
@@ -0,0 +1,62 @@
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <asm/opcodes.h>
+
+static unsigned long
+__arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link)
+{
+	unsigned long s, j1, j2, i1, i2, imm10, imm11;
+	unsigned long first, second;
+	long offset;
+
+	offset = (long)addr - (long)(pc + 4);
+	if (offset < -16777216 || offset > 16777214) {
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+
+	s	= (offset >> 24) & 0x1;
+	i1	= (offset >> 23) & 0x1;
+	i2	= (offset >> 22) & 0x1;
+	imm10	= (offset >> 12) & 0x3ff;
+	imm11	= (offset >>  1) & 0x7ff;
+
+	j1 = (!i1) ^ s;
+	j2 = (!i2) ^ s;
+
+	first = 0xf000 | (s << 10) | imm10;
+	second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11;
+	if (link)
+		second |= 1 << 14;
+
+	return __opcode_thumb32_compose(first, second);
+}
+
+static unsigned long
+__arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link)
+{
+	unsigned long opcode = 0xea000000;
+	long offset;
+
+	if (link)
+		opcode |= 1 << 24;
+
+	offset = (long)addr - (long)(pc + 8);
+	if (unlikely(offset < -33554432 || offset > 33554428)) {
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+
+	offset = (offset >> 2) & 0x00ffffff;
+
+	return opcode | offset;
+}
+
+unsigned long
+__arm_gen_branch(unsigned long pc, unsigned long addr, bool link)
+{
+	if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
+		return __arm_gen_branch_thumb2(pc, addr, link);
+	else
+		return __arm_gen_branch_arm(pc, addr, link);
+}
diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c
new file mode 100644
index 000000000..eedefe050
--- /dev/null
+++ b/arch/arm/kernel/io.c
@@ -0,0 +1,84 @@
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+
+static DEFINE_RAW_SPINLOCK(__io_lock);
+
+/*
+ * Generic atomic MMIO modify.
+ *
+ * Allows thread-safe access to registers shared by unrelated subsystems.
+ * The access is protected by a single MMIO-wide lock.
+ */
+void atomic_io_modify_relaxed(void __iomem *reg, u32 mask, u32 set)
+{
+	unsigned long flags;
+	u32 value;
+
+	raw_spin_lock_irqsave(&__io_lock, flags);
+	value = readl_relaxed(reg) & ~mask;
+	value |= (set & mask);
+	writel_relaxed(value, reg);
+	raw_spin_unlock_irqrestore(&__io_lock, flags);
+}
+EXPORT_SYMBOL(atomic_io_modify_relaxed);
+
+void atomic_io_modify(void __iomem *reg, u32 mask, u32 set)
+{
+	unsigned long flags;
+	u32 value;
+
+	raw_spin_lock_irqsave(&__io_lock, flags);
+	value = readl_relaxed(reg) & ~mask;
+	value |= (set & mask);
+	writel(value, reg);
+	raw_spin_unlock_irqrestore(&__io_lock, flags);
+}
+EXPORT_SYMBOL(atomic_io_modify);
+
+/*
+ * Copy data from IO memory space to "real" memory space.
+ * This needs to be optimized.
+ */
+void _memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+	unsigned char *t = to;
+	while (count) {
+		count--;
+		*t = readb(from);
+		t++;
+		from++;
+	}
+}
+EXPORT_SYMBOL(_memcpy_fromio);
+
+/*
+ * Copy data from "real" memory space to IO memory space.
+ * This needs to be optimized.
+ */
+void _memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+	const unsigned char *f = from;
+	while (count) {
+		count--;
+		writeb(*f, to);
+		f++;
+		to++;
+	}
+}
+EXPORT_SYMBOL(_memcpy_toio);
+
+/*
+ * "memset" on IO memory space.
+ * This needs to be optimized.
+ */
+void _memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+	while (count) {
+		count--;
+		writeb(c, dst);
+		dst++;
+	}
+}
+EXPORT_SYMBOL(_memset_io);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
new file mode 100644
index 000000000..350f188c9
--- /dev/null
+++ b/arch/arm/kernel/irq.c
@@ -0,0 +1,198 @@
+/*
+ *  linux/arch/arm/kernel/irq.c
+ *
+ *  Copyright (C) 1992 Linus Torvalds
+ *  Modifications for ARM processor Copyright (C) 1995-2000 Russell King.
+ *
+ *  Support for Dynamic Tick Timer Copyright (C) 2004-2005 Nokia Corporation.
+ *  Dynamic Tick Timer written by Tony Lindgren <tony@atomide.com> and
+ *  Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains the code used by various IRQ handling routines:
+ *  asking for different IRQ's should be done through these routines
+ *  instead of just grabbing them. Thus setups with different IRQ numbers
+ *  shouldn't result in any weird surprises, and installing new handlers
+ *  should be easier.
+ *
+ *  IRQ's are in fact implemented a bit like signal handlers for the kernel.
+ *  Naturally it's not a 1:1 relation, but there are similarities.
+ */
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/random.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/ratelimit.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/kallsyms.h>
+#include <linux/proc_fs.h>
+#include <linux/export.h>
+
+#include <asm/hardware/cache-l2x0.h>
+#include <asm/exception.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/time.h>
+
+unsigned long irq_err_count;
+
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#ifdef CONFIG_FIQ
+	show_fiq_list(p, prec);
+#endif
+#ifdef CONFIG_SMP
+	show_ipi_list(p, prec);
+#endif
+	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
+	return 0;
+}
+
+/*
+ * handle_IRQ handles all hardware IRQ's.  Decoded IRQs should
+ * not come via this function.  Instead, they should provide their
+ * own 'handler'.  Used by platform code implementing C-based 1st
+ * level decoding.
+ */
+void handle_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+	__handle_domain_irq(NULL, irq, false, regs);
+}
+
+/*
+ * asm_do_IRQ is the interface to be used from assembly code.
+ */
+asmlinkage void __exception_irq_entry
+asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+	handle_IRQ(irq, regs);
+}
+
+void set_irq_flags(unsigned int irq, unsigned int iflags)
+{
+	unsigned long clr = 0, set = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+
+	if (irq >= nr_irqs) {
+		pr_err("Trying to set irq flags for IRQ%d\n", irq);
+		return;
+	}
+
+	if (iflags & IRQF_VALID)
+		clr |= IRQ_NOREQUEST;
+	if (iflags & IRQF_PROBE)
+		clr |= IRQ_NOPROBE;
+	if (!(iflags & IRQF_NOAUTOEN))
+		clr |= IRQ_NOAUTOEN;
+	/* Order is clear bits in "clr" then set bits in "set" */
+	irq_modify_status(irq, clr, set & ~clr);
+}
+EXPORT_SYMBOL_GPL(set_irq_flags);
+
+void __init init_IRQ(void)
+{
+	int ret;
+
+	if (IS_ENABLED(CONFIG_OF) && !machine_desc->init_irq)
+		irqchip_init();
+	else
+		machine_desc->init_irq();
+
+	if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_CACHE_L2X0) &&
+	    (machine_desc->l2c_aux_mask || machine_desc->l2c_aux_val)) {
+		if (!outer_cache.write_sec)
+			outer_cache.write_sec = machine_desc->l2c_write_sec;
+		ret = l2x0_of_init(machine_desc->l2c_aux_val,
+				   machine_desc->l2c_aux_mask);
+		if (ret)
+			pr_err("L2C: failed to init: %d\n", ret);
+	}
+}
+
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
+{
+	if (handle_arch_irq)
+		return;
+
+	handle_arch_irq = handle_irq;
+}
+#endif
+
+#ifdef CONFIG_SPARSE_IRQ
+int __init arch_probe_nr_irqs(void)
+{
+	nr_irqs = machine_desc->nr_irqs ? machine_desc->nr_irqs : NR_IRQS;
+	return nr_irqs;
+}
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+static bool migrate_one_irq(struct irq_desc *desc)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	const struct cpumask *affinity = d->affinity;
+	struct irq_chip *c;
+	bool ret = false;
+
+	/*
+	 * If this is a per-CPU interrupt, or the affinity does not
+	 * include this CPU, then we have nothing to do.
+	 */
+	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
+		return false;
+
+	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+		affinity = cpu_online_mask;
+		ret = true;
+	}
+
+	c = irq_data_get_irq_chip(d);
+	if (!c->irq_set_affinity)
+		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
+	else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
+		cpumask_copy(d->affinity, affinity);
+
+	return ret;
+}
+
+/*
+ * The current CPU has been marked offline.  Migrate IRQs off this CPU.
+ * If the affinity settings do not allow other CPUs, force them onto any
+ * available CPU.
+ *
+ * Note: we must iterate over all IRQs, whether they have an attached
+ * action structure or not, as we need to get chained interrupts too.
+ */
+void migrate_irqs(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	for_each_irq_desc(i, desc) {
+		bool affinity_broken;
+
+		raw_spin_lock(&desc->lock);
+		affinity_broken = migrate_one_irq(desc);
+		raw_spin_unlock(&desc->lock);
+
+		if (affinity_broken)
+			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
+				i, smp_processor_id());
+	}
+
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/arm/kernel/isa.c b/arch/arm/kernel/isa.c
new file mode 100644
index 000000000..9d1cf7156
--- /dev/null
+++ b/arch/arm/kernel/isa.c
@@ -0,0 +1,70 @@
+/*
+ *  linux/arch/arm/kernel/isa.c
+ *
+ *  Copyright (C) 1999 Phil Blundell
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *  ISA shared memory and I/O port support, and is required to support
+ *  iopl, inb, outb and friends in userspace via glibc emulation.
+ */
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+#include <linux/io.h>
+
+static unsigned int isa_membase, isa_portbase, isa_portshift;
+
+static struct ctl_table ctl_isa_vars[4] = {
+	{
+		.procname	= "membase",
+		.data		= &isa_membase, 
+		.maxlen		= sizeof(isa_membase),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	}, {
+		.procname	= "portbase",
+		.data		= &isa_portbase, 
+		.maxlen		= sizeof(isa_portbase),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	}, {
+		.procname	= "portshift",
+		.data		= &isa_portshift, 
+		.maxlen		= sizeof(isa_portshift),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	}, {}
+};
+
+static struct ctl_table_header *isa_sysctl_header;
+
+static struct ctl_table ctl_isa[2] = {
+	{
+		.procname	= "isa",
+		.mode		= 0555,
+		.child		= ctl_isa_vars,
+	}, {}
+};
+
+static struct ctl_table ctl_bus[2] = {
+	{
+		.procname	= "bus",
+		.mode		= 0555,
+		.child		= ctl_isa,
+	}, {}
+};
+
+void __init
+register_isa_ports(unsigned int membase, unsigned int portbase, unsigned int portshift)
+{
+	isa_membase = membase;
+	isa_portbase = portbase;
+	isa_portshift = portshift;
+	isa_sysctl_header = register_sysctl_table(ctl_bus);
+}
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
new file mode 100644
index 000000000..49fadbda8
--- /dev/null
+++ b/arch/arm/kernel/iwmmxt.S
@@ -0,0 +1,372 @@
+/*
+ *  linux/arch/arm/kernel/iwmmxt.S
+ *
+ *  XScale iWMMXt (Concan) context switching and handling
+ *
+ *  Initial code:
+ *  Copyright (c) 2003, Intel Corporation
+ *
+ *  Full lazy switching support, optimizations and more, by Nicolas Pitre
+*   Copyright (c) 2003-2004, MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+
+#if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B)
+#define PJ4(code...)		code
+#define XSC(code...)
+#elif defined(CONFIG_CPU_MOHAWK) || \
+	defined(CONFIG_CPU_XSC3) || \
+	defined(CONFIG_CPU_XSCALE)
+#define PJ4(code...)
+#define XSC(code...)		code
+#else
+#error "Unsupported iWMMXt architecture"
+#endif
+
+#define MMX_WR0		 	(0x00)
+#define MMX_WR1		 	(0x08)
+#define MMX_WR2		 	(0x10)
+#define MMX_WR3			(0x18)
+#define MMX_WR4		 	(0x20)
+#define MMX_WR5		 	(0x28)
+#define MMX_WR6		 	(0x30)
+#define MMX_WR7		 	(0x38)
+#define MMX_WR8		 	(0x40)
+#define MMX_WR9		 	(0x48)
+#define MMX_WR10		(0x50)
+#define MMX_WR11		(0x58)
+#define MMX_WR12		(0x60)
+#define MMX_WR13		(0x68)
+#define MMX_WR14		(0x70)
+#define MMX_WR15		(0x78)
+#define MMX_WCSSF		(0x80)
+#define MMX_WCASF		(0x84)
+#define MMX_WCGR0		(0x88)
+#define MMX_WCGR1		(0x8C)
+#define MMX_WCGR2		(0x90)
+#define MMX_WCGR3		(0x94)
+
+#define MMX_SIZE		(0x98)
+
+	.text
+	.arm
+
+/*
+ * Lazy switching of Concan coprocessor context
+ *
+ * r10 = struct thread_info pointer
+ * r9  = ret_from_exception
+ * lr  = undefined instr exit
+ *
+ * called from prefetch exception handler with interrupts enabled
+ */
+
+ENTRY(iwmmxt_task_enable)
+	inc_preempt_count r10, r3
+
+	XSC(mrc	p15, 0, r2, c15, c1, 0)
+	PJ4(mrc p15, 0, r2, c1, c0, 2)
+	@ CP0 and CP1 accessible?
+	XSC(tst	r2, #0x3)
+	PJ4(tst	r2, #0xf)
+	bne	4f				@ if so no business here
+	@ enable access to CP0 and CP1
+	XSC(orr	r2, r2, #0x3)
+	XSC(mcr	p15, 0, r2, c15, c1, 0)
+	PJ4(orr	r2, r2, #0xf)
+	PJ4(mcr	p15, 0, r2, c1, c0, 2)
+
+	ldr	r3, =concan_owner
+	add	r0, r10, #TI_IWMMXT_STATE	@ get task Concan save area
+	ldr	r2, [sp, #60]			@ current task pc value
+	ldr	r1, [r3]			@ get current Concan owner
+	str	r0, [r3]			@ this task now owns Concan regs
+	sub	r2, r2, #4			@ adjust pc back
+	str	r2, [sp, #60]
+
+	mrc	p15, 0, r2, c2, c0, 0
+	mov	r2, r2				@ cpwait
+	bl	concan_save
+
+#ifdef CONFIG_PREEMPT_COUNT
+	get_thread_info r10
+#endif
+4:	dec_preempt_count r10, r3
+	ret	r9				@ normal exit from exception
+
+concan_save:
+
+	teq	r1, #0				@ test for last ownership
+	beq	concan_load			@ no owner, skip save
+
+	tmrc	r2, wCon
+
+	@ CUP? wCx
+	tst	r2, #0x1
+	beq 	1f
+
+concan_dump:
+
+	wstrw	wCSSF, [r1, #MMX_WCSSF]
+	wstrw	wCASF, [r1, #MMX_WCASF]
+	wstrw	wCGR0, [r1, #MMX_WCGR0]
+	wstrw	wCGR1, [r1, #MMX_WCGR1]
+	wstrw	wCGR2, [r1, #MMX_WCGR2]
+	wstrw	wCGR3, [r1, #MMX_WCGR3]
+
+1:	@ MUP? wRn
+	tst	r2, #0x2
+	beq	2f
+
+	wstrd	wR0,  [r1, #MMX_WR0]
+	wstrd	wR1,  [r1, #MMX_WR1]
+	wstrd	wR2,  [r1, #MMX_WR2]
+	wstrd	wR3,  [r1, #MMX_WR3]
+	wstrd	wR4,  [r1, #MMX_WR4]
+	wstrd	wR5,  [r1, #MMX_WR5]
+	wstrd	wR6,  [r1, #MMX_WR6]
+	wstrd	wR7,  [r1, #MMX_WR7]
+	wstrd	wR8,  [r1, #MMX_WR8]
+	wstrd	wR9,  [r1, #MMX_WR9]
+	wstrd	wR10, [r1, #MMX_WR10]
+	wstrd	wR11, [r1, #MMX_WR11]
+	wstrd	wR12, [r1, #MMX_WR12]
+	wstrd	wR13, [r1, #MMX_WR13]
+	wstrd	wR14, [r1, #MMX_WR14]
+	wstrd	wR15, [r1, #MMX_WR15]
+
+2:	teq	r0, #0				@ anything to load?
+	reteq	lr				@ if not, return
+
+concan_load:
+
+	@ Load wRn
+	wldrd	wR0,  [r0, #MMX_WR0]
+	wldrd	wR1,  [r0, #MMX_WR1]
+	wldrd	wR2,  [r0, #MMX_WR2]
+	wldrd	wR3,  [r0, #MMX_WR3]
+	wldrd	wR4,  [r0, #MMX_WR4]
+	wldrd	wR5,  [r0, #MMX_WR5]
+	wldrd	wR6,  [r0, #MMX_WR6]
+	wldrd	wR7,  [r0, #MMX_WR7]
+	wldrd	wR8,  [r0, #MMX_WR8]
+	wldrd	wR9,  [r0, #MMX_WR9]
+	wldrd	wR10, [r0, #MMX_WR10]
+	wldrd	wR11, [r0, #MMX_WR11]
+	wldrd	wR12, [r0, #MMX_WR12]
+	wldrd	wR13, [r0, #MMX_WR13]
+	wldrd	wR14, [r0, #MMX_WR14]
+	wldrd	wR15, [r0, #MMX_WR15]
+
+	@ Load wCx
+	wldrw	wCSSF, [r0, #MMX_WCSSF]
+	wldrw	wCASF, [r0, #MMX_WCASF]
+	wldrw	wCGR0, [r0, #MMX_WCGR0]
+	wldrw	wCGR1, [r0, #MMX_WCGR1]
+	wldrw	wCGR2, [r0, #MMX_WCGR2]
+	wldrw	wCGR3, [r0, #MMX_WCGR3]
+
+	@ clear CUP/MUP (only if r1 != 0)
+	teq	r1, #0
+	mov 	r2, #0
+	reteq	lr
+
+	tmcr	wCon, r2
+	ret	lr
+
+ENDPROC(iwmmxt_task_enable)
+
+/*
+ * Back up Concan regs to save area and disable access to them
+ * (mainly for gdb or sleep mode usage)
+ *
+ * r0 = struct thread_info pointer of target task or NULL for any
+ */
+
+ENTRY(iwmmxt_task_disable)
+
+	stmfd	sp!, {r4, lr}
+
+	mrs	ip, cpsr
+	orr	r2, ip, #PSR_I_BIT		@ disable interrupts
+	msr	cpsr_c, r2
+
+	ldr	r3, =concan_owner
+	add	r2, r0, #TI_IWMMXT_STATE	@ get task Concan save area
+	ldr	r1, [r3]			@ get current Concan owner
+	teq	r1, #0				@ any current owner?
+	beq	1f				@ no: quit
+	teq	r0, #0				@ any owner?
+	teqne	r1, r2				@ or specified one?
+	bne	1f				@ no: quit
+
+	@ enable access to CP0 and CP1
+	XSC(mrc	p15, 0, r4, c15, c1, 0)
+	XSC(orr	r4, r4, #0x3)
+	XSC(mcr	p15, 0, r4, c15, c1, 0)
+	PJ4(mrc p15, 0, r4, c1, c0, 2)
+	PJ4(orr	r4, r4, #0xf)
+	PJ4(mcr	p15, 0, r4, c1, c0, 2)
+
+	mov	r0, #0				@ nothing to load
+	str	r0, [r3]			@ no more current owner
+	mrc	p15, 0, r2, c2, c0, 0
+	mov	r2, r2				@ cpwait
+	bl	concan_save
+
+	@ disable access to CP0 and CP1
+	XSC(bic	r4, r4, #0x3)
+	XSC(mcr	p15, 0, r4, c15, c1, 0)
+	PJ4(bic	r4, r4, #0xf)
+	PJ4(mcr	p15, 0, r4, c1, c0, 2)
+
+	mrc	p15, 0, r2, c2, c0, 0
+	mov	r2, r2				@ cpwait
+
+1:	msr	cpsr_c, ip			@ restore interrupt mode
+	ldmfd	sp!, {r4, pc}
+
+ENDPROC(iwmmxt_task_disable)
+
+/*
+ * Copy Concan state to given memory address
+ *
+ * r0 = struct thread_info pointer of target task
+ * r1 = memory address where to store Concan state
+ *
+ * this is called mainly in the creation of signal stack frames
+ */
+
+ENTRY(iwmmxt_task_copy)
+
+	mrs	ip, cpsr
+	orr	r2, ip, #PSR_I_BIT		@ disable interrupts
+	msr	cpsr_c, r2
+
+	ldr	r3, =concan_owner
+	add	r2, r0, #TI_IWMMXT_STATE	@ get task Concan save area
+	ldr	r3, [r3]			@ get current Concan owner
+	teq	r2, r3				@ does this task own it...
+	beq	1f
+
+	@ current Concan values are in the task save area
+	msr	cpsr_c, ip			@ restore interrupt mode
+	mov	r0, r1
+	mov	r1, r2
+	mov	r2, #MMX_SIZE
+	b	memcpy
+
+1:	@ this task owns Concan regs -- grab a copy from there
+	mov	r0, #0				@ nothing to load
+	mov	r2, #3				@ save all regs
+	mov	r3, lr				@ preserve return address
+	bl	concan_dump
+	msr	cpsr_c, ip			@ restore interrupt mode
+	ret	r3
+
+ENDPROC(iwmmxt_task_copy)
+
+/*
+ * Restore Concan state from given memory address
+ *
+ * r0 = struct thread_info pointer of target task
+ * r1 = memory address where to get Concan state from
+ *
+ * this is used to restore Concan state when unwinding a signal stack frame
+ */
+
+ENTRY(iwmmxt_task_restore)
+
+	mrs	ip, cpsr
+	orr	r2, ip, #PSR_I_BIT		@ disable interrupts
+	msr	cpsr_c, r2
+
+	ldr	r3, =concan_owner
+	add	r2, r0, #TI_IWMMXT_STATE	@ get task Concan save area
+	ldr	r3, [r3]			@ get current Concan owner
+	bic	r2, r2, #0x7			@ 64-bit alignment
+	teq	r2, r3				@ does this task own it...
+	beq	1f
+
+	@ this task doesn't own Concan regs -- use its save area
+	msr	cpsr_c, ip			@ restore interrupt mode
+	mov	r0, r2
+	mov	r2, #MMX_SIZE
+	b	memcpy
+
+1:	@ this task owns Concan regs -- load them directly
+	mov	r0, r1
+	mov	r1, #0				@ don't clear CUP/MUP
+	mov	r3, lr				@ preserve return address
+	bl	concan_load
+	msr	cpsr_c, ip			@ restore interrupt mode
+	ret	r3
+
+ENDPROC(iwmmxt_task_restore)
+
+/*
+ * Concan handling on task switch
+ *
+ * r0 = next thread_info pointer
+ *
+ * Called only from the iwmmxt notifier with task preemption disabled.
+ */
+ENTRY(iwmmxt_task_switch)
+
+	XSC(mrc	p15, 0, r1, c15, c1, 0)
+	PJ4(mrc	p15, 0, r1, c1, c0, 2)
+	@ CP0 and CP1 accessible?
+	XSC(tst	r1, #0x3)
+	PJ4(tst	r1, #0xf)
+	bne	1f				@ yes: block them for next task
+
+	ldr	r2, =concan_owner
+	add	r3, r0, #TI_IWMMXT_STATE	@ get next task Concan save area
+	ldr	r2, [r2]			@ get current Concan owner
+	teq	r2, r3				@ next task owns it?
+	retne	lr				@ no: leave Concan disabled
+
+1:	@ flip Concan access
+	XSC(eor	r1, r1, #0x3)
+	XSC(mcr	p15, 0, r1, c15, c1, 0)
+	PJ4(eor r1, r1, #0xf)
+	PJ4(mcr	p15, 0, r1, c1, c0, 2)
+
+	mrc	p15, 0, r1, c2, c0, 0
+	sub	pc, lr, r1, lsr #32		@ cpwait and return
+
+ENDPROC(iwmmxt_task_switch)
+
+/*
+ * Remove Concan ownership of given task
+ *
+ * r0 = struct thread_info pointer
+ */
+ENTRY(iwmmxt_task_release)
+
+	mrs	r2, cpsr
+	orr	ip, r2, #PSR_I_BIT		@ disable interrupts
+	msr	cpsr_c, ip
+	ldr	r3, =concan_owner
+	add	r0, r0, #TI_IWMMXT_STATE	@ get task Concan save area
+	ldr	r1, [r3]			@ get current Concan owner
+	eors	r0, r0, r1			@ if equal...
+	streq	r0, [r3]			@ then clear ownership
+	msr	cpsr_c, r2			@ restore interrupts
+	ret	lr
+
+ENDPROC(iwmmxt_task_release)
+
+	.data
+concan_owner:
+	.word	0
+
diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
new file mode 100644
index 000000000..e39cbf488
--- /dev/null
+++ b/arch/arm/kernel/jump_label.c
@@ -0,0 +1,38 @@
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/patch.h>
+#include <asm/insn.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+static void __arch_jump_label_transform(struct jump_entry *entry,
+					enum jump_label_type type,
+					bool is_static)
+{
+	void *addr = (void *)entry->code;
+	unsigned int insn;
+
+	if (type == JUMP_LABEL_ENABLE)
+		insn = arm_gen_branch(entry->code, entry->target);
+	else
+		insn = arm_gen_nop();
+
+	if (is_static)
+		__patch_text_early(addr, insn);
+	else
+		patch_text(addr, insn);
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	__arch_jump_label_transform(entry, type, false);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+				      enum jump_label_type type)
+{
+	__arch_jump_label_transform(entry, type, true);
+}
+
+#endif
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
new file mode 100644
index 000000000..a6ad93c9b
--- /dev/null
+++ b/arch/arm/kernel/kgdb.c
@@ -0,0 +1,287 @@
+/*
+ * arch/arm/kernel/kgdb.c
+ *
+ * ARM KGDB support
+ *
+ * Copyright (c) 2002-2004 MontaVista Software, Inc
+ * Copyright (c) 2008 Wind River Systems, Inc.
+ *
+ * Authors:  George Davis <davis_g@mvista.com>
+ *           Deepak Saxena <dsaxena@plexity.net>
+ */
+#include <linux/irq.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/uaccess.h>
+
+#include <asm/patch.h>
+#include <asm/traps.h>
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
+{
+	{ "r0", 4, offsetof(struct pt_regs, ARM_r0)},
+	{ "r1", 4, offsetof(struct pt_regs, ARM_r1)},
+	{ "r2", 4, offsetof(struct pt_regs, ARM_r2)},
+	{ "r3", 4, offsetof(struct pt_regs, ARM_r3)},
+	{ "r4", 4, offsetof(struct pt_regs, ARM_r4)},
+	{ "r5", 4, offsetof(struct pt_regs, ARM_r5)},
+	{ "r6", 4, offsetof(struct pt_regs, ARM_r6)},
+	{ "r7", 4, offsetof(struct pt_regs, ARM_r7)},
+	{ "r8", 4, offsetof(struct pt_regs, ARM_r8)},
+	{ "r9", 4, offsetof(struct pt_regs, ARM_r9)},
+	{ "r10", 4, offsetof(struct pt_regs, ARM_r10)},
+	{ "fp", 4, offsetof(struct pt_regs, ARM_fp)},
+	{ "ip", 4, offsetof(struct pt_regs, ARM_ip)},
+	{ "sp", 4, offsetof(struct pt_regs, ARM_sp)},
+	{ "lr", 4, offsetof(struct pt_regs, ARM_lr)},
+	{ "pc", 4, offsetof(struct pt_regs, ARM_pc)},
+	{ "f0", 12, -1 },
+	{ "f1", 12, -1 },
+	{ "f2", 12, -1 },
+	{ "f3", 12, -1 },
+	{ "f4", 12, -1 },
+	{ "f5", 12, -1 },
+	{ "f6", 12, -1 },
+	{ "f7", 12, -1 },
+	{ "fps", 4, -1 },
+	{ "cpsr", 4, offsetof(struct pt_regs, ARM_cpsr)},
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return NULL;
+
+	if (dbg_reg_def[regno].offset != -1)
+		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+		       dbg_reg_def[regno].size);
+	else
+		memset(mem, 0, dbg_reg_def[regno].size);
+	return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return -EINVAL;
+
+	if (dbg_reg_def[regno].offset != -1)
+		memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+		       dbg_reg_def[regno].size);
+	return 0;
+}
+
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+	struct pt_regs *thread_regs;
+	int regno;
+
+	/* Just making sure... */
+	if (task == NULL)
+		return;
+
+	/* Initialize to zero */
+	for (regno = 0; regno < GDB_MAX_REGS; regno++)
+		gdb_regs[regno] = 0;
+
+	/* Otherwise, we have only some registers from switch_to() */
+	thread_regs		= task_pt_regs(task);
+	gdb_regs[_R0]		= thread_regs->ARM_r0;
+	gdb_regs[_R1]		= thread_regs->ARM_r1;
+	gdb_regs[_R2]		= thread_regs->ARM_r2;
+	gdb_regs[_R3]		= thread_regs->ARM_r3;
+	gdb_regs[_R4]		= thread_regs->ARM_r4;
+	gdb_regs[_R5]		= thread_regs->ARM_r5;
+	gdb_regs[_R6]		= thread_regs->ARM_r6;
+	gdb_regs[_R7]		= thread_regs->ARM_r7;
+	gdb_regs[_R8]		= thread_regs->ARM_r8;
+	gdb_regs[_R9]		= thread_regs->ARM_r9;
+	gdb_regs[_R10]		= thread_regs->ARM_r10;
+	gdb_regs[_FP]		= thread_regs->ARM_fp;
+	gdb_regs[_IP]		= thread_regs->ARM_ip;
+	gdb_regs[_SPT]		= thread_regs->ARM_sp;
+	gdb_regs[_LR]		= thread_regs->ARM_lr;
+	gdb_regs[_PC]		= thread_regs->ARM_pc;
+	gdb_regs[_CPSR]		= thread_regs->ARM_cpsr;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->ARM_pc = pc;
+}
+
+static int compiled_break;
+
+int kgdb_arch_handle_exception(int exception_vector, int signo,
+			       int err_code, char *remcom_in_buffer,
+			       char *remcom_out_buffer,
+			       struct pt_regs *linux_regs)
+{
+	unsigned long addr;
+	char *ptr;
+
+	switch (remcom_in_buffer[0]) {
+	case 'D':
+	case 'k':
+	case 'c':
+		/*
+		 * Try to read optional parameter, pc unchanged if no parm.
+		 * If this was a compiled breakpoint, we need to move
+		 * to the next instruction or we will just breakpoint
+		 * over and over again.
+		 */
+		ptr = &remcom_in_buffer[1];
+		if (kgdb_hex2long(&ptr, &addr))
+			linux_regs->ARM_pc = addr;
+		else if (compiled_break == 1)
+			linux_regs->ARM_pc += 4;
+
+		compiled_break = 0;
+
+		return 0;
+	}
+
+	return -1;
+}
+
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)
+{
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+
+	return 0;
+}
+
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
+{
+	compiled_break = 1;
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+
+	return 0;
+}
+
+static struct undef_hook kgdb_brkpt_hook = {
+	.instr_mask		= 0xffffffff,
+	.instr_val		= KGDB_BREAKINST,
+	.cpsr_mask		= MODE_MASK,
+	.cpsr_val		= SVC_MODE,
+	.fn			= kgdb_brk_fn
+};
+
+static struct undef_hook kgdb_compiled_brkpt_hook = {
+	.instr_mask		= 0xffffffff,
+	.instr_val		= KGDB_COMPILED_BREAK,
+	.cpsr_mask		= MODE_MASK,
+	.cpsr_val		= SVC_MODE,
+	.fn			= kgdb_compiled_brk_fn
+};
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+       kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+       local_irq_enable();
+       smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+       local_irq_disable();
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+	struct pt_regs *regs = args->regs;
+
+	if (kgdb_handle_exception(1, args->signr, cmd, regs))
+		return NOTIFY_DONE;
+	return NOTIFY_STOP;
+}
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __kgdb_notify(ptr, cmd);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+	.notifier_call	= kgdb_notify,
+	.priority	= -INT_MAX,
+};
+
+
+/**
+ *	kgdb_arch_init - Perform any architecture specific initalization.
+ *
+ *	This function will handle the initalization of any architecture
+ *	specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+	int ret = register_die_notifier(&kgdb_notifier);
+
+	if (ret != 0)
+		return ret;
+
+	register_undef_hook(&kgdb_brkpt_hook);
+	register_undef_hook(&kgdb_compiled_brkpt_hook);
+
+	return 0;
+}
+
+/**
+ *	kgdb_arch_exit - Perform any architecture specific uninitalization.
+ *
+ *	This function will handle the uninitalization of any architecture
+ *	specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+	unregister_undef_hook(&kgdb_brkpt_hook);
+	unregister_undef_hook(&kgdb_compiled_brkpt_hook);
+	unregister_die_notifier(&kgdb_notifier);
+}
+
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+	int err;
+
+	/* patch_text() only supports int-sized breakpoints */
+	BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE);
+
+	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+				BREAK_INSTR_SIZE);
+	if (err)
+		return err;
+
+	patch_text((void *)bpt->bpt_addr,
+		   *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr);
+
+	return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+	patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr);
+
+	return 0;
+}
+
+/*
+ * Register our undef instruction hooks with ARM undef core.
+ * We regsiter a hook specifically looking for the KGB break inst
+ * and we handle the normal undef case within the do_undefinstr
+ * handler.
+ */
+struct kgdb_arch arch_kgdb_ops = {
+#ifndef __ARMEB__
+	.gdb_bpt_instr		= {0xfe, 0xde, 0xff, 0xe7}
+#else /* ! __ARMEB__ */
+	.gdb_bpt_instr		= {0xe7, 0xff, 0xde, 0xfe}
+#endif
+};
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
new file mode 100644
index 000000000..8bf3b7c09
--- /dev/null
+++ b/arch/arm/kernel/machine_kexec.c
@@ -0,0 +1,195 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/memblock.h>
+#include <asm/pgtable.h>
+#include <linux/of_fdt.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+#include <asm/fncpy.h>
+#include <asm/mach-types.h>
+#include <asm/smp_plat.h>
+#include <asm/system_misc.h>
+
+extern void relocate_new_kernel(void);
+extern const unsigned int relocate_new_kernel_size;
+
+extern unsigned long kexec_start_address;
+extern unsigned long kexec_indirection_page;
+extern unsigned long kexec_mach_type;
+extern unsigned long kexec_boot_atags;
+
+static atomic_t waiting_for_crash_ipi;
+
+static unsigned long dt_mem;
+/*
+ * Provide a dummy crash_notes definition while crash dump arrives to arm.
+ * This prevents breakage of crash_notes attribute in kernel/ksysfs.c.
+ */
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	struct kexec_segment *current_segment;
+	__be32 header;
+	int i, err;
+
+	/*
+	 * Validate that if the current HW supports SMP, then the SW supports
+	 * and implements CPU hotplug for the current HW. If not, we won't be
+	 * able to kexec reliably, so fail the prepare operation.
+	 */
+	if (num_possible_cpus() > 1 && platform_can_secondary_boot() &&
+	    !platform_can_cpu_hotplug())
+		return -EINVAL;
+
+	/*
+	 * No segment at default ATAGs address. try to locate
+	 * a dtb using magic.
+	 */
+	for (i = 0; i < image->nr_segments; i++) {
+		current_segment = &image->segment[i];
+
+		if (!memblock_is_region_memory(current_segment->mem,
+					       current_segment->memsz))
+			return -EINVAL;
+
+		err = get_user(header, (__be32*)current_segment->buf);
+		if (err)
+			return err;
+
+		if (be32_to_cpu(header) == OF_DT_HEADER)
+			dt_mem = current_segment->mem;
+	}
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void machine_crash_nonpanic_core(void *unused)
+{
+	struct pt_regs regs;
+
+	crash_setup_regs(&regs, NULL);
+	printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
+	       smp_processor_id());
+	crash_save_cpu(&regs, smp_processor_id());
+	flush_cache_all();
+
+	set_cpu_online(smp_processor_id(), false);
+	atomic_dec(&waiting_for_crash_ipi);
+	while (1)
+		cpu_relax();
+}
+
+static void machine_kexec_mask_interrupts(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_chip *chip;
+
+		chip = irq_desc_get_chip(desc);
+		if (!chip)
+			continue;
+
+		if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
+			chip->irq_eoi(&desc->irq_data);
+
+		if (chip->irq_mask)
+			chip->irq_mask(&desc->irq_data);
+
+		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+			chip->irq_disable(&desc->irq_data);
+	}
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	unsigned long msecs;
+
+	local_irq_disable();
+
+	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+	smp_call_function(machine_crash_nonpanic_core, NULL, false);
+	msecs = 1000; /* Wait at most a second for the other cpus to stop */
+	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+		mdelay(1);
+		msecs--;
+	}
+	if (atomic_read(&waiting_for_crash_ipi) > 0)
+		pr_warn("Non-crashing CPUs did not react to IPI\n");
+
+	crash_save_cpu(regs, smp_processor_id());
+	machine_kexec_mask_interrupts();
+
+	pr_info("Loading crashdump kernel...\n");
+}
+
+/*
+ * Function pointer to optional machine-specific reinitialization
+ */
+void (*kexec_reinit)(void);
+
+void machine_kexec(struct kimage *image)
+{
+	unsigned long page_list;
+	unsigned long reboot_code_buffer_phys;
+	unsigned long reboot_entry = (unsigned long)relocate_new_kernel;
+	unsigned long reboot_entry_phys;
+	void *reboot_code_buffer;
+
+	/*
+	 * This can only happen if machine_shutdown() failed to disable some
+	 * CPU, and that can only happen if the checks in
+	 * machine_kexec_prepare() were not correct. If this fails, we can't
+	 * reliably kexec anyway, so BUG_ON is appropriate.
+	 */
+	BUG_ON(num_online_cpus() > 1);
+
+	page_list = image->head & PAGE_MASK;
+
+	/* we need both effective and real address here */
+	reboot_code_buffer_phys =
+	    page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+	reboot_code_buffer = page_address(image->control_code_page);
+
+	/* Prepare parameters for reboot_code_buffer*/
+	set_kernel_text_rw();
+	kexec_start_address = image->start;
+	kexec_indirection_page = page_list;
+	kexec_mach_type = machine_arch_type;
+	kexec_boot_atags = dt_mem ?: image->start - KEXEC_ARM_ZIMAGE_OFFSET
+				     + KEXEC_ARM_ATAGS_OFFSET;
+
+	/* copy our kernel relocation code to the control code page */
+	reboot_entry = fncpy(reboot_code_buffer,
+			     reboot_entry,
+			     relocate_new_kernel_size);
+	reboot_entry_phys = (unsigned long)reboot_entry +
+		(reboot_code_buffer_phys - (unsigned long)reboot_code_buffer);
+
+	pr_info("Bye!\n");
+
+	if (kexec_reinit)
+		kexec_reinit();
+
+	soft_restart(reboot_entry_phys);
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#ifdef CONFIG_ARM_LPAE
+	VMCOREINFO_CONFIG(ARM_LPAE);
+#endif
+}
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
new file mode 100644
index 000000000..af791f4a6
--- /dev/null
+++ b/arch/arm/kernel/module.c
@@ -0,0 +1,366 @@
+/*
+ *  linux/arch/arm/kernel/module.c
+ *
+ *  Copyright (C) 2002 Russell King.
+ *  Modified for nommu by Hyok S. Choi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Module allocation method suggested by Andi Kleen.
+ */
+#include <linux/module.h>
+#include <linux/moduleloader.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/gfp.h>
+
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/smp_plat.h>
+#include <asm/unwind.h>
+#include <asm/opcodes.h>
+
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * The XIP kernel text is mapped in the module area for modules and
+ * some other stuff to work without any indirect relocations.
+ * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
+ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
+ */
+#undef MODULES_VADDR
+#define MODULES_VADDR	(((unsigned long)_etext + ~PMD_MASK) & PMD_MASK)
+#endif
+
+#ifdef CONFIG_MMU
+void *module_alloc(unsigned long size)
+{
+	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				__builtin_return_address(0));
+}
+#endif
+
+int
+apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
+	       unsigned int relindex, struct module *module)
+{
+	Elf32_Shdr *symsec = sechdrs + symindex;
+	Elf32_Shdr *relsec = sechdrs + relindex;
+	Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
+	Elf32_Rel *rel = (void *)relsec->sh_addr;
+	unsigned int i;
+
+	for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) {
+		unsigned long loc;
+		Elf32_Sym *sym;
+		const char *symname;
+		s32 offset;
+		u32 tmp;
+#ifdef CONFIG_THUMB2_KERNEL
+		u32 upper, lower, sign, j1, j2;
+#endif
+
+		offset = ELF32_R_SYM(rel->r_info);
+		if (offset < 0 || offset > (symsec->sh_size / sizeof(Elf32_Sym))) {
+			pr_err("%s: section %u reloc %u: bad relocation sym offset\n",
+				module->name, relindex, i);
+			return -ENOEXEC;
+		}
+
+		sym = ((Elf32_Sym *)symsec->sh_addr) + offset;
+		symname = strtab + sym->st_name;
+
+		if (rel->r_offset < 0 || rel->r_offset > dstsec->sh_size - sizeof(u32)) {
+			pr_err("%s: section %u reloc %u sym '%s': out of bounds relocation, offset %d size %u\n",
+			       module->name, relindex, i, symname,
+			       rel->r_offset, dstsec->sh_size);
+			return -ENOEXEC;
+		}
+
+		loc = dstsec->sh_addr + rel->r_offset;
+
+		switch (ELF32_R_TYPE(rel->r_info)) {
+		case R_ARM_NONE:
+			/* ignore */
+			break;
+
+		case R_ARM_ABS32:
+		case R_ARM_TARGET1:
+			*(u32 *)loc += sym->st_value;
+			break;
+
+		case R_ARM_PC24:
+		case R_ARM_CALL:
+		case R_ARM_JUMP24:
+			if (sym->st_value & 3) {
+				pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (ARM -> Thumb)\n",
+				       module->name, relindex, i, symname);
+				return -ENOEXEC;
+			}
+
+			offset = __mem_to_opcode_arm(*(u32 *)loc);
+			offset = (offset & 0x00ffffff) << 2;
+			if (offset & 0x02000000)
+				offset -= 0x04000000;
+
+			offset += sym->st_value - loc;
+			if (offset <= (s32)0xfe000000 ||
+			    offset >= (s32)0x02000000) {
+				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
+				       module->name, relindex, i, symname,
+				       ELF32_R_TYPE(rel->r_info), loc,
+				       sym->st_value);
+				return -ENOEXEC;
+			}
+
+			offset >>= 2;
+			offset &= 0x00ffffff;
+
+			*(u32 *)loc &= __opcode_to_mem_arm(0xff000000);
+			*(u32 *)loc |= __opcode_to_mem_arm(offset);
+			break;
+
+	       case R_ARM_V4BX:
+		       /* Preserve Rm and the condition code. Alter
+			* other bits to re-code instruction as
+			* MOV PC,Rm.
+			*/
+		       *(u32 *)loc &= __opcode_to_mem_arm(0xf000000f);
+		       *(u32 *)loc |= __opcode_to_mem_arm(0x01a0f000);
+		       break;
+
+		case R_ARM_PREL31:
+			offset = *(u32 *)loc + sym->st_value - loc;
+			*(u32 *)loc = offset & 0x7fffffff;
+			break;
+
+		case R_ARM_MOVW_ABS_NC:
+		case R_ARM_MOVT_ABS:
+			offset = tmp = __mem_to_opcode_arm(*(u32 *)loc);
+			offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
+			offset = (offset ^ 0x8000) - 0x8000;
+
+			offset += sym->st_value;
+			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
+				offset >>= 16;
+
+			tmp &= 0xfff0f000;
+			tmp |= ((offset & 0xf000) << 4) |
+				(offset & 0x0fff);
+
+			*(u32 *)loc = __opcode_to_mem_arm(tmp);
+			break;
+
+#ifdef CONFIG_THUMB2_KERNEL
+		case R_ARM_THM_CALL:
+		case R_ARM_THM_JUMP24:
+			/*
+			 * For function symbols, only Thumb addresses are
+			 * allowed (no interworking).
+			 *
+			 * For non-function symbols, the destination
+			 * has no specific ARM/Thumb disposition, so
+			 * the branch is resolved under the assumption
+			 * that interworking is not required.
+			 */
+			if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
+			    !(sym->st_value & 1)) {
+				pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (Thumb -> ARM)\n",
+				       module->name, relindex, i, symname);
+				return -ENOEXEC;
+			}
+
+			upper = __mem_to_opcode_thumb16(*(u16 *)loc);
+			lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
+
+			/*
+			 * 25 bit signed address range (Thumb-2 BL and B.W
+			 * instructions):
+			 *   S:I1:I2:imm10:imm11:0
+			 * where:
+			 *   S     = upper[10]   = offset[24]
+			 *   I1    = ~(J1 ^ S)   = offset[23]
+			 *   I2    = ~(J2 ^ S)   = offset[22]
+			 *   imm10 = upper[9:0]  = offset[21:12]
+			 *   imm11 = lower[10:0] = offset[11:1]
+			 *   J1    = lower[13]
+			 *   J2    = lower[11]
+			 */
+			sign = (upper >> 10) & 1;
+			j1 = (lower >> 13) & 1;
+			j2 = (lower >> 11) & 1;
+			offset = (sign << 24) | ((~(j1 ^ sign) & 1) << 23) |
+				((~(j2 ^ sign) & 1) << 22) |
+				((upper & 0x03ff) << 12) |
+				((lower & 0x07ff) << 1);
+			if (offset & 0x01000000)
+				offset -= 0x02000000;
+			offset += sym->st_value - loc;
+
+			if (offset <= (s32)0xff000000 ||
+			    offset >= (s32)0x01000000) {
+				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
+				       module->name, relindex, i, symname,
+				       ELF32_R_TYPE(rel->r_info), loc,
+				       sym->st_value);
+				return -ENOEXEC;
+			}
+
+			sign = (offset >> 24) & 1;
+			j1 = sign ^ (~(offset >> 23) & 1);
+			j2 = sign ^ (~(offset >> 22) & 1);
+			upper = (u16)((upper & 0xf800) | (sign << 10) |
+					    ((offset >> 12) & 0x03ff));
+			lower = (u16)((lower & 0xd000) |
+				      (j1 << 13) | (j2 << 11) |
+				      ((offset >> 1) & 0x07ff));
+
+			*(u16 *)loc = __opcode_to_mem_thumb16(upper);
+			*(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower);
+			break;
+
+		case R_ARM_THM_MOVW_ABS_NC:
+		case R_ARM_THM_MOVT_ABS:
+			upper = __mem_to_opcode_thumb16(*(u16 *)loc);
+			lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
+
+			/*
+			 * MOVT/MOVW instructions encoding in Thumb-2:
+			 *
+			 * i	= upper[10]
+			 * imm4	= upper[3:0]
+			 * imm3	= lower[14:12]
+			 * imm8	= lower[7:0]
+			 *
+			 * imm16 = imm4:i:imm3:imm8
+			 */
+			offset = ((upper & 0x000f) << 12) |
+				((upper & 0x0400) << 1) |
+				((lower & 0x7000) >> 4) | (lower & 0x00ff);
+			offset = (offset ^ 0x8000) - 0x8000;
+			offset += sym->st_value;
+
+			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
+				offset >>= 16;
+
+			upper = (u16)((upper & 0xfbf0) |
+				      ((offset & 0xf000) >> 12) |
+				      ((offset & 0x0800) >> 1));
+			lower = (u16)((lower & 0x8f00) |
+				      ((offset & 0x0700) << 4) |
+				      (offset & 0x00ff));
+			*(u16 *)loc = __opcode_to_mem_thumb16(upper);
+			*(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower);
+			break;
+#endif
+
+		default:
+			pr_err("%s: unknown relocation: %u\n",
+			       module->name, ELF32_R_TYPE(rel->r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+struct mod_unwind_map {
+	const Elf_Shdr *unw_sec;
+	const Elf_Shdr *txt_sec;
+};
+
+static const Elf_Shdr *find_mod_section(const Elf32_Ehdr *hdr,
+	const Elf_Shdr *sechdrs, const char *name)
+{
+	const Elf_Shdr *s, *se;
+	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++)
+		if (strcmp(name, secstrs + s->sh_name) == 0)
+			return s;
+
+	return NULL;
+}
+
+extern void fixup_pv_table(const void *, unsigned long);
+extern void fixup_smp(const void *, unsigned long);
+
+int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
+		    struct module *mod)
+{
+	const Elf_Shdr *s = NULL;
+#ifdef CONFIG_ARM_UNWIND
+	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum;
+	struct mod_unwind_map maps[ARM_SEC_MAX];
+	int i;
+
+	memset(maps, 0, sizeof(maps));
+
+	for (s = sechdrs; s < sechdrs_end; s++) {
+		const char *secname = secstrs + s->sh_name;
+
+		if (!(s->sh_flags & SHF_ALLOC))
+			continue;
+
+		if (strcmp(".ARM.exidx.init.text", secname) == 0)
+			maps[ARM_SEC_INIT].unw_sec = s;
+		else if (strcmp(".ARM.exidx", secname) == 0)
+			maps[ARM_SEC_CORE].unw_sec = s;
+		else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
+			maps[ARM_SEC_EXIT].unw_sec = s;
+		else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0)
+			maps[ARM_SEC_UNLIKELY].unw_sec = s;
+		else if (strcmp(".ARM.exidx.text.hot", secname) == 0)
+			maps[ARM_SEC_HOT].unw_sec = s;
+		else if (strcmp(".init.text", secname) == 0)
+			maps[ARM_SEC_INIT].txt_sec = s;
+		else if (strcmp(".text", secname) == 0)
+			maps[ARM_SEC_CORE].txt_sec = s;
+		else if (strcmp(".exit.text", secname) == 0)
+			maps[ARM_SEC_EXIT].txt_sec = s;
+		else if (strcmp(".text.unlikely", secname) == 0)
+			maps[ARM_SEC_UNLIKELY].txt_sec = s;
+		else if (strcmp(".text.hot", secname) == 0)
+			maps[ARM_SEC_HOT].txt_sec = s;
+	}
+
+	for (i = 0; i < ARM_SEC_MAX; i++)
+		if (maps[i].unw_sec && maps[i].txt_sec)
+			mod->arch.unwind[i] =
+				unwind_table_add(maps[i].unw_sec->sh_addr,
+					         maps[i].unw_sec->sh_size,
+					         maps[i].txt_sec->sh_addr,
+					         maps[i].txt_sec->sh_size);
+#endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+	s = find_mod_section(hdr, sechdrs, ".pv_table");
+	if (s)
+		fixup_pv_table((void *)s->sh_addr, s->sh_size);
+#endif
+	s = find_mod_section(hdr, sechdrs, ".alt.smp.init");
+	if (s && !is_smp())
+#ifdef CONFIG_SMP_ON_UP
+		fixup_smp((void *)s->sh_addr, s->sh_size);
+#else
+		return -EINVAL;
+#endif
+	return 0;
+}
+
+void
+module_arch_cleanup(struct module *mod)
+{
+#ifdef CONFIG_ARM_UNWIND
+	int i;
+
+	for (i = 0; i < ARM_SEC_MAX; i++)
+		if (mod->arch.unwind[i])
+			unwind_table_del(mod->arch.unwind[i]);
+#endif
+}
diff --git a/arch/arm/kernel/opcodes.c b/arch/arm/kernel/opcodes.c
new file mode 100644
index 000000000..f8179c6a8
--- /dev/null
+++ b/arch/arm/kernel/opcodes.c
@@ -0,0 +1,72 @@
+/*
+ *  linux/arch/arm/kernel/opcodes.c
+ *
+ *  A32 condition code lookup feature moved from nwfpe/fpopcode.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <asm/opcodes.h>
+
+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+/*
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+	0xF0F0,			/* EQ == Z set            */
+	0x0F0F,			/* NE                     */
+	0xCCCC,			/* CS == C set            */
+	0x3333,			/* CC                     */
+	0xFF00,			/* MI == N set            */
+	0x00FF,			/* PL                     */
+	0xAAAA,			/* VS == V set            */
+	0x5555,			/* VC                     */
+	0x0C0C,			/* HI == C set && Z clear */
+	0xF3F3,			/* LS == C clear || Z set */
+	0xAA55,			/* GE == (N==V)           */
+	0x55AA,			/* LT == (N!=V)           */
+	0x0A05,			/* GT == (!Z && (N==V))   */
+	0xF5FA,			/* LE == (Z || (N!=V))    */
+	0xFFFF,			/* AL always              */
+	0			/* NV                     */
+};
+
+/*
+ * Returns:
+ * ARM_OPCODE_CONDTEST_FAIL   - if condition fails
+ * ARM_OPCODE_CONDTEST_PASS   - if condition passes (including AL)
+ * ARM_OPCODE_CONDTEST_UNCOND - if NV condition, or separate unconditional
+ *                              opcode space from v5 onwards
+ *
+ * Code that tests whether a conditional instruction would pass its condition
+ * check should check that return value == ARM_OPCODE_CONDTEST_PASS.
+ *
+ * Code that tests if a condition means that the instruction would be executed
+ * (regardless of conditional or unconditional) should instead check that the
+ * return value != ARM_OPCODE_CONDTEST_FAIL.
+ */
+asmlinkage unsigned int arm_check_condition(u32 opcode, u32 psr)
+{
+	u32 cc_bits  = opcode >> 28;
+	u32 psr_cond = psr >> 28;
+	unsigned int ret;
+
+	if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+		if ((cc_map[cc_bits] >> (psr_cond)) & 1)
+			ret = ARM_OPCODE_CONDTEST_PASS;
+		else
+			ret = ARM_OPCODE_CONDTEST_FAIL;
+	} else {
+		ret = ARM_OPCODE_CONDTEST_UNCOND;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(arm_check_condition);
diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
new file mode 100644
index 000000000..69bda1a57
--- /dev/null
+++ b/arch/arm/kernel/patch.c
@@ -0,0 +1,128 @@
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/kprobes.h>
+#include <linux/mm.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+#include <asm/smp_plat.h>
+#include <asm/opcodes.h>
+#include <asm/patch.h>
+
+struct patch {
+	void *addr;
+	unsigned int insn;
+};
+
+static DEFINE_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
+	__acquires(&patch_lock)
+{
+	unsigned int uintaddr = (uintptr_t) addr;
+	bool module = !core_kernel_text(uintaddr);
+	struct page *page;
+
+	if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
+		page = vmalloc_to_page(addr);
+	else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
+		page = virt_to_page(addr);
+	else
+		return addr;
+
+	if (flags)
+		spin_lock_irqsave(&patch_lock, *flags);
+	else
+		__acquire(&patch_lock);
+
+	set_fixmap(fixmap, page_to_phys(page));
+
+	return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
+	__releases(&patch_lock)
+{
+	clear_fixmap(fixmap);
+
+	if (flags)
+		spin_unlock_irqrestore(&patch_lock, *flags);
+	else
+		__release(&patch_lock);
+}
+
+void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
+{
+	bool thumb2 = IS_ENABLED(CONFIG_THUMB2_KERNEL);
+	unsigned int uintaddr = (uintptr_t) addr;
+	bool twopage = false;
+	unsigned long flags;
+	void *waddr = addr;
+	int size;
+
+	if (remap)
+		waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
+	else
+		__acquire(&patch_lock);
+
+	if (thumb2 && __opcode_is_thumb16(insn)) {
+		*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
+		size = sizeof(u16);
+	} else if (thumb2 && (uintaddr & 2)) {
+		u16 first = __opcode_thumb32_first(insn);
+		u16 second = __opcode_thumb32_second(insn);
+		u16 *addrh0 = waddr;
+		u16 *addrh1 = waddr + 2;
+
+		twopage = (uintaddr & ~PAGE_MASK) == PAGE_SIZE - 2;
+		if (twopage && remap)
+			addrh1 = patch_map(addr + 2, FIX_TEXT_POKE1, NULL);
+
+		*addrh0 = __opcode_to_mem_thumb16(first);
+		*addrh1 = __opcode_to_mem_thumb16(second);
+
+		if (twopage && addrh1 != addr + 2) {
+			flush_kernel_vmap_range(addrh1, 2);
+			patch_unmap(FIX_TEXT_POKE1, NULL);
+		}
+
+		size = sizeof(u32);
+	} else {
+		if (thumb2)
+			insn = __opcode_to_mem_thumb32(insn);
+		else
+			insn = __opcode_to_mem_arm(insn);
+
+		*(u32 *)waddr = insn;
+		size = sizeof(u32);
+	}
+
+	if (waddr != addr) {
+		flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
+		patch_unmap(FIX_TEXT_POKE0, &flags);
+	} else
+		__release(&patch_lock);
+
+	flush_icache_range((uintptr_t)(addr),
+			   (uintptr_t)(addr) + size);
+}
+
+static int __kprobes patch_text_stop_machine(void *data)
+{
+	struct patch *patch = data;
+
+	__patch_text(patch->addr, patch->insn);
+
+	return 0;
+}
+
+void __kprobes patch_text(void *addr, unsigned int insn)
+{
+	struct patch patch = {
+		.addr = addr,
+		.insn = insn,
+	};
+
+	stop_machine(patch_text_stop_machine, &patch, NULL);
+}
diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
new file mode 100644
index 000000000..4e02ae595
--- /dev/null
+++ b/arch/arm/kernel/perf_callchain.c
@@ -0,0 +1,136 @@
+/*
+ * ARM callchain support
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based on the ARM OProfile backtrace code.
+ */
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include <asm/stacktrace.h>
+
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct frame_tail {
+	struct frame_tail __user *fp;
+	unsigned long sp;
+	unsigned long lr;
+} __attribute__((packed));
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static struct frame_tail __user *
+user_backtrace(struct frame_tail __user *tail,
+	       struct perf_callchain_entry *entry)
+{
+	struct frame_tail buftail;
+	unsigned long err;
+
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+
+	pagefault_disable();
+	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+	pagefault_enable();
+
+	if (err)
+		return NULL;
+
+	perf_callchain_store(entry, buftail.lr);
+
+	/*
+	 * Frame pointers should strictly progress back up the stack
+	 * (towards higher addresses).
+	 */
+	if (tail + 1 >= buftail.fp)
+		return NULL;
+
+	return buftail.fp - 1;
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct frame_tail __user *tail;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+
+	perf_callchain_store(entry, regs->ARM_pc);
+
+	if (!current->mm)
+		return;
+
+	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
+
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+	       tail && !((unsigned long)tail & 0x3))
+		tail = user_backtrace(tail, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whist unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+		void *data)
+{
+	struct perf_callchain_entry *entry = data;
+	perf_callchain_store(entry, fr->pc);
+	return 0;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct stackframe fr;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+
+	arm_get_current_stackframe(regs, &fr);
+	walk_stackframe(&fr, callchain_trace, entry);
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return perf_guest_cbs->get_guest_ip();
+
+	return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	int misc = 0;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		if (perf_guest_cbs->is_user_mode())
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
+	return misc;
+}
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
new file mode 100644
index 000000000..4a86a0133
--- /dev/null
+++ b/arch/arm/kernel/perf_event.c
@@ -0,0 +1,553 @@
+#undef DEBUG
+
+/*
+ * ARM performance counter support.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based on the sparc64 perf event code, which is in turn based
+ * on the x86 code.
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+
+static int
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result, ret;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	return ret;
+}
+
+static int
+armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
+{
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+	return (int)(config & raw_event_mask);
+}
+
+int
+armpmu_map_event(struct perf_event *event,
+		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+		 const unsigned (*cache_map)
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX],
+		 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+	int type = event->attr.type;
+
+	if (type == event->pmu->type)
+		return armpmu_map_raw_event(raw_event_mask, config);
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_hw_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int armpmu_event_set_period(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
+
+	local64_set(&hwc->prev_count, (u64)-left);
+
+	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+u64 armpmu_event_update(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = armpmu->read_counter(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static void
+armpmu_read(struct perf_event *event)
+{
+	armpmu_event_update(event);
+}
+
+static void
+armpmu_stop(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(event);
+		armpmu_event_update(event);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static void armpmu_start(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	/*
+	 * Set the period again. Some counters can't be stopped, so when we
+	 * were stopped we simply disabled the IRQ source and the counter
+	 * may have been left counting. If we don't do this step then we may
+	 * get an interrupt too soon or *way* too late if the overflow has
+	 * happened since disabling.
+	 */
+	armpmu_event_set_period(event);
+	armpmu->enable(event);
+}
+
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	armpmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
+	if (armpmu->clear_event_idx)
+		armpmu->clear_event_idx(hw_events, event);
+
+	perf_event_update_userpage(event);
+}
+
+static int
+armpmu_add(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
+
+	perf_pmu_disable(event->pmu);
+
+	/* If we don't have a space for the counter then finish early. */
+	idx = armpmu->get_event_idx(hw_events, event);
+	if (idx < 0) {
+		err = idx;
+		goto out;
+	}
+
+	/*
+	 * If there is an event in the counter we are going to use then make
+	 * sure it is disabled.
+	 */
+	event->hw.idx = idx;
+	armpmu->disable(event);
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+out:
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+			       struct perf_event *event)
+{
+	struct arm_pmu *armpmu;
+
+	if (is_software_event(event))
+		return 1;
+
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	armpmu = to_arm_pmu(event->pmu);
+	return armpmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int
+validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct pmu_hw_events fake_pmu;
+
+	/*
+	 * Initialise the fake PMU. We only need to populate the
+	 * used_mask for the purposes of validation.
+	 */
+	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
+
+	if (!validate_event(event->pmu, &fake_pmu, leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
+			return -EINVAL;
+	}
+
+	if (!validate_event(event->pmu, &fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
+{
+	struct arm_pmu *armpmu;
+	struct platform_device *plat_device;
+	struct arm_pmu_platdata *plat;
+	int ret;
+	u64 start_clock, finish_clock;
+
+	/*
+	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+	 * do any necessary shifting, we just need to perform the first
+	 * dereference.
+	 */
+	armpmu = *(void **)dev;
+	plat_device = armpmu->plat_device;
+	plat = dev_get_platdata(&plat_device->dev);
+
+	start_clock = sched_clock();
+	if (plat && plat->handle_irq)
+		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
+	else
+		ret = armpmu->handle_irq(irq, armpmu);
+	finish_clock = sched_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+	return ret;
+}
+
+static void
+armpmu_release_hardware(struct arm_pmu *armpmu)
+{
+	armpmu->free_irq(armpmu);
+	pm_runtime_put_sync(&armpmu->plat_device->dev);
+}
+
+static int
+armpmu_reserve_hardware(struct arm_pmu *armpmu)
+{
+	int err;
+	struct platform_device *pmu_device = armpmu->plat_device;
+
+	if (!pmu_device)
+		return -ENODEV;
+
+	pm_runtime_get_sync(&pmu_device->dev);
+	err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
+	if (err) {
+		armpmu_release_hardware(armpmu);
+		return err;
+	}
+
+	return 0;
+}
+
+static void
+hw_perf_event_destroy(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	atomic_t *active_events	 = &armpmu->active_events;
+	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
+		armpmu_release_hardware(armpmu);
+		mutex_unlock(pmu_reserve_mutex);
+	}
+}
+
+static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
+}
+
+static int
+__hw_perf_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
+
+	mapping = armpmu->map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
+		pr_debug("ARM performance counters do not support "
+			 "mode exclusion\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base	    |= (unsigned long)mapping;
+
+	if (!is_sampling_event(event)) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period  = armpmu->max_period >> 1;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	if (event->group_leader != event) {
+		if (validate_group(event) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int armpmu_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	int err = 0;
+	atomic_t *active_events = &armpmu->active_events;
+
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	if (armpmu->map_event(event) == -ENOENT)
+		return -ENOENT;
+
+	event->destroy = hw_perf_event_destroy;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&armpmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = armpmu_reserve_hardware(armpmu);
+
+		if (!err)
+			atomic_inc(active_events);
+		mutex_unlock(&armpmu->reserve_mutex);
+	}
+
+	if (err)
+		return err;
+
+	err = __hw_perf_event_init(event);
+	if (err)
+		hw_perf_event_destroy(event);
+
+	return err;
+}
+
+static void armpmu_enable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+
+	if (enabled)
+		armpmu->start(armpmu);
+}
+
+static void armpmu_disable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	armpmu->stop(armpmu);
+}
+
+#ifdef CONFIG_PM
+static int armpmu_runtime_resume(struct device *dev)
+{
+	struct arm_pmu_platdata *plat = dev_get_platdata(dev);
+
+	if (plat && plat->runtime_resume)
+		return plat->runtime_resume(dev);
+
+	return 0;
+}
+
+static int armpmu_runtime_suspend(struct device *dev)
+{
+	struct arm_pmu_platdata *plat = dev_get_platdata(dev);
+
+	if (plat && plat->runtime_suspend)
+		return plat->runtime_suspend(dev);
+
+	return 0;
+}
+#endif
+
+const struct dev_pm_ops armpmu_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
+};
+
+static void armpmu_init(struct arm_pmu *armpmu)
+{
+	atomic_set(&armpmu->active_events, 0);
+	mutex_init(&armpmu->reserve_mutex);
+
+	armpmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+	};
+}
+
+int armpmu_register(struct arm_pmu *armpmu, int type)
+{
+	armpmu_init(armpmu);
+	pm_runtime_enable(&armpmu->plat_device->dev);
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+			armpmu->name, armpmu->num_events);
+	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
+}
+
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
new file mode 100644
index 000000000..3b8c2833c
--- /dev/null
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -0,0 +1,421 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+#define pr_fmt(fmt) "CPU PMU: " fmt
+
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+
+/* Set at runtime when we know what CPU type we are. */
+static struct arm_pmu *cpu_pmu;
+
+/*
+ * Despite the names, these two functions are CPU-specific and are used
+ * by the OProfile/perf code.
+ */
+const char *perf_pmu_name(void)
+{
+	if (!cpu_pmu)
+		return NULL;
+
+	return cpu_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	int max_events = 0;
+
+	if (cpu_pmu != NULL)
+		max_events = cpu_pmu->num_events;
+
+	return max_events;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+/* Include the PMU-specific implementations. */
+#include "perf_event_xscale.c"
+#include "perf_event_v6.c"
+#include "perf_event_v7.c"
+
+static void cpu_pmu_enable_percpu_irq(void *data)
+{
+	int irq = *(int *)data;
+
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
+static void cpu_pmu_disable_percpu_irq(void *data)
+{
+	int irq = *(int *)data;
+
+	disable_percpu_irq(irq);
+}
+
+static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+{
+	int i, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
+		free_percpu_irq(irq, &hw_events->percpu_pmu);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+				continue;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq >= 0)
+				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+		}
+	}
+}
+
+static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+{
+	int i, err, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
+
+	if (!pmu_device)
+		return -ENODEV;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
+		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+		return 0;
+	}
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		err = request_percpu_irq(irq, handler, "arm-pmu",
+					 &hw_events->percpu_pmu);
+		if (err) {
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+				irq);
+			return err;
+		}
+		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			err = 0;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq < 0)
+				continue;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			/*
+			 * If we have a single PMU interrupt that we can't shift,
+			 * assume that we're running on a uniprocessor machine and
+			 * continue. Otherwise, continue without this interrupt.
+			 */
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
+				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+					irq, cpu);
+				continue;
+			}
+
+			err = request_irq(irq, handler,
+					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+			if (err) {
+				pr_err("unable to request IRQ%d for ARM PMU counters\n",
+					irq);
+				return err;
+			}
+
+			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */
+static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
+			  void *hcpu)
+{
+	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
+
+	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+		return NOTIFY_DONE;
+
+	if (pmu->reset)
+		pmu->reset(pmu);
+	else
+		return NOTIFY_DONE;
+
+	return NOTIFY_OK;
+}
+
+static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	int err;
+	int cpu;
+	struct pmu_hw_events __percpu *cpu_hw_events;
+
+	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
+	if (!cpu_hw_events)
+		return -ENOMEM;
+
+	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
+	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
+	if (err)
+		goto out_hw_events;
+
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+		events->percpu_pmu = cpu_pmu;
+	}
+
+	cpu_pmu->hw_events	= cpu_hw_events;
+	cpu_pmu->request_irq	= cpu_pmu_request_irq;
+	cpu_pmu->free_irq	= cpu_pmu_free_irq;
+
+	/* Ensure the PMU has sane values out of reset. */
+	if (cpu_pmu->reset)
+		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+
+	/* If no interrupts available, set the corresponding capability flag */
+	if (!platform_get_irq(cpu_pmu->plat_device, 0))
+		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	return 0;
+
+out_hw_events:
+	free_percpu(cpu_hw_events);
+	return err;
+}
+
+static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
+{
+	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
+	free_percpu(cpu_pmu->hw_events);
+}
+
+/*
+ * PMU platform driver and devicetree bindings.
+ */
+static const struct of_device_id cpu_pmu_of_device_ids[] = {
+	{.compatible = "arm,cortex-a17-pmu",	.data = armv7_a17_pmu_init},
+	{.compatible = "arm,cortex-a15-pmu",	.data = armv7_a15_pmu_init},
+	{.compatible = "arm,cortex-a12-pmu",	.data = armv7_a12_pmu_init},
+	{.compatible = "arm,cortex-a9-pmu",	.data = armv7_a9_pmu_init},
+	{.compatible = "arm,cortex-a8-pmu",	.data = armv7_a8_pmu_init},
+	{.compatible = "arm,cortex-a7-pmu",	.data = armv7_a7_pmu_init},
+	{.compatible = "arm,cortex-a5-pmu",	.data = armv7_a5_pmu_init},
+	{.compatible = "arm,arm11mpcore-pmu",	.data = armv6mpcore_pmu_init},
+	{.compatible = "arm,arm1176-pmu",	.data = armv6_1176_pmu_init},
+	{.compatible = "arm,arm1136-pmu",	.data = armv6_1136_pmu_init},
+	{.compatible = "qcom,krait-pmu",	.data = krait_pmu_init},
+	{.compatible = "qcom,scorpion-pmu",	.data = scorpion_pmu_init},
+	{.compatible = "qcom,scorpion-mp-pmu",	.data = scorpion_mp_pmu_init},
+	{},
+};
+
+static struct platform_device_id cpu_pmu_plat_device_ids[] = {
+	{.name = "arm-pmu"},
+	{.name = "armv6-pmu"},
+	{.name = "armv7-pmu"},
+	{.name = "xscale-pmu"},
+	{},
+};
+
+static const struct pmu_probe_info pmu_probe_table[] = {
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
+	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
+	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
+	{ /* sentinel value */ }
+};
+
+/*
+ * CPU PMU identification and probing.
+ */
+static int probe_current_pmu(struct arm_pmu *pmu)
+{
+	int cpu = get_cpu();
+	unsigned int cpuid = read_cpuid_id();
+	int ret = -ENODEV;
+	const struct pmu_probe_info *info;
+
+	pr_info("probing PMU on CPU %d\n", cpu);
+
+	for (info = pmu_probe_table; info->init != NULL; info++) {
+		if ((cpuid & info->mask) != info->cpuid)
+			continue;
+		ret = info->init(pmu);
+		break;
+	}
+
+	put_cpu();
+	return ret;
+}
+
+static int of_pmu_irq_cfg(struct platform_device *pdev)
+{
+	int i, irq;
+	int *irqs;
+
+	/* Don't bother with PPIs; they're already affine */
+	irq = platform_get_irq(pdev, 0);
+	if (irq >= 0 && irq_is_percpu(irq))
+		return 0;
+
+	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct device_node *dn;
+		int cpu;
+
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+				      i);
+		if (!dn) {
+			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+				of_node_full_name(pdev->dev.of_node), i);
+			break;
+		}
+
+		for_each_possible_cpu(cpu)
+			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+				break;
+
+		of_node_put(dn);
+		if (cpu >= nr_cpu_ids) {
+			pr_warn("Failed to find logical CPU for %s\n",
+				dn->name);
+			break;
+		}
+
+		irqs[i] = cpu;
+	}
+
+	if (i == pdev->num_resources)
+		cpu_pmu->irq_affinity = irqs;
+	else
+		kfree(irqs);
+
+	return 0;
+}
+
+static int cpu_pmu_device_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *of_id;
+	const int (*init_fn)(struct arm_pmu *);
+	struct device_node *node = pdev->dev.of_node;
+	struct arm_pmu *pmu;
+	int ret = -ENODEV;
+
+	if (cpu_pmu) {
+		pr_info("attempt to register multiple PMU devices!\n");
+		return -ENOSPC;
+	}
+
+	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
+	if (!pmu) {
+		pr_info("failed to allocate PMU device!\n");
+		return -ENOMEM;
+	}
+
+	cpu_pmu = pmu;
+	cpu_pmu->plat_device = pdev;
+
+	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
+		init_fn = of_id->data;
+
+		ret = of_pmu_irq_cfg(pdev);
+		if (!ret)
+			ret = init_fn(pmu);
+	} else {
+		ret = probe_current_pmu(pmu);
+	}
+
+	if (ret) {
+		pr_info("failed to probe PMU!\n");
+		goto out_free;
+	}
+
+	ret = cpu_pmu_init(cpu_pmu);
+	if (ret)
+		goto out_free;
+
+	ret = armpmu_register(cpu_pmu, -1);
+	if (ret)
+		goto out_destroy;
+
+	return 0;
+
+out_destroy:
+	cpu_pmu_destroy(cpu_pmu);
+out_free:
+	pr_info("failed to register PMU devices!\n");
+	kfree(pmu);
+	return ret;
+}
+
+static struct platform_driver cpu_pmu_driver = {
+	.driver		= {
+		.name	= "arm-pmu",
+		.pm	= &armpmu_dev_pm_ops,
+		.of_match_table = cpu_pmu_of_device_ids,
+	},
+	.probe		= cpu_pmu_device_probe,
+	.id_table	= cpu_pmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+	return platform_driver_register(&cpu_pmu_driver);
+}
+device_initcall(register_pmu_driver);
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
new file mode 100644
index 000000000..f2ffd5c54
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -0,0 +1,566 @@
+/*
+ * ARMv6 Performance counter handling code.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ *
+ * ARMv6 has 2 configurable performance counters and a single cycle counter.
+ * They all share a single reset bit but can be written to zero so we can use
+ * that for a reset.
+ *
+ * The counters can't be individually enabled or disabled so when we remove
+ * one event and replace it with another we could get spurious counts from the
+ * wrong event. However, we can take advantage of the fact that the
+ * performance counters can export events to the event bus, and the event bus
+ * itself can be monitored. This requires that we *don't* export the events to
+ * the event bus. The procedure for disabling a configurable counter is:
+ *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
+ *	  effectively stops the counter from counting.
+ *	- disable the counter's interrupt generation (each counter has it's
+ *	  own interrupt enable bit).
+ * Once stopped, the counter value can be written as 0 to reset.
+ *
+ * To enable a counter:
+ *	- enable the counter's interrupt generation.
+ *	- set the new event type.
+ *
+ * Note: the dedicated cycle counter only counts cycles and can't be
+ * enabled/disabled independently of the others. When we want to disable the
+ * cycle counter, we have to just disable the interrupt reporting and start
+ * ignoring that counter. When re-enabling, we have to reset the value and
+ * enable the interrupt.
+ */
+
+#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
+enum armv6_perf_types {
+	ARMV6_PERFCTR_ICACHE_MISS	    = 0x0,
+	ARMV6_PERFCTR_IBUF_STALL	    = 0x1,
+	ARMV6_PERFCTR_DDEP_STALL	    = 0x2,
+	ARMV6_PERFCTR_ITLB_MISS		    = 0x3,
+	ARMV6_PERFCTR_DTLB_MISS		    = 0x4,
+	ARMV6_PERFCTR_BR_EXEC		    = 0x5,
+	ARMV6_PERFCTR_BR_MISPREDICT	    = 0x6,
+	ARMV6_PERFCTR_INSTR_EXEC	    = 0x7,
+	ARMV6_PERFCTR_DCACHE_HIT	    = 0x9,
+	ARMV6_PERFCTR_DCACHE_ACCESS	    = 0xA,
+	ARMV6_PERFCTR_DCACHE_MISS	    = 0xB,
+	ARMV6_PERFCTR_DCACHE_WBACK	    = 0xC,
+	ARMV6_PERFCTR_SW_PC_CHANGE	    = 0xD,
+	ARMV6_PERFCTR_MAIN_TLB_MISS	    = 0xF,
+	ARMV6_PERFCTR_EXPL_D_ACCESS	    = 0x10,
+	ARMV6_PERFCTR_LSU_FULL_STALL	    = 0x11,
+	ARMV6_PERFCTR_WBUF_DRAINED	    = 0x12,
+	ARMV6_PERFCTR_CPU_CYCLES	    = 0xFF,
+	ARMV6_PERFCTR_NOP		    = 0x20,
+};
+
+enum armv6_counters {
+	ARMV6_CYCLE_COUNTER = 0,
+	ARMV6_COUNTER0,
+	ARMV6_COUNTER1,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6_PERFCTR_INSTR_EXEC,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6_PERFCTR_BR_EXEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6_PERFCTR_BR_MISPREDICT,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6_PERFCTR_IBUF_STALL,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6_PERFCTR_LSU_FULL_STALL,
+};
+
+static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	/*
+	 * The performance counters don't differentiate between read and write
+	 * accesses/misses so this isn't strictly correct, but it's the best we
+	 * can do. Writes and reads get combined.
+	 */
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
+
+	/*
+	 * The ARM performance counters can count micro DTLB misses, micro ITLB
+	 * misses and main TLB misses. There isn't an event for TLB misses, so
+	 * use the micro misses here and if users want the main TLB misses they
+	 * can use a raw counter.
+	 */
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
+};
+
+enum armv6mpcore_perf_types {
+	ARMV6MPCORE_PERFCTR_ICACHE_MISS	    = 0x0,
+	ARMV6MPCORE_PERFCTR_IBUF_STALL	    = 0x1,
+	ARMV6MPCORE_PERFCTR_DDEP_STALL	    = 0x2,
+	ARMV6MPCORE_PERFCTR_ITLB_MISS	    = 0x3,
+	ARMV6MPCORE_PERFCTR_DTLB_MISS	    = 0x4,
+	ARMV6MPCORE_PERFCTR_BR_EXEC	    = 0x5,
+	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
+	ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
+	ARMV6MPCORE_PERFCTR_INSTR_EXEC	    = 0x8,
+	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
+	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
+	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
+	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
+	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
+	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
+	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
+	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
+	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
+	ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
+	ARMV6MPCORE_PERFCTR_CPU_CYCLES	    = 0xFF,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6MPCORE_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6MPCORE_PERFCTR_INSTR_EXEC,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6MPCORE_PERFCTR_BR_EXEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6MPCORE_PERFCTR_IBUF_STALL,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
+};
+
+static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+
+	/*
+	 * The ARM performance counters can count micro DTLB misses, micro ITLB
+	 * misses and main TLB misses. There isn't an event for TLB misses, so
+	 * use the micro misses here and if users want the main TLB misses they
+	 * can use a raw counter.
+	 */
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6MPCORE_PERFCTR_DTLB_MISS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6MPCORE_PERFCTR_DTLB_MISS,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6MPCORE_PERFCTR_ITLB_MISS,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6MPCORE_PERFCTR_ITLB_MISS,
+};
+
+static inline unsigned long
+armv6_pmcr_read(void)
+{
+	u32 val;
+	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
+	return val;
+}
+
+static inline void
+armv6_pmcr_write(unsigned long val)
+{
+	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
+}
+
+#define ARMV6_PMCR_ENABLE		(1 << 0)
+#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
+#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
+#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
+#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
+#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
+#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
+#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
+#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
+#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
+#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
+#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
+#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
+#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
+
+#define ARMV6_PMCR_OVERFLOWED_MASK \
+	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
+	 ARMV6_PMCR_CCOUNT_OVERFLOW)
+
+static inline int
+armv6_pmcr_has_overflowed(unsigned long pmcr)
+{
+	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
+}
+
+static inline int
+armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
+				  enum armv6_counters counter)
+{
+	int ret = 0;
+
+	if (ARMV6_CYCLE_COUNTER == counter)
+		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
+	else if (ARMV6_COUNTER0 == counter)
+		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
+	else if (ARMV6_COUNTER1 == counter)
+		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+	return ret;
+}
+
+static inline u32 armv6pmu_read_counter(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+	unsigned long value = 0;
+
+	if (ARMV6_CYCLE_COUNTER == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
+	else if (ARMV6_COUNTER0 == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
+	else if (ARMV6_COUNTER1 == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+	return value;
+}
+
+static inline void armv6pmu_write_counter(struct perf_event *event, u32 value)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	if (ARMV6_CYCLE_COUNTER == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
+	else if (ARMV6_COUNTER0 == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
+	else if (ARMV6_COUNTER1 == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+}
+
+static void armv6pmu_enable_event(struct perf_event *event)
+{
+	unsigned long val, mask, evt, flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= 0;
+		evt	= ARMV6_PMCR_CCOUNT_IEN;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
+		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
+			  ARMV6_PMCR_COUNT0_IEN;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
+		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
+			  ARMV6_PMCR_COUNT1_IEN;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Mask out the current event and set the counter to count the event
+	 * that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static irqreturn_t
+armv6pmu_handle_irq(int irq_num,
+		    void *dev)
+{
+	unsigned long pmcr = armv6_pmcr_read();
+	struct perf_sample_data data;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	int idx;
+
+	if (!armv6_pmcr_has_overflowed(pmcr))
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	/*
+	 * The interrupts are cleared by writing the overflow flags back to
+	 * the control register. All of the other bits don't have any effect
+	 * if they are rewritten, so write the whole value back.
+	 */
+	armv6_pmcr_write(pmcr);
+
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		/* Ignore if we don't have an event. */
+		if (!event)
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(event);
+	}
+
+	/*
+	 * Handle the pending perf events.
+	 *
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
+	 * will not work.
+	 */
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+static void armv6pmu_start(struct arm_pmu *cpu_pmu)
+{
+	unsigned long flags, val;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val |= ARMV6_PMCR_ENABLE;
+	armv6_pmcr_write(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	unsigned long flags, val;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~ARMV6_PMCR_ENABLE;
+	armv6_pmcr_write(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int
+armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	/* Always place a cycle counter into the cycle counter. */
+	if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
+		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return ARMV6_CYCLE_COUNTER;
+	} else {
+		/*
+		 * For anything other than a cycle counter, try and use
+		 * counter0 and counter1.
+		 */
+		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
+			return ARMV6_COUNTER1;
+
+		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
+			return ARMV6_COUNTER0;
+
+		/* The counters are all in use. */
+		return -EAGAIN;
+	}
+}
+
+static void armv6pmu_disable_event(struct perf_event *event)
+{
+	unsigned long val, mask, evt, flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= ARMV6_PMCR_CCOUNT_IEN;
+		evt	= 0;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
+		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
+		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Mask out the current event and set the counter to count the number
+	 * of ETM bus signal assertion cycles. The external reporting should
+	 * be disabled and so this should never increment.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void armv6mpcore_pmu_disable_event(struct perf_event *event)
+{
+	unsigned long val, mask, flags, evt = 0;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= ARMV6_PMCR_CCOUNT_IEN;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_COUNT0_IEN;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_COUNT1_IEN;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
+	 * simply disable the interrupt reporting.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int armv6_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv6_perf_map,
+				&armv6_perf_cache_map, 0xFF);
+}
+
+static void armv6pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->handle_irq	= armv6pmu_handle_irq;
+	cpu_pmu->enable		= armv6pmu_enable_event;
+	cpu_pmu->disable	= armv6pmu_disable_event;
+	cpu_pmu->read_counter	= armv6pmu_read_counter;
+	cpu_pmu->write_counter	= armv6pmu_write_counter;
+	cpu_pmu->get_event_idx	= armv6pmu_get_event_idx;
+	cpu_pmu->start		= armv6pmu_start;
+	cpu_pmu->stop		= armv6pmu_stop;
+	cpu_pmu->map_event	= armv6_map_event;
+	cpu_pmu->num_events	= 3;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+}
+
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv6pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv6_1136";
+	return 0;
+}
+
+static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv6pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv6_1156";
+	return 0;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv6pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv6_1176";
+	return 0;
+}
+
+/*
+ * ARMv6mpcore is almost identical to single core ARMv6 with the exception
+ * that some of the events have different enumerations and that there is no
+ * *hack* to stop the programmable counters. To stop the counters we simply
+ * disable the interrupt reporting and update the event. When unthrottling we
+ * reset the period and enable the interrupt reporting.
+ */
+
+static int armv6mpcore_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv6mpcore_perf_map,
+				&armv6mpcore_perf_cache_map, 0xFF);
+}
+
+static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name		= "armv6_11mpcore";
+	cpu_pmu->handle_irq	= armv6pmu_handle_irq;
+	cpu_pmu->enable		= armv6pmu_enable_event;
+	cpu_pmu->disable	= armv6mpcore_pmu_disable_event;
+	cpu_pmu->read_counter	= armv6pmu_read_counter;
+	cpu_pmu->write_counter	= armv6pmu_write_counter;
+	cpu_pmu->get_event_idx	= armv6pmu_get_event_idx;
+	cpu_pmu->start		= armv6pmu_start;
+	cpu_pmu->stop		= armv6pmu_stop;
+	cpu_pmu->map_event	= armv6mpcore_map_event;
+	cpu_pmu->num_events	= 3;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+
+	return 0;
+}
+#else
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+#endif	/* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
new file mode 100644
index 000000000..f4207a4dc
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -0,0 +1,1908 @@
+/*
+ * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
+ *
+ * ARMv7 support: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ *
+ * Copied from ARMv6 code, with the low level code inspired
+ *  by the ARMv7 Oprofile code.
+ *
+ * Cortex-A8 has up to 4 configurable performance counters and
+ *  a single cycle counter.
+ * Cortex-A9 has up to 31 configurable performance counters and
+ *  a single cycle counter.
+ *
+ * All counters can be enabled/disabled and IRQ masked separately. The cycle
+ *  counter and all 4 performance counters together can be reset separately.
+ */
+
+#ifdef CONFIG_CPU_V7
+
+#include <asm/cp15.h>
+#include <asm/vfp.h>
+#include "../vfp/vfpinstr.h"
+
+/*
+ * Common ARMv7 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ */
+enum armv7_perf_types {
+	ARMV7_PERFCTR_PMNC_SW_INCR			= 0x00,
+	ARMV7_PERFCTR_L1_ICACHE_REFILL			= 0x01,
+	ARMV7_PERFCTR_ITLB_REFILL			= 0x02,
+	ARMV7_PERFCTR_L1_DCACHE_REFILL			= 0x03,
+	ARMV7_PERFCTR_L1_DCACHE_ACCESS			= 0x04,
+	ARMV7_PERFCTR_DTLB_REFILL			= 0x05,
+	ARMV7_PERFCTR_MEM_READ				= 0x06,
+	ARMV7_PERFCTR_MEM_WRITE				= 0x07,
+	ARMV7_PERFCTR_INSTR_EXECUTED			= 0x08,
+	ARMV7_PERFCTR_EXC_TAKEN				= 0x09,
+	ARMV7_PERFCTR_EXC_EXECUTED			= 0x0A,
+	ARMV7_PERFCTR_CID_WRITE				= 0x0B,
+
+	/*
+	 * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
+	 * It counts:
+	 *  - all (taken) branch instructions,
+	 *  - instructions that explicitly write the PC,
+	 *  - exception generating instructions.
+	 */
+	ARMV7_PERFCTR_PC_WRITE				= 0x0C,
+	ARMV7_PERFCTR_PC_IMM_BRANCH			= 0x0D,
+	ARMV7_PERFCTR_PC_PROC_RETURN			= 0x0E,
+	ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS		= 0x0F,
+	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED		= 0x10,
+	ARMV7_PERFCTR_CLOCK_CYCLES			= 0x11,
+	ARMV7_PERFCTR_PC_BRANCH_PRED			= 0x12,
+
+	/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */
+	ARMV7_PERFCTR_MEM_ACCESS			= 0x13,
+	ARMV7_PERFCTR_L1_ICACHE_ACCESS			= 0x14,
+	ARMV7_PERFCTR_L1_DCACHE_WB			= 0x15,
+	ARMV7_PERFCTR_L2_CACHE_ACCESS			= 0x16,
+	ARMV7_PERFCTR_L2_CACHE_REFILL			= 0x17,
+	ARMV7_PERFCTR_L2_CACHE_WB			= 0x18,
+	ARMV7_PERFCTR_BUS_ACCESS			= 0x19,
+	ARMV7_PERFCTR_MEM_ERROR				= 0x1A,
+	ARMV7_PERFCTR_INSTR_SPEC			= 0x1B,
+	ARMV7_PERFCTR_TTBR_WRITE			= 0x1C,
+	ARMV7_PERFCTR_BUS_CYCLES			= 0x1D,
+
+	ARMV7_PERFCTR_CPU_CYCLES			= 0xFF
+};
+
+/* ARMv7 Cortex-A8 specific event types */
+enum armv7_a8_perf_types {
+	ARMV7_A8_PERFCTR_L2_CACHE_ACCESS		= 0x43,
+	ARMV7_A8_PERFCTR_L2_CACHE_REFILL		= 0x44,
+	ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS		= 0x50,
+	ARMV7_A8_PERFCTR_STALL_ISIDE			= 0x56,
+};
+
+/* ARMv7 Cortex-A9 specific event types */
+enum armv7_a9_perf_types {
+	ARMV7_A9_PERFCTR_INSTR_CORE_RENAME		= 0x68,
+	ARMV7_A9_PERFCTR_STALL_ICACHE			= 0x60,
+	ARMV7_A9_PERFCTR_STALL_DISPATCH			= 0x66,
+};
+
+/* ARMv7 Cortex-A5 specific event types */
+enum armv7_a5_perf_types {
+	ARMV7_A5_PERFCTR_PREFETCH_LINEFILL		= 0xc2,
+	ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP		= 0xc3,
+};
+
+/* ARMv7 Cortex-A15 specific event types */
+enum armv7_a15_perf_types {
+	ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ		= 0x40,
+	ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE	= 0x41,
+	ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ		= 0x42,
+	ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE	= 0x43,
+
+	ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ		= 0x4C,
+	ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE		= 0x4D,
+
+	ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ		= 0x50,
+	ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE		= 0x51,
+	ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ		= 0x52,
+	ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE		= 0x53,
+
+	ARMV7_A15_PERFCTR_PC_WRITE_SPEC			= 0x76,
+};
+
+/* ARMv7 Cortex-A12 specific event types */
+enum armv7_a12_perf_types {
+	ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ		= 0x40,
+	ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE	= 0x41,
+
+	ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ		= 0x50,
+	ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE		= 0x51,
+
+	ARMV7_A12_PERFCTR_PC_WRITE_SPEC			= 0x76,
+
+	ARMV7_A12_PERFCTR_PF_TLB_REFILL			= 0xe7,
+};
+
+/* ARMv7 Krait specific event types */
+enum krait_perf_types {
+	KRAIT_PMRESR0_GROUP0				= 0xcc,
+	KRAIT_PMRESR1_GROUP0				= 0xd0,
+	KRAIT_PMRESR2_GROUP0				= 0xd4,
+	KRAIT_VPMRESR0_GROUP0				= 0xd8,
+
+	KRAIT_PERFCTR_L1_ICACHE_ACCESS			= 0x10011,
+	KRAIT_PERFCTR_L1_ICACHE_MISS			= 0x10010,
+
+	KRAIT_PERFCTR_L1_ITLB_ACCESS			= 0x12222,
+	KRAIT_PERFCTR_L1_DTLB_ACCESS			= 0x12210,
+};
+
+/* ARMv7 Scorpion specific event types */
+enum scorpion_perf_types {
+	SCORPION_LPM0_GROUP0				= 0x4c,
+	SCORPION_LPM1_GROUP0				= 0x50,
+	SCORPION_LPM2_GROUP0				= 0x54,
+	SCORPION_L2LPM_GROUP0				= 0x58,
+	SCORPION_VLPM_GROUP0				= 0x5c,
+
+	SCORPION_ICACHE_ACCESS				= 0x10053,
+	SCORPION_ICACHE_MISS				= 0x10052,
+
+	SCORPION_DTLB_ACCESS				= 0x12013,
+	SCORPION_DTLB_MISS				= 0x12012,
+
+	SCORPION_ITLB_MISS				= 0x12021,
+};
+
+/*
+ * Cortex-A8 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV7_A8_PERFCTR_STALL_ISIDE,
+};
+
+static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	/*
+	 * The performance counters don't differentiate between read and write
+	 * accesses/misses so this isn't strictly correct, but it's the best we
+	 * can do. Writes and reads get combined.
+	 */
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(LL)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+	[C(LL)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+	[C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+	[C(LL)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A9 HW events mapping
+ */
+static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV7_A9_PERFCTR_INSTR_CORE_RENAME,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV7_A9_PERFCTR_STALL_ICACHE,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV7_A9_PERFCTR_STALL_DISPATCH,
+};
+
+static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	/*
+	 * The performance counters don't differentiate between read and write
+	 * accesses/misses so this isn't strictly correct, but it's the best we
+	 * can do. Writes and reads get combined.
+	 */
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A5 HW events mapping
+ */
+static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)]	= ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)]	= ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_ICACHE_REFILL,
+	/*
+	 * The prefetch counters don't differentiate between the I side and the
+	 * D side.
+	 */
+	[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)]	= ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+	[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)]	= ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A15 HW events mapping
+ */
+static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV7_A15_PERFCTR_PC_WRITE_SPEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
+
+	/*
+	 * Not all performance counters differentiate between read and write
+	 * accesses/misses so we're not always strictly correct, but it's the
+	 * best we can do. Writes and reads get combined in these cases.
+	 */
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(LL)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
+	[C(LL)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
+	[C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
+	[C(LL)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A7 HW events mapping
+ */
+static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	/*
+	 * The performance counters don't differentiate between read and write
+	 * accesses/misses so this isn't strictly correct, but it's the best we
+	 * can do. Writes and reads get combined.
+	 */
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(LL)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_CACHE_ACCESS,
+	[C(LL)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACHE_REFILL,
+	[C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_CACHE_ACCESS,
+	[C(LL)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Cortex-A12 HW events mapping
+ */
+static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV7_A12_PERFCTR_PC_WRITE_SPEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+	/*
+	 * Not all performance counters differentiate between read and write
+	 * accesses/misses so we're not always strictly correct, but it's the
+	 * best we can do. Writes and reads get combined in these cases.
+	 */
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(LL)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
+	[C(LL)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACHE_REFILL,
+	[C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
+	[C(LL)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+	[C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)]	= ARMV7_A12_PERFCTR_PF_TLB_REFILL,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Krait HW events mapping
+ */
+static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	/*
+	 * The performance counters don't differentiate between read and write
+	 * accesses/misses so this isn't strictly correct, but it's the best we
+	 * can do. Writes and reads get combined.
+	 */
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= KRAIT_PERFCTR_L1_ICACHE_MISS,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_DTLB_ACCESS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_DTLB_ACCESS,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_ITLB_ACCESS,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_ITLB_ACCESS,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Scorpion HW events mapping
+ */
+static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					    [PERF_COUNT_HW_CACHE_OP_MAX]
+					    [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+	/*
+	 * The performance counters don't differentiate between read and write
+	 * accesses/misses so this isn't strictly correct, but it's the best we
+	 * can do. Writes and reads get combined.
+	 */
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
+	/*
+	 * Only ITLB misses and DTLB refills are supported.  If users want the
+	 * DTLB refills misses a raw counter must be used.
+	 */
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
+ * Perf Events' indices
+ */
+#define	ARMV7_IDX_CYCLE_COUNTER	0
+#define	ARMV7_IDX_COUNTER0	1
+#define	ARMV7_IDX_COUNTER_LAST(cpu_pmu) \
+	(ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+
+#define	ARMV7_MAX_COUNTERS	32
+#define	ARMV7_COUNTER_MASK	(ARMV7_MAX_COUNTERS - 1)
+
+/*
+ * ARMv7 low level PMNC access
+ */
+
+/*
+ * Perf Event to low level counters mapping
+ */
+#define	ARMV7_IDX_TO_COUNTER(x)	\
+	(((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK)
+
+/*
+ * Per-CPU PMNC: config reg
+ */
+#define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
+#define ARMV7_PMNC_P		(1 << 1) /* Reset all counters */
+#define ARMV7_PMNC_C		(1 << 2) /* Cycle counter reset */
+#define ARMV7_PMNC_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV7_PMNC_X		(1 << 4) /* Export to ETM */
+#define ARMV7_PMNC_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
+#define	ARMV7_PMNC_N_SHIFT	11	 /* Number of counters supported */
+#define	ARMV7_PMNC_N_MASK	0x1f
+#define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
+
+/*
+ * FLAG: counters overflow flag status reg
+ */
+#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define	ARMV7_EVTYPE_MASK	0xc80000ff	/* Mask for writable bits */
+#define	ARMV7_EVTYPE_EVENT	0xff		/* Mask for EVENT bits */
+
+/*
+ * Event filters for PMUv2
+ */
+#define	ARMV7_EXCLUDE_PL1	(1 << 31)
+#define	ARMV7_EXCLUDE_USER	(1 << 30)
+#define	ARMV7_INCLUDE_HYP	(1 << 27)
+
+static inline u32 armv7_pmnc_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
+	return val;
+}
+
+static inline void armv7_pmnc_write(u32 val)
+{
+	val &= ARMV7_PMNC_MASK;
+	isb();
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+}
+
+static inline int armv7_pmnc_has_overflowed(u32 pmnc)
+{
+	return pmnc & ARMV7_OVERFLOWED_MASK;
+}
+
+static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx)
+{
+	return idx >= ARMV7_IDX_CYCLE_COUNTER &&
+		idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu);
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
+{
+	return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx));
+}
+
+static inline void armv7_pmnc_select_counter(int idx)
+{
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
+	isb();
+}
+
+static inline u32 armv7pmu_read_counter(struct perf_event *event)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u32 value = 0;
+
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU%u reading wrong counter %d\n",
+			smp_processor_id(), idx);
+	} else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
+		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+	} else {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+	}
+
+	return value;
+}
+
+static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU%u writing wrong counter %d\n",
+			smp_processor_id(), idx);
+	} else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+	} else {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
+	}
+}
+
+static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
+{
+	armv7_pmnc_select_counter(idx);
+	val &= ARMV7_EVTYPE_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+}
+
+static inline void armv7_pmnc_enable_counter(int idx)
+{
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
+}
+
+static inline void armv7_pmnc_disable_counter(int idx)
+{
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
+}
+
+static inline void armv7_pmnc_enable_intens(int idx)
+{
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
+}
+
+static inline void armv7_pmnc_disable_intens(int idx)
+{
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
+	isb();
+	/* Clear the overflow flag in case an interrupt is pending. */
+	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter)));
+	isb();
+}
+
+static inline u32 armv7_pmnc_getreset_flags(void)
+{
+	u32 val;
+
+	/* Read */
+	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+
+	/* Write to clear flags */
+	val &= ARMV7_FLAG_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
+
+	return val;
+}
+
+#ifdef DEBUG
+static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
+{
+	u32 val;
+	unsigned int cnt;
+
+	pr_info("PMNC registers dump:\n");
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+	pr_info("PMNC  =0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
+	pr_info("CNTENS=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
+	pr_info("INTENS=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+	pr_info("FLAGS =0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
+	pr_info("SELECT=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+	pr_info("CCNT  =0x%08x\n", val);
+
+	for (cnt = ARMV7_IDX_COUNTER0;
+			cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+		armv7_pmnc_select_counter(cnt);
+		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+		pr_info("CNT[%d] count =0x%08x\n",
+			ARMV7_IDX_TO_COUNTER(cnt), val);
+		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
+		pr_info("CNT[%d] evtsel=0x%08x\n",
+			ARMV7_IDX_TO_COUNTER(cnt), val);
+	}
+}
+#endif
+
+static void armv7pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return;
+	}
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We only need to set the event for the cycle counter if we
+	 * have the ability to perform event filtering.
+	 */
+	if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/*
+	 * Enable interrupt for this counter
+	 */
+	armv7_pmnc_enable_intens(idx);
+
+	/*
+	 * Enable counter
+	 */
+	armv7_pmnc_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void armv7pmu_disable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return;
+	}
+
+	/*
+	 * Disable counter and interrupt
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Disable interrupt for this counter
+	 */
+	armv7_pmnc_disable_intens(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
+{
+	u32 pmnc;
+	struct perf_sample_data data;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * Get and reset the IRQ flags
+	 */
+	pmnc = armv7_pmnc_getreset_flags();
+
+	/*
+	 * Did an overflow occur?
+	 */
+	if (!armv7_pmnc_has_overflowed(pmnc))
+		return IRQ_NONE;
+
+	/*
+	 * Handle the counter(s) overflow(s)
+	 */
+	regs = get_irq_regs();
+
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		/* Ignore if we don't have an event. */
+		if (!event)
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(event);
+	}
+
+	/*
+	 * Handle the pending perf events.
+	 *
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
+	 * will not work.
+	 */
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+static void armv7pmu_start(struct arm_pmu *cpu_pmu)
+{
+	unsigned long flags;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	/* Enable all counters */
+	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	unsigned long flags;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	/* Disable all counters */
+	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				  struct perf_event *event)
+{
+	int idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT;
+
+	/* Always place a cycle counter into the cycle counter. */
+	if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
+		if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return ARMV7_IDX_CYCLE_COUNTER;
+	}
+
+	/*
+	 * For anything other than a cycle counter, try and use
+	 * the events counters
+	 */
+	for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+
+	/* The counters are all in use. */
+	return -EAGAIN;
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ */
+static int armv7pmu_set_event_filter(struct hw_perf_event *event,
+				     struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (attr->exclude_idle)
+		return -EPERM;
+	if (attr->exclude_user)
+		config_base |= ARMV7_EXCLUDE_USER;
+	if (attr->exclude_kernel)
+		config_base |= ARMV7_EXCLUDE_PL1;
+	if (!attr->exclude_hv)
+		config_base |= ARMV7_INCLUDE_HYP;
+
+	/*
+	 * Install the filter into config_base as this is used to
+	 * construct the event type.
+	 */
+	event->config_base = config_base;
+
+	return 0;
+}
+
+static void armv7pmu_reset(void *info)
+{
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
+
+	/* The counter and interrupt enable registers are unknown at reset. */
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv7_pmnc_disable_counter(idx);
+		armv7_pmnc_disable_intens(idx);
+	}
+
+	/* Initialize & Reset PMNC: C and P bits */
+	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+}
+
+static int armv7_a8_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv7_a8_perf_map,
+				&armv7_a8_perf_cache_map, 0xFF);
+}
+
+static int armv7_a9_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv7_a9_perf_map,
+				&armv7_a9_perf_cache_map, 0xFF);
+}
+
+static int armv7_a5_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv7_a5_perf_map,
+				&armv7_a5_perf_cache_map, 0xFF);
+}
+
+static int armv7_a15_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv7_a15_perf_map,
+				&armv7_a15_perf_cache_map, 0xFF);
+}
+
+static int armv7_a7_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv7_a7_perf_map,
+				&armv7_a7_perf_cache_map, 0xFF);
+}
+
+static int armv7_a12_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv7_a12_perf_map,
+				&armv7_a12_perf_cache_map, 0xFF);
+}
+
+static int krait_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &krait_perf_map,
+				&krait_perf_cache_map, 0xFFFFF);
+}
+
+static int krait_map_event_no_branch(struct perf_event *event)
+{
+	return armpmu_map_event(event, &krait_perf_map_no_branch,
+				&krait_perf_cache_map, 0xFFFFF);
+}
+
+static int scorpion_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &scorpion_perf_map,
+				&scorpion_perf_cache_map, 0xFFFFF);
+}
+
+static void armv7pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->handle_irq	= armv7pmu_handle_irq;
+	cpu_pmu->enable		= armv7pmu_enable_event;
+	cpu_pmu->disable	= armv7pmu_disable_event;
+	cpu_pmu->read_counter	= armv7pmu_read_counter;
+	cpu_pmu->write_counter	= armv7pmu_write_counter;
+	cpu_pmu->get_event_idx	= armv7pmu_get_event_idx;
+	cpu_pmu->start		= armv7pmu_start;
+	cpu_pmu->stop		= armv7pmu_stop;
+	cpu_pmu->reset		= armv7pmu_reset;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+};
+
+static u32 armv7_read_num_pmnc_events(void)
+{
+	u32 nb_cnt;
+
+	/* Read the nb of CNTx counters supported from PMNC */
+	nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+
+	/* Add the CPU cycles counter and return */
+	return nb_cnt + 1;
+}
+
+static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_cortex_a8";
+	cpu_pmu->map_event	= armv7_a8_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	return 0;
+}
+
+static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_cortex_a9";
+	cpu_pmu->map_event	= armv7_a9_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	return 0;
+}
+
+static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_cortex_a5";
+	cpu_pmu->map_event	= armv7_a5_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	return 0;
+}
+
+static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_cortex_a15";
+	cpu_pmu->map_event	= armv7_a15_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+	return 0;
+}
+
+static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_cortex_a7";
+	cpu_pmu->map_event	= armv7_a7_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+	return 0;
+}
+
+static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_cortex_a12";
+	cpu_pmu->map_event	= armv7_a12_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+	return 0;
+}
+
+static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7_a12_pmu_init(cpu_pmu);
+	cpu_pmu->name = "armv7_cortex_a17";
+	return 0;
+}
+
+/*
+ * Krait Performance Monitor Region Event Selection Register (PMRESRn)
+ *
+ *            31   30     24     16     8      0
+ *            +--------------------------------+
+ *  PMRESR0   | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 0
+ *            +--------------------------------+
+ *  PMRESR1   | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 1
+ *            +--------------------------------+
+ *  PMRESR2   | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 2
+ *            +--------------------------------+
+ *  VPMRESR0  | EN |  CC  |  CC  |  CC  |  CC  |   N = 2, R = ?
+ *            +--------------------------------+
+ *              EN | G=3  | G=2  | G=1  | G=0
+ *
+ *  Event Encoding:
+ *
+ *      hwc->config_base = 0xNRCCG
+ *
+ *      N  = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR)
+ *      R  = region register
+ *      CC = class of events the group G is choosing from
+ *      G  = group or particular event
+ *
+ *  Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2
+ *
+ *  A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ *  unit, etc.) while the event code (CC) corresponds to a particular class of
+ *  events (interrupts for example). An event code is broken down into
+ *  groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ *  example).
+ */
+
+#define KRAIT_EVENT		(1 << 16)
+#define VENUM_EVENT		(2 << 16)
+#define KRAIT_EVENT_MASK	(KRAIT_EVENT | VENUM_EVENT)
+#define PMRESRn_EN		BIT(31)
+
+#define EVENT_REGION(event)	(((event) >> 12) & 0xf)		/* R */
+#define EVENT_GROUP(event)	((event) & 0xf)			/* G */
+#define EVENT_CODE(event)	(((event) >> 4) & 0xff)		/* CC */
+#define EVENT_VENUM(event)	(!!(event & VENUM_EVENT))	/* N=2 */
+#define EVENT_CPU(event)	(!!(event & KRAIT_EVENT))	/* N=1 */
+
+static u32 krait_read_pmresrn(int n)
+{
+	u32 val;
+
+	switch (n) {
+	case 0:
+		asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val));
+		break;
+	case 1:
+		asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val));
+		break;
+	case 2:
+		asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+	}
+
+	return val;
+}
+
+static void krait_write_pmresrn(int n, u32 val)
+{
+	switch (n) {
+	case 0:
+		asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val));
+		break;
+	case 1:
+		asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val));
+		break;
+	case 2:
+		asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+	}
+}
+
+static u32 venum_read_pmresr(void)
+{
+	u32 val;
+	asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
+	return val;
+}
+
+static void venum_write_pmresr(u32 val)
+{
+	asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
+}
+
+static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val)
+{
+	u32 venum_new_val;
+	u32 fp_new_val;
+
+	BUG_ON(preemptible());
+	/* CPACR Enable CP10 and CP11 access */
+	*venum_orig_val = get_copro_access();
+	venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11);
+	set_copro_access(venum_new_val);
+
+	/* Enable FPEXC */
+	*fp_orig_val = fmrx(FPEXC);
+	fp_new_val = *fp_orig_val | FPEXC_EN;
+	fmxr(FPEXC, fp_new_val);
+}
+
+static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val)
+{
+	BUG_ON(preemptible());
+	/* Restore FPEXC */
+	fmxr(FPEXC, fp_orig_val);
+	isb();
+	/* Restore CPACR */
+	set_copro_access(venum_orig_val);
+}
+
+static u32 krait_get_pmresrn_event(unsigned int region)
+{
+	static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0,
+					     KRAIT_PMRESR1_GROUP0,
+					     KRAIT_PMRESR2_GROUP0 };
+	return pmresrn_table[region];
+}
+
+static void krait_evt_setup(int idx, u32 config_base)
+{
+	u32 val;
+	u32 mask;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	unsigned int code = EVENT_CODE(config_base);
+	unsigned int group_shift;
+	bool venum_event = EVENT_VENUM(config_base);
+
+	group_shift = group * 8;
+	mask = 0xff << group_shift;
+
+	/* Configure evtsel for the region and group */
+	if (venum_event)
+		val = KRAIT_VPMRESR0_GROUP0;
+	else
+		val = krait_get_pmresrn_event(region);
+	val += group;
+	/* Mix in mode-exclusion bits */
+	val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+	armv7_pmnc_write_evtsel(idx, val);
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = krait_read_pmresrn(region);
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		krait_write_pmresrn(region, val);
+	}
+}
+
+static u32 clear_pmresrn_group(u32 val, int group)
+{
+	u32 mask;
+	int group_shift;
+
+	group_shift = group * 8;
+	mask = 0xff << group_shift;
+	val &= ~mask;
+
+	/* Don't clear enable bit if entire region isn't disabled */
+	if (val & ~PMRESRn_EN)
+		return val |= PMRESRn_EN;
+
+	return 0;
+}
+
+static void krait_clearpmu(u32 config_base)
+{
+	u32 val;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	bool venum_event = EVENT_VENUM(config_base);
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val = clear_pmresrn_group(val, group);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = krait_read_pmresrn(region);
+		val = clear_pmresrn_group(val, group);
+		krait_write_pmresrn(region, val);
+	}
+}
+
+static void krait_pmu_disable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/* Disable counter and interrupt */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Clear pmresr code (if destined for PMNx counters)
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		krait_clearpmu(hwc->config_base);
+
+	/* Disable interrupt for this counter */
+	armv7_pmnc_disable_intens(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void krait_pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We set the event for the cycle counter because we
+	 * have the ability to perform event filtering.
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		krait_evt_setup(idx, hwc->config_base);
+	else
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/* Enable interrupt for this counter */
+	armv7_pmnc_enable_intens(idx);
+
+	/* Enable counter */
+	armv7_pmnc_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void krait_pmu_reset(void *info)
+{
+	u32 vval, fval;
+	struct arm_pmu *cpu_pmu = info;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
+
+	armv7pmu_reset(info);
+
+	/* Clear all pmresrs */
+	krait_write_pmresrn(0, 0);
+	krait_write_pmresrn(1, 0);
+	krait_write_pmresrn(2, 0);
+
+	venum_pre_pmresr(&vval, &fval);
+	venum_write_pmresr(0);
+	venum_post_pmresr(vval, fval);
+
+	/* Reset PMxEVNCTCR to sane default */
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+	}
+
+}
+
+static int krait_event_to_bit(struct perf_event *event, unsigned int region,
+			      unsigned int group)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+	if (hwc->config_base & VENUM_EVENT)
+		bit = KRAIT_VPMRESR0_GROUP0;
+	else
+		bit = krait_get_pmresrn_event(region);
+	bit -= krait_get_pmresrn_event(0);
+	bit += group;
+	/*
+	 * Lower bits are reserved for use by the counters (see
+	 * armv7pmu_get_event_idx() for more info)
+	 */
+	bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
+
+	return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	int idx;
+	int bit = -1;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int code = EVENT_CODE(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool krait_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || krait_event) {
+		/* Ignore invalid events */
+		if (group > 3 || region > 2)
+			return -EINVAL;
+		if (venum_event && (code & 0xe0))
+			return -EINVAL;
+
+		bit = krait_event_to_bit(event, region, group);
+		if (test_and_set_bit(bit, cpuc->used_mask))
+			return -EAGAIN;
+	}
+
+	idx = armv7pmu_get_event_idx(cpuc, event);
+	if (idx < 0 && bit >= 0)
+		clear_bit(bit, cpuc->used_mask);
+
+	return idx;
+}
+
+static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool krait_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || krait_event) {
+		bit = krait_event_to_bit(event, region, group);
+		clear_bit(bit, cpuc->used_mask);
+	}
+}
+
+static int krait_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_krait";
+	/* Some early versions of Krait don't support PC write events */
+	if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node,
+				  "qcom,no-pc-write"))
+		cpu_pmu->map_event = krait_map_event_no_branch;
+	else
+		cpu_pmu->map_event = krait_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+	cpu_pmu->reset		= krait_pmu_reset;
+	cpu_pmu->enable		= krait_pmu_enable_event;
+	cpu_pmu->disable	= krait_pmu_disable_event;
+	cpu_pmu->get_event_idx	= krait_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
+	return 0;
+}
+
+/*
+ * Scorpion Local Performance Monitor Register (LPMn)
+ *
+ *            31   30     24     16     8      0
+ *            +--------------------------------+
+ *  LPM0      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 0
+ *            +--------------------------------+
+ *  LPM1      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 1
+ *            +--------------------------------+
+ *  LPM2      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 2
+ *            +--------------------------------+
+ *  L2LPM     | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 3
+ *            +--------------------------------+
+ *  VLPM      | EN |  CC  |  CC  |  CC  |  CC  |   N = 2, R = ?
+ *            +--------------------------------+
+ *              EN | G=3  | G=2  | G=1  | G=0
+ *
+ *
+ *  Event Encoding:
+ *
+ *      hwc->config_base = 0xNRCCG
+ *
+ *      N  = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM)
+ *      R  = region register
+ *      CC = class of events the group G is choosing from
+ *      G  = group or particular event
+ *
+ *  Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2
+ *
+ *  A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ *  unit, etc.) while the event code (CC) corresponds to a particular class of
+ *  events (interrupts for example). An event code is broken down into
+ *  groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ *  example).
+ */
+
+static u32 scorpion_read_pmresrn(int n)
+{
+	u32 val;
+
+	switch (n) {
+	case 0:
+		asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 1:
+		asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 2:
+		asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 3:
+		asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+	}
+
+	return val;
+}
+
+static void scorpion_write_pmresrn(int n, u32 val)
+{
+	switch (n) {
+	case 0:
+		asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 1:
+		asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 2:
+		asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 3:
+		asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+	}
+}
+
+static u32 scorpion_get_pmresrn_event(unsigned int region)
+{
+	static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
+					     SCORPION_LPM1_GROUP0,
+					     SCORPION_LPM2_GROUP0,
+					     SCORPION_L2LPM_GROUP0 };
+	return pmresrn_table[region];
+}
+
+static void scorpion_evt_setup(int idx, u32 config_base)
+{
+	u32 val;
+	u32 mask;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	unsigned int code = EVENT_CODE(config_base);
+	unsigned int group_shift;
+	bool venum_event = EVENT_VENUM(config_base);
+
+	group_shift = group * 8;
+	mask = 0xff << group_shift;
+
+	/* Configure evtsel for the region and group */
+	if (venum_event)
+		val = SCORPION_VLPM_GROUP0;
+	else
+		val = scorpion_get_pmresrn_event(region);
+	val += group;
+	/* Mix in mode-exclusion bits */
+	val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+	armv7_pmnc_write_evtsel(idx, val);
+
+	asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = scorpion_read_pmresrn(region);
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		scorpion_write_pmresrn(region, val);
+	}
+}
+
+static void scorpion_clearpmu(u32 config_base)
+{
+	u32 val;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	bool venum_event = EVENT_VENUM(config_base);
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val = clear_pmresrn_group(val, group);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = scorpion_read_pmresrn(region);
+		val = clear_pmresrn_group(val, group);
+		scorpion_write_pmresrn(region, val);
+	}
+}
+
+static void scorpion_pmu_disable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/* Disable counter and interrupt */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Clear pmresr code (if destined for PMNx counters)
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		scorpion_clearpmu(hwc->config_base);
+
+	/* Disable interrupt for this counter */
+	armv7_pmnc_disable_intens(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void scorpion_pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We don't set the event for the cycle counter because we
+	 * don't have the ability to perform event filtering.
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		scorpion_evt_setup(idx, hwc->config_base);
+	else if (idx != ARMV7_IDX_CYCLE_COUNTER)
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/* Enable interrupt for this counter */
+	armv7_pmnc_enable_intens(idx);
+
+	/* Enable counter */
+	armv7_pmnc_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void scorpion_pmu_reset(void *info)
+{
+	u32 vval, fval;
+	struct arm_pmu *cpu_pmu = info;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
+
+	armv7pmu_reset(info);
+
+	/* Clear all pmresrs */
+	scorpion_write_pmresrn(0, 0);
+	scorpion_write_pmresrn(1, 0);
+	scorpion_write_pmresrn(2, 0);
+	scorpion_write_pmresrn(3, 0);
+
+	venum_pre_pmresr(&vval, &fval);
+	venum_write_pmresr(0);
+	venum_post_pmresr(vval, fval);
+
+	/* Reset PMxEVNCTCR to sane default */
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+	}
+}
+
+static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
+			      unsigned int group)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+	if (hwc->config_base & VENUM_EVENT)
+		bit = SCORPION_VLPM_GROUP0;
+	else
+		bit = scorpion_get_pmresrn_event(region);
+	bit -= scorpion_get_pmresrn_event(0);
+	bit += group;
+	/*
+	 * Lower bits are reserved for use by the counters (see
+	 * armv7pmu_get_event_idx() for more info)
+	 */
+	bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
+
+	return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	int idx;
+	int bit = -1;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || scorpion_event) {
+		/* Ignore invalid events */
+		if (group > 3 || region > 3)
+			return -EINVAL;
+
+		bit = scorpion_event_to_bit(event, region, group);
+		if (test_and_set_bit(bit, cpuc->used_mask))
+			return -EAGAIN;
+	}
+
+	idx = armv7pmu_get_event_idx(cpuc, event);
+	if (idx < 0 && bit >= 0)
+		clear_bit(bit, cpuc->used_mask);
+
+	return idx;
+}
+
+static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || scorpion_event) {
+		bit = scorpion_event_to_bit(event, region, group);
+		clear_bit(bit, cpuc->used_mask);
+	}
+}
+
+static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_scorpion";
+	cpu_pmu->map_event	= scorpion_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->reset		= scorpion_pmu_reset;
+	cpu_pmu->enable		= scorpion_pmu_enable_event;
+	cpu_pmu->disable	= scorpion_pmu_disable_event;
+	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+	return 0;
+}
+
+static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_scorpion_mp";
+	cpu_pmu->map_event	= scorpion_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->reset		= scorpion_pmu_reset;
+	cpu_pmu->enable		= scorpion_pmu_enable_event;
+	cpu_pmu->disable	= scorpion_pmu_disable_event;
+	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+	return 0;
+}
+#else
+static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int krait_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+#endif	/* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
new file mode 100644
index 000000000..8af9f1f82
--- /dev/null
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -0,0 +1,753 @@
+/*
+ * ARMv5 [xscale] Performance counter handling code.
+ *
+ * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * Based on the previous xscale OProfile code.
+ *
+ * There are two variants of the xscale PMU that we support:
+ * 	- xscale1pmu: 2 event counters and a cycle counter
+ * 	- xscale2pmu: 4 event counters and a cycle counter
+ * The two variants share event definitions, but have different
+ * PMU structures.
+ */
+
+#ifdef CONFIG_CPU_XSCALE
+enum xscale_perf_types {
+	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
+	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
+	XSCALE_PERFCTR_DATA_STALL		= 0x02,
+	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
+	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
+	XSCALE_PERFCTR_BRANCH			= 0x05,
+	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
+	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
+	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
+	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
+	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0A,
+	XSCALE_PERFCTR_DCACHE_MISS		= 0x0B,
+	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0C,
+	XSCALE_PERFCTR_PC_CHANGED		= 0x0D,
+	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
+	XSCALE_PERFCTR_BCU_FULL			= 0x11,
+	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
+	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
+	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
+	XSCALE_PERFCTR_RMW			= 0x16,
+	/* XSCALE_PERFCTR_CCNT is not hardware defined */
+	XSCALE_PERFCTR_CCNT			= 0xFE,
+	XSCALE_PERFCTR_UNUSED			= 0xFF,
+};
+
+enum xscale_counters {
+	XSCALE_CYCLE_COUNTER	= 0,
+	XSCALE_COUNTER0,
+	XSCALE_COUNTER1,
+	XSCALE_COUNTER2,
+	XSCALE_COUNTER3,
+};
+
+static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= XSCALE_PERFCTR_CCNT,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= XSCALE_PERFCTR_INSTRUCTION,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XSCALE_PERFCTR_BRANCH,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= XSCALE_PERFCTR_BRANCH_MISS,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XSCALE_PERFCTR_ICACHE_NO_DELIVER,
+};
+
+static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					   [PERF_COUNT_HW_CACHE_OP_MAX]
+					   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
+};
+
+#define	XSCALE_PMU_ENABLE	0x001
+#define XSCALE_PMN_RESET	0x002
+#define	XSCALE_CCNT_RESET	0x004
+#define	XSCALE_PMU_RESET	(CCNT_RESET | PMN_RESET)
+#define XSCALE_PMU_CNT64	0x008
+
+#define XSCALE1_OVERFLOWED_MASK	0x700
+#define XSCALE1_CCOUNT_OVERFLOW	0x400
+#define XSCALE1_COUNT0_OVERFLOW	0x100
+#define XSCALE1_COUNT1_OVERFLOW	0x200
+#define XSCALE1_CCOUNT_INT_EN	0x040
+#define XSCALE1_COUNT0_INT_EN	0x010
+#define XSCALE1_COUNT1_INT_EN	0x020
+#define XSCALE1_COUNT0_EVT_SHFT	12
+#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
+#define XSCALE1_COUNT1_EVT_SHFT	20
+#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
+
+static inline u32
+xscale1pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale1pmu_write_pmnc(u32 val)
+{
+	/* upper 4bits and 7, 11 are write-as-0 */
+	val &= 0xffff77f;
+	asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
+}
+
+static inline int
+xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+xscale1pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc;
+	struct perf_sample_data data;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * NOTE: there's an A stepping erratum that states if an overflow
+	 *       bit already exists and another occurs, the previous
+	 *       Overflow bit gets cleared. There's no workaround.
+	 *	 Fixed in B stepping or later.
+	 */
+	pmnc = xscale1pmu_read_pmnc();
+
+	/*
+	 * Write the value back to clear the overflow flags. Overflow
+	 * flags remain in pmnc for use below. We also disable the PMU
+	 * while we process the interrupt.
+	 */
+	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!event)
+			continue;
+
+		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(event);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
+static void xscale1pmu_enable_event(struct perf_event *event)
+{
+	unsigned long val, mask, evt, flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		mask = 0;
+		evt = XSCALE1_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		mask = XSCALE1_COUNT0_EVT_MASK;
+		evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
+			XSCALE1_COUNT0_INT_EN;
+		break;
+	case XSCALE_COUNTER1:
+		mask = XSCALE1_COUNT1_EVT_MASK;
+		evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
+			XSCALE1_COUNT1_INT_EN;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~mask;
+	val |= evt;
+	xscale1pmu_write_pmnc(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void xscale1pmu_disable_event(struct perf_event *event)
+{
+	unsigned long val, mask, evt, flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		mask = XSCALE1_CCOUNT_INT_EN;
+		evt = 0;
+		break;
+	case XSCALE_COUNTER0:
+		mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
+		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
+		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~mask;
+	val |= evt;
+	xscale1pmu_write_pmnc(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	if (XSCALE_PERFCTR_CCNT == hwc->config_base) {
+		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return XSCALE_CYCLE_COUNTER;
+	} else {
+		if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
+			return XSCALE_COUNTER1;
+
+		if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
+			return XSCALE_COUNTER0;
+
+		return -EAGAIN;
+	}
+}
+
+static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
+{
+	unsigned long flags, val;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val |= XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	unsigned long flags, val;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static inline u32 xscale1pmu_read_counter(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+	u32 val = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
+		break;
+	}
+
+	return val;
+}
+
+static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
+		break;
+	}
+}
+
+static int xscale_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &xscale_perf_map,
+				&xscale_perf_cache_map, 0xFF);
+}
+
+static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name		= "armv5_xscale1";
+	cpu_pmu->handle_irq	= xscale1pmu_handle_irq;
+	cpu_pmu->enable		= xscale1pmu_enable_event;
+	cpu_pmu->disable	= xscale1pmu_disable_event;
+	cpu_pmu->read_counter	= xscale1pmu_read_counter;
+	cpu_pmu->write_counter	= xscale1pmu_write_counter;
+	cpu_pmu->get_event_idx	= xscale1pmu_get_event_idx;
+	cpu_pmu->start		= xscale1pmu_start;
+	cpu_pmu->stop		= xscale1pmu_stop;
+	cpu_pmu->map_event	= xscale_map_event;
+	cpu_pmu->num_events	= 3;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+
+	return 0;
+}
+
+#define XSCALE2_OVERFLOWED_MASK	0x01f
+#define XSCALE2_CCOUNT_OVERFLOW	0x001
+#define XSCALE2_COUNT0_OVERFLOW	0x002
+#define XSCALE2_COUNT1_OVERFLOW	0x004
+#define XSCALE2_COUNT2_OVERFLOW	0x008
+#define XSCALE2_COUNT3_OVERFLOW	0x010
+#define XSCALE2_CCOUNT_INT_EN	0x001
+#define XSCALE2_COUNT0_INT_EN	0x002
+#define XSCALE2_COUNT1_INT_EN	0x004
+#define XSCALE2_COUNT2_INT_EN	0x008
+#define XSCALE2_COUNT3_INT_EN	0x010
+#define XSCALE2_COUNT0_EVT_SHFT	0
+#define XSCALE2_COUNT0_EVT_MASK	(0xff << XSCALE2_COUNT0_EVT_SHFT)
+#define XSCALE2_COUNT1_EVT_SHFT	8
+#define XSCALE2_COUNT1_EVT_MASK	(0xff << XSCALE2_COUNT1_EVT_SHFT)
+#define XSCALE2_COUNT2_EVT_SHFT	16
+#define XSCALE2_COUNT2_EVT_MASK	(0xff << XSCALE2_COUNT2_EVT_SHFT)
+#define XSCALE2_COUNT3_EVT_SHFT	24
+#define XSCALE2_COUNT3_EVT_MASK	(0xff << XSCALE2_COUNT3_EVT_SHFT)
+
+static inline u32
+xscale2pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
+	/* bits 1-2 and 4-23 are read-unpredictable */
+	return val & 0xff000009;
+}
+
+static inline void
+xscale2pmu_write_pmnc(u32 val)
+{
+	/* bits 4-23 are write-as-0, 24-31 are write ignored */
+	val &= 0xf;
+	asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_overflow_flags(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale2pmu_write_overflow_flags(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_event_select(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale2pmu_write_event_select(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
+}
+
+static inline u32
+xscale2pmu_read_int_enable(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
+	return val;
+}
+
+static void
+xscale2pmu_write_int_enable(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
+}
+
+static inline int
+xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
+		break;
+	case XSCALE_COUNTER2:
+		ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
+		break;
+	case XSCALE_COUNTER3:
+		ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+xscale2pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc, of_flags;
+	struct perf_sample_data data;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	int idx;
+
+	/* Disable the PMU. */
+	pmnc = xscale2pmu_read_pmnc();
+	xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+	/* Check the overflow flag register. */
+	of_flags = xscale2pmu_read_overflow_flags();
+	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	/* Clear the overflow bits. */
+	xscale2pmu_write_overflow_flags(of_flags);
+
+	regs = get_irq_regs();
+
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!event)
+			continue;
+
+		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(event);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
+static void xscale2pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags, ien, evtsel;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien |= XSCALE2_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		ien |= XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		ien |= XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER2:
+		ien |= XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER3:
+		ien |= XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void xscale2pmu_disable_event(struct perf_event *event)
+{
+	unsigned long flags, ien, evtsel, of_flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien &= ~XSCALE2_CCOUNT_INT_EN;
+		of_flags = XSCALE2_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ien &= ~XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+		of_flags = XSCALE2_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ien &= ~XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+		of_flags = XSCALE2_COUNT1_OVERFLOW;
+		break;
+	case XSCALE_COUNTER2:
+		ien &= ~XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+		of_flags = XSCALE2_COUNT2_OVERFLOW;
+		break;
+	case XSCALE_COUNTER3:
+		ien &= ~XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+		of_flags = XSCALE2_COUNT3_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	xscale2pmu_write_overflow_flags(of_flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	int idx = xscale1pmu_get_event_idx(cpuc, event);
+	if (idx >= 0)
+		goto out;
+
+	if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
+		idx = XSCALE_COUNTER3;
+	else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
+		idx = XSCALE_COUNTER2;
+out:
+	return idx;
+}
+
+static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
+{
+	unsigned long flags, val;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
+	val |= XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	unsigned long flags, val;
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	val = xscale2pmu_read_pmnc();
+	val &= ~XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(val);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static inline u32 xscale2pmu_read_counter(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+	u32 val = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER2:
+		asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER3:
+		asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
+		break;
+	}
+
+	return val;
+}
+
+static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER2:
+		asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER3:
+		asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
+		break;
+	}
+}
+
+static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name		= "armv5_xscale2";
+	cpu_pmu->handle_irq	= xscale2pmu_handle_irq;
+	cpu_pmu->enable		= xscale2pmu_enable_event;
+	cpu_pmu->disable	= xscale2pmu_disable_event;
+	cpu_pmu->read_counter	= xscale2pmu_read_counter;
+	cpu_pmu->write_counter	= xscale2pmu_write_counter;
+	cpu_pmu->get_event_idx	= xscale2pmu_get_event_idx;
+	cpu_pmu->start		= xscale2pmu_start;
+	cpu_pmu->stop		= xscale2pmu_stop;
+	cpu_pmu->map_event	= xscale_map_event;
+	cpu_pmu->num_events	= 5;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+
+	return 0;
+}
+#else
+static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+#endif	/* CONFIG_CPU_XSCALE */
diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c
new file mode 100644
index 000000000..592dda3f2
--- /dev/null
+++ b/arch/arm/kernel/perf_regs.c
@@ -0,0 +1,38 @@
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM_MAX))
+		return 0;
+
+	return regs->uregs[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_ARM_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	return PERF_SAMPLE_REGS_ABI_32;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs,
+			struct pt_regs *regs_user_copy)
+{
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c
new file mode 100644
index 000000000..8153e36b2
--- /dev/null
+++ b/arch/arm/kernel/pj4-cp0.c
@@ -0,0 +1,133 @@
+/*
+ * linux/arch/arm/kernel/pj4-cp0.c
+ *
+ * PJ4 iWMMXt coprocessor context switching and handling
+ *
+ * Copyright (c) 2010 Marvell International Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <asm/thread_notify.h>
+#include <asm/cputype.h>
+
+static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t)
+{
+	struct thread_info *thread = t;
+
+	switch (cmd) {
+	case THREAD_NOTIFY_FLUSH:
+		/*
+		 * flush_thread() zeroes thread->fpstate, so no need
+		 * to do anything here.
+		 *
+		 * FALLTHROUGH: Ensure we don't try to overwrite our newly
+		 * initialised state information on the first fault.
+		 */
+
+	case THREAD_NOTIFY_EXIT:
+		iwmmxt_task_release(thread);
+		break;
+
+	case THREAD_NOTIFY_SWITCH:
+		iwmmxt_task_switch(thread);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block __maybe_unused iwmmxt_notifier_block = {
+	.notifier_call	= iwmmxt_do,
+};
+
+
+static u32 __init pj4_cp_access_read(void)
+{
+	u32 value;
+
+	__asm__ __volatile__ (
+		"mrc	p15, 0, %0, c1, c0, 2\n\t"
+		: "=r" (value));
+	return value;
+}
+
+static void __init pj4_cp_access_write(u32 value)
+{
+	u32 temp;
+
+	__asm__ __volatile__ (
+		"mcr	p15, 0, %1, c1, c0, 2\n\t"
+		"mrc	p15, 0, %0, c1, c0, 2\n\t"
+		"mov	%0, %0\n\t"
+		"sub	pc, pc, #4\n\t"
+		: "=r" (temp) : "r" (value));
+}
+
+static int __init pj4_get_iwmmxt_version(void)
+{
+	u32 cp_access, wcid;
+
+	cp_access = pj4_cp_access_read();
+	pj4_cp_access_write(cp_access | 0xf);
+
+	/* check if coprocessor 0 and 1 are available */
+	if ((pj4_cp_access_read() & 0xf) != 0xf) {
+		pj4_cp_access_write(cp_access);
+		return -ENODEV;
+	}
+
+	/* read iWMMXt coprocessor id register p1, c0 */
+	__asm__ __volatile__ ("mrc    p1, 0, %0, c0, c0, 0\n" : "=r" (wcid));
+
+	pj4_cp_access_write(cp_access);
+
+	/* iWMMXt v1 */
+	if ((wcid & 0xffffff00) == 0x56051000)
+		return 1;
+	/* iWMMXt v2 */
+	if ((wcid & 0xffffff00) == 0x56052000)
+		return 2;
+
+	return -EINVAL;
+}
+
+/*
+ * Disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy
+ * switch code handle iWMMXt context switching.
+ */
+static int __init pj4_cp0_init(void)
+{
+	u32 __maybe_unused cp_access;
+	int vers;
+
+	if (!cpu_is_pj4())
+		return 0;
+
+	vers = pj4_get_iwmmxt_version();
+	if (vers < 0)
+		return 0;
+
+#ifndef CONFIG_IWMMXT
+	pr_info("PJ4 iWMMXt coprocessor detected, but kernel support is missing.\n");
+#else
+	cp_access = pj4_cp_access_read() & ~0xf;
+	pj4_cp_access_write(cp_access);
+
+	pr_info("PJ4 iWMMXt v%d coprocessor enabled.\n", vers);
+	elf_hwcap |= HWCAP_IWMMXT;
+	thread_register_notifier(&iwmmxt_notifier_block);
+#endif
+
+	return 0;
+}
+
+late_initcall(pj4_cp0_init);
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
new file mode 100644
index 000000000..f192a2a41
--- /dev/null
+++ b/arch/arm/kernel/process.c
@@ -0,0 +1,423 @@
+/*
+ *  linux/arch/arm/kernel/process.c
+ *
+ *  Copyright (C) 1996-2000 Russell King - Converted to ARM.
+ *  Original Copyright (C) 1995  Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <stdarg.h>
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/init.h>
+#include <linux/elfcore.h>
+#include <linux/pm.h>
+#include <linux/tick.h>
+#include <linux/utsname.h>
+#include <linux/uaccess.h>
+#include <linux/random.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/leds.h>
+
+#include <asm/processor.h>
+#include <asm/thread_notify.h>
+#include <asm/stacktrace.h>
+#include <asm/system_misc.h>
+#include <asm/mach/time.h>
+#include <asm/tls.h>
+#include <asm/vdso.h>
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+#include <linux/stackprotector.h>
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
+static const char *processor_modes[] __maybe_unused = {
+  "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" ,
+  "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26",
+  "USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "MON_32" , "ABT_32" ,
+  "UK8_32" , "UK9_32" , "HYP_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32"
+};
+
+static const char *isa_modes[] __maybe_unused = {
+  "ARM" , "Thumb" , "Jazelle", "ThumbEE"
+};
+
+/*
+ * This is our default idle handler.
+ */
+
+void (*arm_pm_idle)(void);
+
+/*
+ * Called from the core idle loop.
+ */
+
+void arch_cpu_idle(void)
+{
+	if (arm_pm_idle)
+		arm_pm_idle();
+	else
+		cpu_do_idle();
+	local_irq_enable();
+}
+
+void arch_cpu_idle_prepare(void)
+{
+	local_fiq_enable();
+}
+
+void arch_cpu_idle_enter(void)
+{
+	ledtrig_cpu(CPU_LED_IDLE_START);
+#ifdef CONFIG_PL310_ERRATA_769419
+	wmb();
+#endif
+}
+
+void arch_cpu_idle_exit(void)
+{
+	ledtrig_cpu(CPU_LED_IDLE_END);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void arch_cpu_idle_dead(void)
+{
+	cpu_die();
+}
+#endif
+
+void __show_regs(struct pt_regs *regs)
+{
+	unsigned long flags;
+	char buf[64];
+
+	show_regs_print_info(KERN_DEFAULT);
+
+	print_symbol("PC is at %s\n", instruction_pointer(regs));
+	print_symbol("LR is at %s\n", regs->ARM_lr);
+	printk("pc : [<%08lx>]    lr : [<%08lx>]    psr: %08lx\n"
+	       "sp : %08lx  ip : %08lx  fp : %08lx\n",
+		regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr,
+		regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
+	printk("r10: %08lx  r9 : %08lx  r8 : %08lx\n",
+		regs->ARM_r10, regs->ARM_r9,
+		regs->ARM_r8);
+	printk("r7 : %08lx  r6 : %08lx  r5 : %08lx  r4 : %08lx\n",
+		regs->ARM_r7, regs->ARM_r6,
+		regs->ARM_r5, regs->ARM_r4);
+	printk("r3 : %08lx  r2 : %08lx  r1 : %08lx  r0 : %08lx\n",
+		regs->ARM_r3, regs->ARM_r2,
+		regs->ARM_r1, regs->ARM_r0);
+
+	flags = regs->ARM_cpsr;
+	buf[0] = flags & PSR_N_BIT ? 'N' : 'n';
+	buf[1] = flags & PSR_Z_BIT ? 'Z' : 'z';
+	buf[2] = flags & PSR_C_BIT ? 'C' : 'c';
+	buf[3] = flags & PSR_V_BIT ? 'V' : 'v';
+	buf[4] = '\0';
+
+#ifndef CONFIG_CPU_V7M
+	printk("Flags: %s  IRQs o%s  FIQs o%s  Mode %s  ISA %s  Segment %s\n",
+		buf, interrupts_enabled(regs) ? "n" : "ff",
+		fast_interrupts_enabled(regs) ? "n" : "ff",
+		processor_modes[processor_mode(regs)],
+		isa_modes[isa_mode(regs)],
+		get_fs() == get_ds() ? "kernel" : "user");
+#else
+	printk("xPSR: %08lx\n", regs->ARM_cpsr);
+#endif
+
+#ifdef CONFIG_CPU_CP15
+	{
+		unsigned int ctrl;
+
+		buf[0] = '\0';
+#ifdef CONFIG_CPU_CP15_MMU
+		{
+			unsigned int transbase, dac;
+			asm("mrc p15, 0, %0, c2, c0\n\t"
+			    "mrc p15, 0, %1, c3, c0\n"
+			    : "=r" (transbase), "=r" (dac));
+			snprintf(buf, sizeof(buf), "  Table: %08x  DAC: %08x",
+			  	transbase, dac);
+		}
+#endif
+		asm("mrc p15, 0, %0, c1, c0\n" : "=r" (ctrl));
+
+		printk("Control: %08x%s\n", ctrl, buf);
+	}
+#endif
+}
+
+void show_regs(struct pt_regs * regs)
+{
+	__show_regs(regs);
+	dump_stack();
+}
+
+ATOMIC_NOTIFIER_HEAD(thread_notify_head);
+
+EXPORT_SYMBOL_GPL(thread_notify_head);
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+	thread_notify(THREAD_NOTIFY_EXIT, current_thread_info());
+}
+
+void flush_thread(void)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = current;
+
+	flush_ptrace_hw_breakpoint(tsk);
+
+	memset(thread->used_cp, 0, sizeof(thread->used_cp));
+	memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
+	memset(&thread->fpstate, 0, sizeof(union fp_state));
+
+	flush_tls();
+
+	thread_notify(THREAD_NOTIFY_FLUSH, thread);
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+}
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+int
+copy_thread(unsigned long clone_flags, unsigned long stack_start,
+	    unsigned long stk_sz, struct task_struct *p)
+{
+	struct thread_info *thread = task_thread_info(p);
+	struct pt_regs *childregs = task_pt_regs(p);
+
+	memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
+
+	if (likely(!(p->flags & PF_KTHREAD))) {
+		*childregs = *current_pt_regs();
+		childregs->ARM_r0 = 0;
+		if (stack_start)
+			childregs->ARM_sp = stack_start;
+	} else {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		thread->cpu_context.r4 = stk_sz;
+		thread->cpu_context.r5 = stack_start;
+		childregs->ARM_cpsr = SVC_MODE;
+	}
+	thread->cpu_context.pc = (unsigned long)ret_from_fork;
+	thread->cpu_context.sp = (unsigned long)childregs;
+
+	clear_ptrace_hw_breakpoint(p);
+
+	if (clone_flags & CLONE_SETTLS)
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tpuser();
+
+	thread_notify(THREAD_NOTIFY_COPY, thread);
+
+	return 0;
+}
+
+/*
+ * Fill in the task's elfregs structure for a core dump.
+ */
+int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs)
+{
+	elf_core_copy_regs(elfregs, task_pt_regs(t));
+	return 1;
+}
+
+/*
+ * fill in the fpe structure for a core dump...
+ */
+int dump_fpu (struct pt_regs *regs, struct user_fp *fp)
+{
+	struct thread_info *thread = current_thread_info();
+	int used_math = thread->used_cp[1] | thread->used_cp[2];
+
+	if (used_math)
+		memcpy(fp, &thread->fpstate.soft, sizeof (*fp));
+
+	return used_math != 0;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	struct stackframe frame;
+	unsigned long stack_page;
+	int count = 0;
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	frame.fp = thread_saved_fp(p);
+	frame.sp = thread_saved_sp(p);
+	frame.lr = 0;			/* recovered from the stack */
+	frame.pc = thread_saved_pc(p);
+	stack_page = (unsigned long)task_stack_page(p);
+	do {
+		if (frame.sp < stack_page ||
+		    frame.sp >= stack_page + THREAD_SIZE ||
+		    unwind_frame(&frame) < 0)
+			return 0;
+		if (!in_sched_functions(frame.pc))
+			return frame.pc;
+	} while (count ++ < 16);
+	return 0;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long range_end = mm->brk + 0x02000000;
+	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+
+#ifdef CONFIG_MMU
+#ifdef CONFIG_KUSER_HELPERS
+/*
+ * The vectors page is always readable from user space for the
+ * atomic helpers. Insert it into the gate_vma so that it is visible
+ * through ptrace and /proc/<pid>/mem.
+ */
+static struct vm_area_struct gate_vma = {
+	.vm_start	= 0xffff0000,
+	.vm_end		= 0xffff0000 + PAGE_SIZE,
+	.vm_flags	= VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
+};
+
+static int __init gate_vma_init(void)
+{
+	gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
+	return 0;
+}
+arch_initcall(gate_vma_init);
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+	return &gate_vma;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+	return (addr >= gate_vma.vm_start) && (addr < gate_vma.vm_end);
+}
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+	return in_gate_area(NULL, addr);
+}
+#define is_gate_vma(vma)	((vma) == &gate_vma)
+#else
+#define is_gate_vma(vma)	0
+#endif
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	return is_gate_vma(vma) ? "[vectors]" : NULL;
+}
+
+/* If possible, provide a placement hint at a random offset from the
+ * stack for the sigpage and vdso pages.
+ */
+static unsigned long sigpage_addr(const struct mm_struct *mm,
+				  unsigned int npages)
+{
+	unsigned long offset;
+	unsigned long first;
+	unsigned long last;
+	unsigned long addr;
+	unsigned int slots;
+
+	first = PAGE_ALIGN(mm->start_stack);
+
+	last = TASK_SIZE - (npages << PAGE_SHIFT);
+
+	/* No room after stack? */
+	if (first > last)
+		return 0;
+
+	/* Just enough room? */
+	if (first == last)
+		return first;
+
+	slots = ((last - first) >> PAGE_SHIFT) + 1;
+
+	offset = get_random_int() % slots;
+
+	addr = first + (offset << PAGE_SHIFT);
+
+	return addr;
+}
+
+static struct page *signal_page;
+extern struct page *get_signal_page(void);
+
+static const struct vm_special_mapping sigpage_mapping = {
+	.name = "[sigpage]",
+	.pages = &signal_page,
+};
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long npages;
+	unsigned long addr;
+	unsigned long hint;
+	int ret = 0;
+
+	if (!signal_page)
+		signal_page = get_signal_page();
+	if (!signal_page)
+		return -ENOMEM;
+
+	npages = 1; /* for sigpage */
+	npages += vdso_total_pages;
+
+	down_write(&mm->mmap_sem);
+	hint = sigpage_addr(mm, npages);
+	addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
+		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+		&sigpage_mapping);
+
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto up_fail;
+	}
+
+	mm->context.sigpage = addr;
+
+	/* Unlike the sigpage, failure to install the vdso is unlikely
+	 * to be fatal to the process, so no error check needed
+	 * here.
+	 */
+	arm_install_vdso(mm, addr + PAGE_SIZE);
+
+ up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+#endif
diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S
new file mode 100644
index 000000000..a78e9e1e2
--- /dev/null
+++ b/arch/arm/kernel/psci-call.S
@@ -0,0 +1,31 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2015 ARM Limited
+ *
+ * Author: Mark Rutland <mark.rutland@arm.com>
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/opcodes-sec.h>
+#include <asm/opcodes-virt.h>
+
+/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */
+ENTRY(__invoke_psci_fn_hvc)
+	__HVC(0)
+	bx	lr
+ENDPROC(__invoke_psci_fn_hvc)
+
+/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */
+ENTRY(__invoke_psci_fn_smc)
+	__SMC(0)
+	bx	lr
+ENDPROC(__invoke_psci_fn_smc)
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
new file mode 100644
index 000000000..f90fdf4ce
--- /dev/null
+++ b/arch/arm/kernel/psci.c
@@ -0,0 +1,299 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#define pr_fmt(fmt) "psci: " fmt
+
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/reboot.h>
+#include <linux/pm.h>
+#include <uapi/linux/psci.h>
+
+#include <asm/compiler.h>
+#include <asm/errno.h>
+#include <asm/psci.h>
+#include <asm/system_misc.h>
+
+struct psci_operations psci_ops;
+
+static int (*invoke_psci_fn)(u32, u32, u32, u32);
+typedef int (*psci_initcall_t)(const struct device_node *);
+
+asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32);
+asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32);
+
+enum psci_function {
+	PSCI_FN_CPU_SUSPEND,
+	PSCI_FN_CPU_ON,
+	PSCI_FN_CPU_OFF,
+	PSCI_FN_MIGRATE,
+	PSCI_FN_AFFINITY_INFO,
+	PSCI_FN_MIGRATE_INFO_TYPE,
+	PSCI_FN_MAX,
+};
+
+static u32 psci_function_id[PSCI_FN_MAX];
+
+static int psci_to_linux_errno(int errno)
+{
+	switch (errno) {
+	case PSCI_RET_SUCCESS:
+		return 0;
+	case PSCI_RET_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case PSCI_RET_INVALID_PARAMS:
+		return -EINVAL;
+	case PSCI_RET_DENIED:
+		return -EPERM;
+	};
+
+	return -EINVAL;
+}
+
+static u32 psci_power_state_pack(struct psci_power_state state)
+{
+	return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
+			& PSCI_0_2_POWER_STATE_ID_MASK) |
+		((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+		 & PSCI_0_2_POWER_STATE_TYPE_MASK) |
+		((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+		 & PSCI_0_2_POWER_STATE_AFFL_MASK);
+}
+
+static int psci_get_version(void)
+{
+	int err;
+
+	err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
+	return err;
+}
+
+static int psci_cpu_suspend(struct psci_power_state state,
+			    unsigned long entry_point)
+{
+	int err;
+	u32 fn, power_state;
+
+	fn = psci_function_id[PSCI_FN_CPU_SUSPEND];
+	power_state = psci_power_state_pack(state);
+	err = invoke_psci_fn(fn, power_state, entry_point, 0);
+	return psci_to_linux_errno(err);
+}
+
+static int psci_cpu_off(struct psci_power_state state)
+{
+	int err;
+	u32 fn, power_state;
+
+	fn = psci_function_id[PSCI_FN_CPU_OFF];
+	power_state = psci_power_state_pack(state);
+	err = invoke_psci_fn(fn, power_state, 0, 0);
+	return psci_to_linux_errno(err);
+}
+
+static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_CPU_ON];
+	err = invoke_psci_fn(fn, cpuid, entry_point, 0);
+	return psci_to_linux_errno(err);
+}
+
+static int psci_migrate(unsigned long cpuid)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_MIGRATE];
+	err = invoke_psci_fn(fn, cpuid, 0, 0);
+	return psci_to_linux_errno(err);
+}
+
+static int psci_affinity_info(unsigned long target_affinity,
+		unsigned long lowest_affinity_level)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
+	err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
+	return err;
+}
+
+static int psci_migrate_info_type(void)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
+	err = invoke_psci_fn(fn, 0, 0, 0);
+	return err;
+}
+
+static int get_set_conduit_method(struct device_node *np)
+{
+	const char *method;
+
+	pr_info("probing for conduit method from DT.\n");
+
+	if (of_property_read_string(np, "method", &method)) {
+		pr_warn("missing \"method\" property\n");
+		return -ENXIO;
+	}
+
+	if (!strcmp("hvc", method)) {
+		invoke_psci_fn = __invoke_psci_fn_hvc;
+	} else if (!strcmp("smc", method)) {
+		invoke_psci_fn = __invoke_psci_fn_smc;
+	} else {
+		pr_warn("invalid \"method\" property: %s\n", method);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
+{
+	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+}
+
+static void psci_sys_poweroff(void)
+{
+	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
+}
+
+/*
+ * PSCI Function IDs for v0.2+ are well defined so use
+ * standard values.
+ */
+static int psci_0_2_init(struct device_node *np)
+{
+	int err, ver;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+
+	ver = psci_get_version();
+
+	if (ver == PSCI_RET_NOT_SUPPORTED) {
+		/* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
+		pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+		err = -EOPNOTSUPP;
+		goto out_put_node;
+	} else {
+		pr_info("PSCIv%d.%d detected in firmware.\n",
+				PSCI_VERSION_MAJOR(ver),
+				PSCI_VERSION_MINOR(ver));
+
+		if (PSCI_VERSION_MAJOR(ver) == 0 &&
+				PSCI_VERSION_MINOR(ver) < 2) {
+			err = -EINVAL;
+			pr_err("Conflicting PSCI version detected.\n");
+			goto out_put_node;
+		}
+	}
+
+	pr_info("Using standard PSCI v0.2 function IDs\n");
+	psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND;
+	psci_ops.cpu_suspend = psci_cpu_suspend;
+
+	psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
+	psci_ops.cpu_off = psci_cpu_off;
+
+	psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON;
+	psci_ops.cpu_on = psci_cpu_on;
+
+	psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE;
+	psci_ops.migrate = psci_migrate;
+
+	psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO;
+	psci_ops.affinity_info = psci_affinity_info;
+
+	psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
+		PSCI_0_2_FN_MIGRATE_INFO_TYPE;
+	psci_ops.migrate_info_type = psci_migrate_info_type;
+
+	arm_pm_restart = psci_sys_reset;
+
+	pm_power_off = psci_sys_poweroff;
+
+out_put_node:
+	of_node_put(np);
+	return err;
+}
+
+/*
+ * PSCI < v0.2 get PSCI Function IDs via DT.
+ */
+static int psci_0_1_init(struct device_node *np)
+{
+	u32 id;
+	int err;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+
+	pr_info("Using PSCI v0.1 Function IDs from DT\n");
+
+	if (!of_property_read_u32(np, "cpu_suspend", &id)) {
+		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
+		psci_ops.cpu_suspend = psci_cpu_suspend;
+	}
+
+	if (!of_property_read_u32(np, "cpu_off", &id)) {
+		psci_function_id[PSCI_FN_CPU_OFF] = id;
+		psci_ops.cpu_off = psci_cpu_off;
+	}
+
+	if (!of_property_read_u32(np, "cpu_on", &id)) {
+		psci_function_id[PSCI_FN_CPU_ON] = id;
+		psci_ops.cpu_on = psci_cpu_on;
+	}
+
+	if (!of_property_read_u32(np, "migrate", &id)) {
+		psci_function_id[PSCI_FN_MIGRATE] = id;
+		psci_ops.migrate = psci_migrate;
+	}
+
+out_put_node:
+	of_node_put(np);
+	return err;
+}
+
+static const struct of_device_id psci_of_match[] __initconst = {
+	{ .compatible = "arm,psci", .data = psci_0_1_init},
+	{ .compatible = "arm,psci-0.2", .data = psci_0_2_init},
+	{},
+};
+
+int __init psci_init(void)
+{
+	struct device_node *np;
+	const struct of_device_id *matched_np;
+	psci_initcall_t init_fn;
+
+	np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
+	if (!np)
+		return -ENODEV;
+
+	init_fn = (psci_initcall_t)matched_np->data;
+	return init_fn(np);
+}
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
new file mode 100644
index 000000000..28a1db4da
--- /dev/null
+++ b/arch/arm/kernel/psci_smp.c
@@ -0,0 +1,115 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <uapi/linux/psci.h>
+
+#include <asm/psci.h>
+#include <asm/smp_plat.h>
+
+/*
+ * psci_smp assumes that the following is true about PSCI:
+ *
+ * cpu_suspend   Suspend the execution on a CPU
+ * @state        we don't currently describe affinity levels, so just pass 0.
+ * @entry_point  the first instruction to be executed on return
+ * returns 0  success, < 0 on failure
+ *
+ * cpu_off       Power down a CPU
+ * @state        we don't currently describe affinity levels, so just pass 0.
+ * no return on successful call
+ *
+ * cpu_on        Power up a CPU
+ * @cpuid        cpuid of target CPU, as from MPIDR
+ * @entry_point  the first instruction to be executed on return
+ * returns 0  success, < 0 on failure
+ *
+ * migrate       Migrate the context to a different CPU
+ * @cpuid        cpuid of target CPU, as from MPIDR
+ * returns 0  success, < 0 on failure
+ *
+ */
+
+extern void secondary_startup(void);
+
+static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	if (psci_ops.cpu_on)
+		return psci_ops.cpu_on(cpu_logical_map(cpu),
+				       __pa(secondary_startup));
+	return -ENODEV;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __ref psci_cpu_die(unsigned int cpu)
+{
+       const struct psci_power_state ps = {
+               .type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
+       };
+
+       if (psci_ops.cpu_off)
+               psci_ops.cpu_off(ps);
+
+       /* We should never return */
+       panic("psci: cpu %d failed to shutdown\n", cpu);
+}
+
+int __ref psci_cpu_kill(unsigned int cpu)
+{
+	int err, i;
+
+	if (!psci_ops.affinity_info)
+		return 1;
+	/*
+	 * cpu_kill could race with cpu_die and we can
+	 * potentially end up declaring this cpu undead
+	 * while it is dying. So, try again a few times.
+	 */
+
+	for (i = 0; i < 10; i++) {
+		err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
+		if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
+			pr_info("CPU%d killed.\n", cpu);
+			return 1;
+		}
+
+		msleep(10);
+		pr_info("Retrying again to check for CPU kill\n");
+	}
+
+	pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
+			cpu, err);
+	/* Make platform_cpu_kill() fail. */
+	return 0;
+}
+
+#endif
+
+bool __init psci_smp_available(void)
+{
+	/* is cpu_on available at least? */
+	return (psci_ops.cpu_on != NULL);
+}
+
+struct smp_operations __initdata psci_smp_ops = {
+	.smp_boot_secondary	= psci_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_die		= psci_cpu_die,
+	.cpu_kill		= psci_cpu_kill,
+#endif
+};
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
new file mode 100644
index 000000000..ef9119f74
--- /dev/null
+++ b/arch/arm/kernel/ptrace.c
@@ -0,0 +1,977 @@
+/*
+ *  linux/arch/arm/kernel/ptrace.c
+ *
+ *  By Ross Biro 1/23/92
+ * edited by Linus Torvalds
+ * ARM modifications Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/elf.h>
+#include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/uaccess.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/regset.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/unistd.h>
+
+#include <asm/pgtable.h>
+#include <asm/traps.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+#define REG_PC	15
+#define REG_PSR	16
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+#if 0
+/*
+ * Breakpoint SWI instruction: SWI &9F0001
+ */
+#define BREAKINST_ARM	0xef9f0001
+#define BREAKINST_THUMB	0xdf00		/* fill this in later */
+#else
+/*
+ * New breakpoints - use an undefined instruction.  The ARM architecture
+ * reference manual guarantees that the following instruction space
+ * will produce an undefined instruction exception on all CPUs:
+ *
+ *  ARM:   xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
+ *  Thumb: 1101 1110 xxxx xxxx
+ */
+#define BREAKINST_ARM	0xe7f001f0
+#define BREAKINST_THUMB	0xde01
+#endif
+
+struct pt_regs_offset {
+	const char *name;
+	int offset;
+};
+
+#define REG_OFFSET_NAME(r) \
+	{.name = #r, .offset = offsetof(struct pt_regs, ARM_##r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+	REG_OFFSET_NAME(r0),
+	REG_OFFSET_NAME(r1),
+	REG_OFFSET_NAME(r2),
+	REG_OFFSET_NAME(r3),
+	REG_OFFSET_NAME(r4),
+	REG_OFFSET_NAME(r5),
+	REG_OFFSET_NAME(r6),
+	REG_OFFSET_NAME(r7),
+	REG_OFFSET_NAME(r8),
+	REG_OFFSET_NAME(r9),
+	REG_OFFSET_NAME(r10),
+	REG_OFFSET_NAME(fp),
+	REG_OFFSET_NAME(ip),
+	REG_OFFSET_NAME(sp),
+	REG_OFFSET_NAME(lr),
+	REG_OFFSET_NAME(pc),
+	REG_OFFSET_NAME(cpsr),
+	REG_OFFSET_NAME(ORIG_r0),
+	REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:	the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_offset *roff;
+	for (roff = regoffset_table; roff->name != NULL; roff++)
+		if (!strcmp(roff->name, name))
+			return roff->offset;
+	return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset:	the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+	const struct pt_regs_offset *roff;
+	for (roff = regoffset_table; roff->name != NULL; roff++)
+		if (roff->offset == offset)
+			return roff->name;
+	return NULL;
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs:      pt_regs which contains kernel stack pointer.
+ * @addr:      address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+	return ((addr & ~(THREAD_SIZE - 1))  ==
+		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @n:		stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+	unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+	addr += n;
+	if (regs_within_kernel_stack(regs, (unsigned long)addr))
+		return *addr;
+	else
+		return 0;
+}
+
+/*
+ * this routine will get a word off of the processes privileged stack.
+ * the offset is how far from the base addr as stored in the THREAD.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline long get_user_reg(struct task_struct *task, int offset)
+{
+	return task_pt_regs(task)->uregs[offset];
+}
+
+/*
+ * this routine will put a word on the processes privileged stack.
+ * the offset is how far from the base addr as stored in the THREAD.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int
+put_user_reg(struct task_struct *task, int offset, long data)
+{
+	struct pt_regs newregs, *regs = task_pt_regs(task);
+	int ret = -EINVAL;
+
+	newregs = *regs;
+	newregs.uregs[offset] = data;
+
+	if (valid_user_regs(&newregs)) {
+		regs->uregs[offset] = data;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Handle hitting a breakpoint.
+ */
+void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code  = TRAP_BRKPT;
+	info.si_addr  = (void __user *)instruction_pointer(regs);
+
+	force_sig_info(SIGTRAP, &info, tsk);
+}
+
+static int break_trap(struct pt_regs *regs, unsigned int instr)
+{
+	ptrace_break(current, regs);
+	return 0;
+}
+
+static struct undef_hook arm_break_hook = {
+	.instr_mask	= 0x0fffffff,
+	.instr_val	= 0x07f001f0,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= 0,
+	.fn		= break_trap,
+};
+
+static struct undef_hook thumb_break_hook = {
+	.instr_mask	= 0xffff,
+	.instr_val	= 0xde01,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= PSR_T_BIT,
+	.fn		= break_trap,
+};
+
+static struct undef_hook thumb2_break_hook = {
+	.instr_mask	= 0xffffffff,
+	.instr_val	= 0xf7f0a000,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= PSR_T_BIT,
+	.fn		= break_trap,
+};
+
+static int __init ptrace_break_init(void)
+{
+	register_undef_hook(&arm_break_hook);
+	register_undef_hook(&thumb_break_hook);
+	register_undef_hook(&thumb2_break_hook);
+	return 0;
+}
+
+core_initcall(ptrace_break_init);
+
+/*
+ * Read the word at offset "off" into the "struct user".  We
+ * actually access the pt_regs stored on the kernel stack.
+ */
+static int ptrace_read_user(struct task_struct *tsk, unsigned long off,
+			    unsigned long __user *ret)
+{
+	unsigned long tmp;
+
+	if (off & 3)
+		return -EIO;
+
+	tmp = 0;
+	if (off == PT_TEXT_ADDR)
+		tmp = tsk->mm->start_code;
+	else if (off == PT_DATA_ADDR)
+		tmp = tsk->mm->start_data;
+	else if (off == PT_TEXT_END_ADDR)
+		tmp = tsk->mm->end_code;
+	else if (off < sizeof(struct pt_regs))
+		tmp = get_user_reg(tsk, off >> 2);
+	else if (off >= sizeof(struct user))
+		return -EIO;
+
+	return put_user(tmp, ret);
+}
+
+/*
+ * Write the word at offset "off" into "struct user".  We
+ * actually access the pt_regs stored on the kernel stack.
+ */
+static int ptrace_write_user(struct task_struct *tsk, unsigned long off,
+			     unsigned long val)
+{
+	if (off & 3 || off >= sizeof(struct user))
+		return -EIO;
+
+	if (off >= sizeof(struct pt_regs))
+		return 0;
+
+	return put_user_reg(tsk, off >> 2, val);
+}
+
+#ifdef CONFIG_IWMMXT
+
+/*
+ * Get the child iWMMXt state.
+ */
+static int ptrace_getwmmxregs(struct task_struct *tsk, void __user *ufp)
+{
+	struct thread_info *thread = task_thread_info(tsk);
+
+	if (!test_ti_thread_flag(thread, TIF_USING_IWMMXT))
+		return -ENODATA;
+	iwmmxt_task_disable(thread);  /* force it to ram */
+	return copy_to_user(ufp, &thread->fpstate.iwmmxt, IWMMXT_SIZE)
+		? -EFAULT : 0;
+}
+
+/*
+ * Set the child iWMMXt state.
+ */
+static int ptrace_setwmmxregs(struct task_struct *tsk, void __user *ufp)
+{
+	struct thread_info *thread = task_thread_info(tsk);
+
+	if (!test_ti_thread_flag(thread, TIF_USING_IWMMXT))
+		return -EACCES;
+	iwmmxt_task_release(thread);  /* force a reload */
+	return copy_from_user(&thread->fpstate.iwmmxt, ufp, IWMMXT_SIZE)
+		? -EFAULT : 0;
+}
+
+#endif
+
+#ifdef CONFIG_CRUNCH
+/*
+ * Get the child Crunch state.
+ */
+static int ptrace_getcrunchregs(struct task_struct *tsk, void __user *ufp)
+{
+	struct thread_info *thread = task_thread_info(tsk);
+
+	crunch_task_disable(thread);  /* force it to ram */
+	return copy_to_user(ufp, &thread->crunchstate, CRUNCH_SIZE)
+		? -EFAULT : 0;
+}
+
+/*
+ * Set the child Crunch state.
+ */
+static int ptrace_setcrunchregs(struct task_struct *tsk, void __user *ufp)
+{
+	struct thread_info *thread = task_thread_info(tsk);
+
+	crunch_task_release(thread);  /* force a reload */
+	return copy_from_user(&thread->crunchstate, ufp, CRUNCH_SIZE)
+		? -EFAULT : 0;
+}
+#endif
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+/*
+ * Convert a virtual register number into an index for a thread_info
+ * breakpoint array. Breakpoints are identified using positive numbers
+ * whilst watchpoints are negative. The registers are laid out as pairs
+ * of (address, control), each pair mapping to a unique hw_breakpoint struct.
+ * Register 0 is reserved for describing resource information.
+ */
+static int ptrace_hbp_num_to_idx(long num)
+{
+	if (num < 0)
+		num = (ARM_MAX_BRP << 1) - num;
+	return (num - 1) >> 1;
+}
+
+/*
+ * Returns the virtual register number for the address of the
+ * breakpoint at index idx.
+ */
+static long ptrace_hbp_idx_to_num(int idx)
+{
+	long mid = ARM_MAX_BRP << 1;
+	long num = (idx << 1) + 1;
+	return num > mid ? mid - num : num;
+}
+
+/*
+ * Handle hitting a HW-breakpoint.
+ */
+static void ptrace_hbptriggered(struct perf_event *bp,
+				     struct perf_sample_data *data,
+				     struct pt_regs *regs)
+{
+	struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
+	long num;
+	int i;
+	siginfo_t info;
+
+	for (i = 0; i < ARM_MAX_HBP_SLOTS; ++i)
+		if (current->thread.debug.hbp[i] == bp)
+			break;
+
+	num = (i == ARM_MAX_HBP_SLOTS) ? 0 : ptrace_hbp_idx_to_num(i);
+
+	info.si_signo	= SIGTRAP;
+	info.si_errno	= (int)num;
+	info.si_code	= TRAP_HWBKPT;
+	info.si_addr	= (void __user *)(bkpt->trigger);
+
+	force_sig_info(SIGTRAP, &info, current);
+}
+
+/*
+ * Set ptrace breakpoint pointers to zero for this task.
+ * This is required in order to prevent child processes from unregistering
+ * breakpoints held by their parent.
+ */
+void clear_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	memset(tsk->thread.debug.hbp, 0, sizeof(tsk->thread.debug.hbp));
+}
+
+/*
+ * Unregister breakpoints from this task and reset the pointers in
+ * the thread_struct.
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < ARM_MAX_HBP_SLOTS; i++) {
+		if (t->debug.hbp[i]) {
+			unregister_hw_breakpoint(t->debug.hbp[i]);
+			t->debug.hbp[i] = NULL;
+		}
+	}
+}
+
+static u32 ptrace_get_hbp_resource_info(void)
+{
+	u8 num_brps, num_wrps, debug_arch, wp_len;
+	u32 reg = 0;
+
+	num_brps	= hw_breakpoint_slots(TYPE_INST);
+	num_wrps	= hw_breakpoint_slots(TYPE_DATA);
+	debug_arch	= arch_get_debug_arch();
+	wp_len		= arch_get_max_wp_len();
+
+	reg		|= debug_arch;
+	reg		<<= 8;
+	reg		|= wp_len;
+	reg		<<= 8;
+	reg		|= num_wrps;
+	reg		<<= 8;
+	reg		|= num_brps;
+
+	return reg;
+}
+
+static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type)
+{
+	struct perf_event_attr attr;
+
+	ptrace_breakpoint_init(&attr);
+
+	/* Initialise fields to sane defaults. */
+	attr.bp_addr	= 0;
+	attr.bp_len	= HW_BREAKPOINT_LEN_4;
+	attr.bp_type	= type;
+	attr.disabled	= 1;
+
+	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL,
+					   tsk);
+}
+
+static int ptrace_gethbpregs(struct task_struct *tsk, long num,
+			     unsigned long  __user *data)
+{
+	u32 reg;
+	int idx, ret = 0;
+	struct perf_event *bp;
+	struct arch_hw_breakpoint_ctrl arch_ctrl;
+
+	if (num == 0) {
+		reg = ptrace_get_hbp_resource_info();
+	} else {
+		idx = ptrace_hbp_num_to_idx(num);
+		if (idx < 0 || idx >= ARM_MAX_HBP_SLOTS) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		bp = tsk->thread.debug.hbp[idx];
+		if (!bp) {
+			reg = 0;
+			goto put;
+		}
+
+		arch_ctrl = counter_arch_bp(bp)->ctrl;
+
+		/*
+		 * Fix up the len because we may have adjusted it
+		 * to compensate for an unaligned address.
+		 */
+		while (!(arch_ctrl.len & 0x1))
+			arch_ctrl.len >>= 1;
+
+		if (num & 0x1)
+			reg = bp->attr.bp_addr;
+		else
+			reg = encode_ctrl_reg(arch_ctrl);
+	}
+
+put:
+	if (put_user(reg, data))
+		ret = -EFAULT;
+
+out:
+	return ret;
+}
+
+static int ptrace_sethbpregs(struct task_struct *tsk, long num,
+			     unsigned long __user *data)
+{
+	int idx, gen_len, gen_type, implied_type, ret = 0;
+	u32 user_val;
+	struct perf_event *bp;
+	struct arch_hw_breakpoint_ctrl ctrl;
+	struct perf_event_attr attr;
+
+	if (num == 0)
+		goto out;
+	else if (num < 0)
+		implied_type = HW_BREAKPOINT_RW;
+	else
+		implied_type = HW_BREAKPOINT_X;
+
+	idx = ptrace_hbp_num_to_idx(num);
+	if (idx < 0 || idx >= ARM_MAX_HBP_SLOTS) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (get_user(user_val, data)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	bp = tsk->thread.debug.hbp[idx];
+	if (!bp) {
+		bp = ptrace_hbp_create(tsk, implied_type);
+		if (IS_ERR(bp)) {
+			ret = PTR_ERR(bp);
+			goto out;
+		}
+		tsk->thread.debug.hbp[idx] = bp;
+	}
+
+	attr = bp->attr;
+
+	if (num & 0x1) {
+		/* Address */
+		attr.bp_addr	= user_val;
+	} else {
+		/* Control */
+		decode_ctrl_reg(user_val, &ctrl);
+		ret = arch_bp_generic_fields(ctrl, &gen_len, &gen_type);
+		if (ret)
+			goto out;
+
+		if ((gen_type & implied_type) != gen_type) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		attr.bp_len	= gen_len;
+		attr.bp_type	= gen_type;
+		attr.disabled	= !ctrl.enabled;
+	}
+
+	ret = modify_user_hw_breakpoint(bp, &attr);
+out:
+	return ret;
+}
+#endif
+
+/* regset get/set implementations */
+
+static int gpr_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   regs,
+				   0, sizeof(*regs));
+}
+
+static int gpr_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct pt_regs newregs;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &newregs,
+				 0, sizeof(newregs));
+	if (ret)
+		return ret;
+
+	if (!valid_user_regs(&newregs))
+		return -EINVAL;
+
+	*task_pt_regs(target) = newregs;
+	return 0;
+}
+
+static int fpa_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &task_thread_info(target)->fpstate,
+				   0, sizeof(struct user_fp));
+}
+
+static int fpa_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	struct thread_info *thread = task_thread_info(target);
+
+	thread->used_cp[1] = thread->used_cp[2] = 1;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+		&thread->fpstate,
+		0, sizeof(struct user_fp));
+}
+
+#ifdef CONFIG_VFP
+/*
+ * VFP register get/set implementations.
+ *
+ * With respect to the kernel, struct user_fp is divided into three chunks:
+ * 16 or 32 real VFP registers (d0-d15 or d0-31)
+ *	These are transferred to/from the real registers in the task's
+ *	vfp_hard_struct.  The number of registers depends on the kernel
+ *	configuration.
+ *
+ * 16 or 0 fake VFP registers (d16-d31 or empty)
+ *	i.e., the user_vfp structure has space for 32 registers even if
+ *	the kernel doesn't have them all.
+ *
+ *	vfp_get() reads this chunk as zero where applicable
+ *	vfp_set() ignores this chunk
+ *
+ * 1 word for the FPSCR
+ *
+ * The bounds-checking logic built into user_regset_copyout and friends
+ * means that we can make a simple sequence of calls to map the relevant data
+ * to/from the specified slice of the user regset structure.
+ */
+static int vfp_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	int ret;
+	struct thread_info *thread = task_thread_info(target);
+	struct vfp_hard_struct const *vfp = &thread->vfpstate.hard;
+	const size_t user_fpregs_offset = offsetof(struct user_vfp, fpregs);
+	const size_t user_fpscr_offset = offsetof(struct user_vfp, fpscr);
+
+	vfp_sync_hwstate(thread);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &vfp->fpregs,
+				  user_fpregs_offset,
+				  user_fpregs_offset + sizeof(vfp->fpregs));
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+				       user_fpregs_offset + sizeof(vfp->fpregs),
+				       user_fpscr_offset);
+	if (ret)
+		return ret;
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &vfp->fpscr,
+				   user_fpscr_offset,
+				   user_fpscr_offset + sizeof(vfp->fpscr));
+}
+
+/*
+ * For vfp_set() a read-modify-write is done on the VFP registers,
+ * in order to avoid writing back a half-modified set of registers on
+ * failure.
+ */
+static int vfp_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct thread_info *thread = task_thread_info(target);
+	struct vfp_hard_struct new_vfp;
+	const size_t user_fpregs_offset = offsetof(struct user_vfp, fpregs);
+	const size_t user_fpscr_offset = offsetof(struct user_vfp, fpscr);
+
+	vfp_sync_hwstate(thread);
+	new_vfp = thread->vfpstate.hard;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &new_vfp.fpregs,
+				  user_fpregs_offset,
+				  user_fpregs_offset + sizeof(new_vfp.fpregs));
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+				user_fpregs_offset + sizeof(new_vfp.fpregs),
+				user_fpscr_offset);
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &new_vfp.fpscr,
+				 user_fpscr_offset,
+				 user_fpscr_offset + sizeof(new_vfp.fpscr));
+	if (ret)
+		return ret;
+
+	vfp_flush_hwstate(thread);
+	thread->vfpstate.hard = new_vfp;
+
+	return 0;
+}
+#endif /* CONFIG_VFP */
+
+enum arm_regset {
+	REGSET_GPR,
+	REGSET_FPR,
+#ifdef CONFIG_VFP
+	REGSET_VFP,
+#endif
+};
+
+static const struct user_regset arm_regsets[] = {
+	[REGSET_GPR] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.get = gpr_get,
+		.set = gpr_set
+	},
+	[REGSET_FPR] = {
+		/*
+		 * For the FPA regs in fpstate, the real fields are a mixture
+		 * of sizes, so pretend that the registers are word-sized:
+		 */
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(struct user_fp) / sizeof(u32),
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.get = fpa_get,
+		.set = fpa_set
+	},
+#ifdef CONFIG_VFP
+	[REGSET_VFP] = {
+		/*
+		 * Pretend that the VFP regs are word-sized, since the FPSCR is
+		 * a single word dangling at the end of struct user_vfp:
+		 */
+		.core_note_type = NT_ARM_VFP,
+		.n = ARM_VFPREGS_SIZE / sizeof(u32),
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.get = vfp_get,
+		.set = vfp_set
+	},
+#endif /* CONFIG_VFP */
+};
+
+static const struct user_regset_view user_arm_view = {
+	.name = "arm", .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
+	.regsets = arm_regsets, .n = ARRAY_SIZE(arm_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_arm_view;
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	int ret;
+	unsigned long __user *datap = (unsigned long __user *) data;
+
+	switch (request) {
+		case PTRACE_PEEKUSR:
+			ret = ptrace_read_user(child, addr, datap);
+			break;
+
+		case PTRACE_POKEUSR:
+			ret = ptrace_write_user(child, addr, data);
+			break;
+
+		case PTRACE_GETREGS:
+			ret = copy_regset_to_user(child,
+						  &user_arm_view, REGSET_GPR,
+						  0, sizeof(struct pt_regs),
+						  datap);
+			break;
+
+		case PTRACE_SETREGS:
+			ret = copy_regset_from_user(child,
+						    &user_arm_view, REGSET_GPR,
+						    0, sizeof(struct pt_regs),
+						    datap);
+			break;
+
+		case PTRACE_GETFPREGS:
+			ret = copy_regset_to_user(child,
+						  &user_arm_view, REGSET_FPR,
+						  0, sizeof(union fp_state),
+						  datap);
+			break;
+
+		case PTRACE_SETFPREGS:
+			ret = copy_regset_from_user(child,
+						    &user_arm_view, REGSET_FPR,
+						    0, sizeof(union fp_state),
+						    datap);
+			break;
+
+#ifdef CONFIG_IWMMXT
+		case PTRACE_GETWMMXREGS:
+			ret = ptrace_getwmmxregs(child, datap);
+			break;
+
+		case PTRACE_SETWMMXREGS:
+			ret = ptrace_setwmmxregs(child, datap);
+			break;
+#endif
+
+		case PTRACE_GET_THREAD_AREA:
+			ret = put_user(task_thread_info(child)->tp_value[0],
+				       datap);
+			break;
+
+		case PTRACE_SET_SYSCALL:
+			task_thread_info(child)->syscall = data;
+			ret = 0;
+			break;
+
+#ifdef CONFIG_CRUNCH
+		case PTRACE_GETCRUNCHREGS:
+			ret = ptrace_getcrunchregs(child, datap);
+			break;
+
+		case PTRACE_SETCRUNCHREGS:
+			ret = ptrace_setcrunchregs(child, datap);
+			break;
+#endif
+
+#ifdef CONFIG_VFP
+		case PTRACE_GETVFPREGS:
+			ret = copy_regset_to_user(child,
+						  &user_arm_view, REGSET_VFP,
+						  0, ARM_VFPREGS_SIZE,
+						  datap);
+			break;
+
+		case PTRACE_SETVFPREGS:
+			ret = copy_regset_from_user(child,
+						    &user_arm_view, REGSET_VFP,
+						    0, ARM_VFPREGS_SIZE,
+						    datap);
+			break;
+#endif
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+		case PTRACE_GETHBPREGS:
+			ret = ptrace_gethbpregs(child, addr,
+						(unsigned long __user *)data);
+			break;
+		case PTRACE_SETHBPREGS:
+			ret = ptrace_sethbpregs(child, addr,
+						(unsigned long __user *)data);
+			break;
+#endif
+
+		default:
+			ret = ptrace_request(child, request, addr, data);
+			break;
+	}
+
+	return ret;
+}
+
+enum ptrace_syscall_dir {
+	PTRACE_SYSCALL_ENTER = 0,
+	PTRACE_SYSCALL_EXIT,
+};
+
+static void tracehook_report_syscall(struct pt_regs *regs,
+				    enum ptrace_syscall_dir dir)
+{
+	unsigned long ip;
+
+	/*
+	 * IP is used to denote syscall entry/exit:
+	 * IP = 0 -> entry, =1 -> exit
+	 */
+	ip = regs->ARM_ip;
+	regs->ARM_ip = dir;
+
+	if (dir == PTRACE_SYSCALL_EXIT)
+		tracehook_report_syscall_exit(regs, 0);
+	else if (tracehook_report_syscall_entry(regs))
+		current_thread_info()->syscall = -1;
+
+	regs->ARM_ip = ip;
+}
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
+{
+	current_thread_info()->syscall = scno;
+
+	/* Do the secure computing check first; failures should be fast. */
+#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+	if (secure_computing() == -1)
+		return -1;
+#else
+	/* XXX: remove this once OABI gets fixed */
+	secure_computing_strict(scno);
+#endif
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+	scno = current_thread_info()->syscall;
+
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_enter(regs, scno);
+
+	audit_syscall_entry(scno, regs->ARM_r0, regs->ARM_r1, regs->ARM_r2,
+			    regs->ARM_r3);
+
+	return scno;
+}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	/*
+	 * Audit the syscall before anything else, as a debugger may
+	 * come in and change the current registers.
+	 */
+	audit_syscall_exit(regs);
+
+	/*
+	 * Note that we haven't updated the ->syscall field for the
+	 * current thread. This isn't a problem because it will have
+	 * been set on syscall entry and there hasn't been an opportunity
+	 * for a PTRACE_SET_SYSCALL since then.
+	 */
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_exit(regs, regs_return_value(regs));
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+}
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
new file mode 100644
index 000000000..1a4d23279
--- /dev/null
+++ b/arch/arm/kernel/reboot.c
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (C) 1996-2000 Russell King - Converted to ARM.
+ *  Original Copyright (C) 1995  Linus Torvalds
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+
+#include <asm/cacheflush.h>
+#include <asm/idmap.h>
+
+#include "reboot.h"
+
+typedef void (*phys_reset_t)(unsigned long);
+
+/*
+ * Function pointers to optional machine specific functions
+ */
+void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+/*
+ * A temporary stack to use for CPU reset. This is static so that we
+ * don't clobber it with the identity mapping. When running with this
+ * stack, any references to the current task *will not work* so you
+ * should really do as little as possible before jumping to your reset
+ * code.
+ */
+static u64 soft_restart_stack[16];
+
+static void __soft_restart(void *addr)
+{
+	phys_reset_t phys_reset;
+
+	/* Take out a flat memory mapping. */
+	setup_mm_for_reboot();
+
+	/* Clean and invalidate caches */
+	flush_cache_all();
+
+	/* Turn off caching */
+	cpu_proc_fin();
+
+	/* Push out any further dirty data, and ensure cache is empty */
+	flush_cache_all();
+
+	/* Switch to the identity mapping. */
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset((unsigned long)addr);
+
+	/* Should never get here. */
+	BUG();
+}
+
+void _soft_restart(unsigned long addr, bool disable_l2)
+{
+	u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack);
+
+	/* Disable interrupts first */
+	raw_local_irq_disable();
+	local_fiq_disable();
+
+	/* Disable the L2 if we're the last man standing. */
+	if (disable_l2)
+		outer_disable();
+
+	/* Change to the new stack and continue with the reset. */
+	call_with_stack(__soft_restart, (void *)addr, (void *)stack);
+
+	/* Should never get here. */
+	BUG();
+}
+
+void soft_restart(unsigned long addr)
+{
+	_soft_restart(addr, num_online_cpus() == 1);
+}
+
+/*
+ * Called by kexec, immediately prior to machine_kexec().
+ *
+ * This must completely disable all secondary CPUs; simply causing those CPUs
+ * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
+ * kexec'd kernel to use any and all RAM as it sees fit, without having to
+ * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
+ * functionality embodied in disable_nonboot_cpus() to achieve this.
+ */
+void machine_shutdown(void)
+{
+	disable_nonboot_cpus();
+}
+
+/*
+ * Halting simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this.
+ */
+void machine_halt(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	local_irq_disable();
+	while (1);
+}
+
+/*
+ * Power-off simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this. When the system power is turned off, it will take all CPUs
+ * with it.
+ */
+void machine_power_off(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	if (pm_power_off)
+		pm_power_off();
+}
+
+/*
+ * Restart requires that the secondary CPUs stop performing any activity
+ * while the primary CPU resets the system. Systems with a single CPU can
+ * use soft_restart() as their machine descriptor's .restart hook, since that
+ * will cause the only available CPU to reset. Systems with multiple CPUs must
+ * provide a HW restart implementation, to ensure that all CPUs reset at once.
+ * This is required so that any code running after reset on the primary CPU
+ * doesn't have to co-ordinate with other CPUs to ensure they aren't still
+ * executing pre-reset code, and using RAM that the primary CPU's code wishes
+ * to use. Implementing such co-ordination would be essentially impossible.
+ */
+void machine_restart(char *cmd)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	if (arm_pm_restart)
+		arm_pm_restart(reboot_mode, cmd);
+	else
+		do_kernel_restart(cmd);
+
+	/* Give a grace period for failure to restart of 1s */
+	mdelay(1000);
+
+	/* Whoops - the platform was unable to reboot. Tell the user! */
+	printk("Reboot failed -- System halted\n");
+	local_irq_disable();
+	while (1);
+}
diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h
new file mode 100644
index 000000000..bf7a0b1f0
--- /dev/null
+++ b/arch/arm/kernel/reboot.h
@@ -0,0 +1,7 @@
+#ifndef REBOOT_H
+#define REBOOT_H
+
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+extern void _soft_restart(unsigned long addr, bool disable_l2);
+
+#endif
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S
new file mode 100644
index 000000000..35e72585e
--- /dev/null
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -0,0 +1,93 @@
+/*
+ * relocate_kernel.S - put the kernel image in place to boot
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+
+	.align	3	/* not needed for this code, but keeps fncpy() happy */
+
+ENTRY(relocate_new_kernel)
+
+	ldr	r0,kexec_indirection_page
+	ldr	r1,kexec_start_address
+
+	/*
+	 * If there is no indirection page (we are doing crashdumps)
+	 * skip any relocation.
+	 */
+	cmp	r0, #0
+	beq	2f
+
+0:	/* top, read another word for the indirection page */
+	ldr	r3, [r0],#4
+
+	/* Is it a destination page. Put destination address to r4 */
+	tst	r3,#1,0
+	beq	1f
+	bic	r4,r3,#1
+	b	0b
+1:
+	/* Is it an indirection page */
+	tst	r3,#2,0
+	beq	1f
+	bic	r0,r3,#2
+	b	0b
+1:
+
+	/* are we done ? */
+	tst	r3,#4,0
+	beq	1f
+	b	2f
+
+1:
+	/* is it source ? */
+	tst	r3,#8,0
+	beq	0b
+	bic r3,r3,#8
+	mov r6,#1024
+9:
+	ldr r5,[r3],#4
+	str r5,[r4],#4
+	subs r6,r6,#1
+	bne 9b
+	b 0b
+
+2:
+	/* Jump to relocated kernel */
+	mov lr,r1
+	mov r0,#0
+	ldr r1,kexec_mach_type
+	ldr r2,kexec_boot_atags
+ ARM(	ret lr	)
+ THUMB(	bx lr		)
+
+	.align
+
+	.globl kexec_start_address
+kexec_start_address:
+	.long	0x0
+
+	.globl kexec_indirection_page
+kexec_indirection_page:
+	.long	0x0
+
+	.globl kexec_mach_type
+kexec_mach_type:
+	.long	0x0
+
+	/* phy addr of the atags for the new kernel */
+	.globl kexec_boot_atags
+kexec_boot_atags:
+	.long	0x0
+
+ENDPROC(relocate_new_kernel)
+
+relocate_new_kernel_end:
+
+	.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.long relocate_new_kernel_end - relocate_new_kernel
+
+
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
new file mode 100644
index 000000000..36ed35073
--- /dev/null
+++ b/arch/arm/kernel/return_address.c
@@ -0,0 +1,61 @@
+/*
+ * arch/arm/kernel/return_address.c
+ *
+ * Copyright (C) 2009 Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
+ * for Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#include <linux/export.h>
+#include <linux/ftrace.h>
+
+#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND)
+#include <linux/sched.h>
+
+#include <asm/stacktrace.h>
+
+struct return_address_data {
+	unsigned int level;
+	void *addr;
+};
+
+static int save_return_addr(struct stackframe *frame, void *d)
+{
+	struct return_address_data *data = d;
+
+	if (!data->level) {
+		data->addr = (void *)frame->pc;
+
+		return 1;
+	} else {
+		--data->level;
+		return 0;
+	}
+}
+
+void *return_address(unsigned int level)
+{
+	struct return_address_data data;
+	struct stackframe frame;
+
+	data.level = level + 2;
+	data.addr = NULL;
+
+	frame.fp = (unsigned long)__builtin_frame_address(0);
+	frame.sp = current_stack_pointer;
+	frame.lr = (unsigned long)__builtin_return_address(0);
+	frame.pc = (unsigned long)return_address;
+
+	walk_stackframe(&frame, save_return_addr, &data);
+
+	if (!data.level)
+		return data.addr;
+	else
+		return NULL;
+}
+
+#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */
+
+EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
new file mode 100644
index 000000000..6c777e908
--- /dev/null
+++ b/arch/arm/kernel/setup.c
@@ -0,0 +1,1138 @@
+/*
+ *  linux/arch/arm/kernel/setup.c
+ *
+ *  Copyright (C) 1995-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/utsname.h>
+#include <linux/initrd.h>
+#include <linux/console.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/screen_info.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/init.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/proc_fs.h>
+#include <linux/memblock.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/sort.h>
+
+#include <asm/unified.h>
+#include <asm/cp15.h>
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/elf.h>
+#include <asm/procinfo.h>
+#include <asm/psci.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/smp_plat.h>
+#include <asm/mach-types.h>
+#include <asm/cacheflush.h>
+#include <asm/cachetype.h>
+#include <asm/tlbflush.h>
+
+#include <asm/prom.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/time.h>
+#include <asm/system_info.h>
+#include <asm/system_misc.h>
+#include <asm/traps.h>
+#include <asm/unwind.h>
+#include <asm/memblock.h>
+#include <asm/virt.h>
+
+#include "atags.h"
+
+
+#if defined(CONFIG_FPE_NWFPE) || defined(CONFIG_FPE_FASTFPE)
+char fpe_type[8];
+
+static int __init fpe_setup(char *line)
+{
+	memcpy(fpe_type, line, 8);
+	return 1;
+}
+
+__setup("fpe=", fpe_setup);
+#endif
+
+extern void init_default_cache_policy(unsigned long);
+extern void paging_init(const struct machine_desc *desc);
+extern void early_paging_init(const struct machine_desc *,
+			      struct proc_info_list *);
+extern void sanity_check_meminfo(void);
+extern enum reboot_mode reboot_mode;
+extern void setup_dma_zone(const struct machine_desc *desc);
+
+unsigned int processor_id;
+EXPORT_SYMBOL(processor_id);
+unsigned int __machine_arch_type __read_mostly;
+EXPORT_SYMBOL(__machine_arch_type);
+unsigned int cacheid __read_mostly;
+EXPORT_SYMBOL(cacheid);
+
+unsigned int __atags_pointer __initdata;
+
+unsigned int system_rev;
+EXPORT_SYMBOL(system_rev);
+
+unsigned int system_serial_low;
+EXPORT_SYMBOL(system_serial_low);
+
+unsigned int system_serial_high;
+EXPORT_SYMBOL(system_serial_high);
+
+unsigned int elf_hwcap __read_mostly;
+EXPORT_SYMBOL(elf_hwcap);
+
+unsigned int elf_hwcap2 __read_mostly;
+EXPORT_SYMBOL(elf_hwcap2);
+
+
+#ifdef MULTI_CPU
+struct processor processor __read_mostly;
+#endif
+#ifdef MULTI_TLB
+struct cpu_tlb_fns cpu_tlb __read_mostly;
+#endif
+#ifdef MULTI_USER
+struct cpu_user_fns cpu_user __read_mostly;
+#endif
+#ifdef MULTI_CACHE
+struct cpu_cache_fns cpu_cache __read_mostly;
+#endif
+#ifdef CONFIG_OUTER_CACHE
+struct outer_cache_fns outer_cache __read_mostly;
+EXPORT_SYMBOL(outer_cache);
+#endif
+
+/*
+ * Cached cpu_architecture() result for use by assembler code.
+ * C code should use the cpu_architecture() function instead of accessing this
+ * variable directly.
+ */
+int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN;
+
+struct stack {
+	u32 irq[3];
+	u32 abt[3];
+	u32 und[3];
+	u32 fiq[3];
+} ____cacheline_aligned;
+
+#ifndef CONFIG_CPU_V7M
+static struct stack stacks[NR_CPUS];
+#endif
+
+char elf_platform[ELF_PLATFORM_SIZE];
+EXPORT_SYMBOL(elf_platform);
+
+static const char *cpu_name;
+static const char *machine_name;
+static char __initdata cmd_line[COMMAND_LINE_SIZE];
+const struct machine_desc *machine_desc __initdata;
+
+static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } };
+#define ENDIANNESS ((char)endian_test.l)
+
+DEFINE_PER_CPU(struct cpuinfo_arm, cpu_data);
+
+/*
+ * Standard memory resources
+ */
+static struct resource mem_res[] = {
+	{
+		.name = "Video RAM",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM
+	},
+	{
+		.name = "Kernel code",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM
+	},
+	{
+		.name = "Kernel data",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM
+	}
+};
+
+#define video_ram   mem_res[0]
+#define kernel_code mem_res[1]
+#define kernel_data mem_res[2]
+
+static struct resource io_res[] = {
+	{
+		.name = "reserved",
+		.start = 0x3bc,
+		.end = 0x3be,
+		.flags = IORESOURCE_IO | IORESOURCE_BUSY
+	},
+	{
+		.name = "reserved",
+		.start = 0x378,
+		.end = 0x37f,
+		.flags = IORESOURCE_IO | IORESOURCE_BUSY
+	},
+	{
+		.name = "reserved",
+		.start = 0x278,
+		.end = 0x27f,
+		.flags = IORESOURCE_IO | IORESOURCE_BUSY
+	}
+};
+
+#define lp0 io_res[0]
+#define lp1 io_res[1]
+#define lp2 io_res[2]
+
+static const char *proc_arch[] = {
+	"undefined/unknown",
+	"3",
+	"4",
+	"4T",
+	"5",
+	"5T",
+	"5TE",
+	"5TEJ",
+	"6TEJ",
+	"7",
+	"7M",
+	"?(12)",
+	"?(13)",
+	"?(14)",
+	"?(15)",
+	"?(16)",
+	"?(17)",
+};
+
+#ifdef CONFIG_CPU_V7M
+static int __get_cpu_architecture(void)
+{
+	return CPU_ARCH_ARMv7M;
+}
+#else
+static int __get_cpu_architecture(void)
+{
+	int cpu_arch;
+
+	if ((read_cpuid_id() & 0x0008f000) == 0) {
+		cpu_arch = CPU_ARCH_UNKNOWN;
+	} else if ((read_cpuid_id() & 0x0008f000) == 0x00007000) {
+		cpu_arch = (read_cpuid_id() & (1 << 23)) ? CPU_ARCH_ARMv4T : CPU_ARCH_ARMv3;
+	} else if ((read_cpuid_id() & 0x00080000) == 0x00000000) {
+		cpu_arch = (read_cpuid_id() >> 16) & 7;
+		if (cpu_arch)
+			cpu_arch += CPU_ARCH_ARMv3;
+	} else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
+		/* Revised CPUID format. Read the Memory Model Feature
+		 * Register 0 and check for VMSAv7 or PMSAv7 */
+		unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0);
+		if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
+		    (mmfr0 & 0x000000f0) >= 0x00000030)
+			cpu_arch = CPU_ARCH_ARMv7;
+		else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
+			 (mmfr0 & 0x000000f0) == 0x00000020)
+			cpu_arch = CPU_ARCH_ARMv6;
+		else
+			cpu_arch = CPU_ARCH_UNKNOWN;
+	} else
+		cpu_arch = CPU_ARCH_UNKNOWN;
+
+	return cpu_arch;
+}
+#endif
+
+int __pure cpu_architecture(void)
+{
+	BUG_ON(__cpu_architecture == CPU_ARCH_UNKNOWN);
+
+	return __cpu_architecture;
+}
+
+static int cpu_has_aliasing_icache(unsigned int arch)
+{
+	int aliasing_icache;
+	unsigned int id_reg, num_sets, line_size;
+
+	/* PIPT caches never alias. */
+	if (icache_is_pipt())
+		return 0;
+
+	/* arch specifies the register format */
+	switch (arch) {
+	case CPU_ARCH_ARMv7:
+		asm("mcr	p15, 2, %0, c0, c0, 0 @ set CSSELR"
+		    : /* No output operands */
+		    : "r" (1));
+		isb();
+		asm("mrc	p15, 1, %0, c0, c0, 0 @ read CCSIDR"
+		    : "=r" (id_reg));
+		line_size = 4 << ((id_reg & 0x7) + 2);
+		num_sets = ((id_reg >> 13) & 0x7fff) + 1;
+		aliasing_icache = (line_size * num_sets) > PAGE_SIZE;
+		break;
+	case CPU_ARCH_ARMv6:
+		aliasing_icache = read_cpuid_cachetype() & (1 << 11);
+		break;
+	default:
+		/* I-cache aliases will be handled by D-cache aliasing code */
+		aliasing_icache = 0;
+	}
+
+	return aliasing_icache;
+}
+
+static void __init cacheid_init(void)
+{
+	unsigned int arch = cpu_architecture();
+
+	if (arch == CPU_ARCH_ARMv7M) {
+		cacheid = 0;
+	} else if (arch >= CPU_ARCH_ARMv6) {
+		unsigned int cachetype = read_cpuid_cachetype();
+		if ((cachetype & (7 << 29)) == 4 << 29) {
+			/* ARMv7 register format */
+			arch = CPU_ARCH_ARMv7;
+			cacheid = CACHEID_VIPT_NONALIASING;
+			switch (cachetype & (3 << 14)) {
+			case (1 << 14):
+				cacheid |= CACHEID_ASID_TAGGED;
+				break;
+			case (3 << 14):
+				cacheid |= CACHEID_PIPT;
+				break;
+			}
+		} else {
+			arch = CPU_ARCH_ARMv6;
+			if (cachetype & (1 << 23))
+				cacheid = CACHEID_VIPT_ALIASING;
+			else
+				cacheid = CACHEID_VIPT_NONALIASING;
+		}
+		if (cpu_has_aliasing_icache(arch))
+			cacheid |= CACHEID_VIPT_I_ALIASING;
+	} else {
+		cacheid = CACHEID_VIVT;
+	}
+
+	pr_info("CPU: %s data cache, %s instruction cache\n",
+		cache_is_vivt() ? "VIVT" :
+		cache_is_vipt_aliasing() ? "VIPT aliasing" :
+		cache_is_vipt_nonaliasing() ? "PIPT / VIPT nonaliasing" : "unknown",
+		cache_is_vivt() ? "VIVT" :
+		icache_is_vivt_asid_tagged() ? "VIVT ASID tagged" :
+		icache_is_vipt_aliasing() ? "VIPT aliasing" :
+		icache_is_pipt() ? "PIPT" :
+		cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown");
+}
+
+/*
+ * These functions re-use the assembly code in head.S, which
+ * already provide the required functionality.
+ */
+extern struct proc_info_list *lookup_processor_type(unsigned int);
+
+void __init early_print(const char *str, ...)
+{
+	extern void printascii(const char *);
+	char buf[256];
+	va_list ap;
+
+	va_start(ap, str);
+	vsnprintf(buf, sizeof(buf), str, ap);
+	va_end(ap);
+
+#ifdef CONFIG_DEBUG_LL
+	printascii(buf);
+#endif
+	printk("%s", buf);
+}
+
+static void __init cpuid_init_hwcaps(void)
+{
+	int block;
+	u32 isar5;
+
+	if (cpu_architecture() < CPU_ARCH_ARMv7)
+		return;
+
+	block = cpuid_feature_extract(CPUID_EXT_ISAR0, 24);
+	if (block >= 2)
+		elf_hwcap |= HWCAP_IDIVA;
+	if (block >= 1)
+		elf_hwcap |= HWCAP_IDIVT;
+
+	/* LPAE implies atomic ldrd/strd instructions */
+	block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0);
+	if (block >= 5)
+		elf_hwcap |= HWCAP_LPAE;
+
+	/* check for supported v8 Crypto instructions */
+	isar5 = read_cpuid_ext(CPUID_EXT_ISAR5);
+
+	block = cpuid_feature_extract_field(isar5, 4);
+	if (block >= 2)
+		elf_hwcap2 |= HWCAP2_PMULL;
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_AES;
+
+	block = cpuid_feature_extract_field(isar5, 8);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_SHA1;
+
+	block = cpuid_feature_extract_field(isar5, 12);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_SHA2;
+
+	block = cpuid_feature_extract_field(isar5, 16);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_CRC32;
+}
+
+static void __init elf_hwcap_fixup(void)
+{
+	unsigned id = read_cpuid_id();
+
+	/*
+	 * HWCAP_TLS is available only on 1136 r1p0 and later,
+	 * see also kuser_get_tls_init.
+	 */
+	if (read_cpuid_part() == ARM_CPU_PART_ARM1136 &&
+	    ((id >> 20) & 3) == 0) {
+		elf_hwcap &= ~HWCAP_TLS;
+		return;
+	}
+
+	/* Verify if CPUID scheme is implemented */
+	if ((id & 0x000f0000) != 0x000f0000)
+		return;
+
+	/*
+	 * If the CPU supports LDREX/STREX and LDREXB/STREXB,
+	 * avoid advertising SWP; it may not be atomic with
+	 * multiprocessing cores.
+	 */
+	if (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) > 1 ||
+	    (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) == 1 &&
+	     cpuid_feature_extract(CPUID_EXT_ISAR3, 20) >= 3))
+		elf_hwcap &= ~HWCAP_SWP;
+}
+
+/*
+ * cpu_init - initialise one CPU.
+ *
+ * cpu_init sets up the per-CPU stacks.
+ */
+void notrace cpu_init(void)
+{
+#ifndef CONFIG_CPU_V7M
+	unsigned int cpu = smp_processor_id();
+	struct stack *stk = &stacks[cpu];
+
+	if (cpu >= NR_CPUS) {
+		pr_crit("CPU%u: bad primary CPU number\n", cpu);
+		BUG();
+	}
+
+	/*
+	 * This only works on resume and secondary cores. For booting on the
+	 * boot cpu, smp_prepare_boot_cpu is called after percpu area setup.
+	 */
+	set_my_cpu_offset(per_cpu_offset(cpu));
+
+	cpu_proc_init();
+
+	/*
+	 * Define the placement constraint for the inline asm directive below.
+	 * In Thumb-2, msr with an immediate value is not allowed.
+	 */
+#ifdef CONFIG_THUMB2_KERNEL
+#define PLC	"r"
+#else
+#define PLC	"I"
+#endif
+
+	/*
+	 * setup stacks for re-entrant exception handlers
+	 */
+	__asm__ (
+	"msr	cpsr_c, %1\n\t"
+	"add	r14, %0, %2\n\t"
+	"mov	sp, r14\n\t"
+	"msr	cpsr_c, %3\n\t"
+	"add	r14, %0, %4\n\t"
+	"mov	sp, r14\n\t"
+	"msr	cpsr_c, %5\n\t"
+	"add	r14, %0, %6\n\t"
+	"mov	sp, r14\n\t"
+	"msr	cpsr_c, %7\n\t"
+	"add	r14, %0, %8\n\t"
+	"mov	sp, r14\n\t"
+	"msr	cpsr_c, %9"
+	    :
+	    : "r" (stk),
+	      PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
+	      "I" (offsetof(struct stack, irq[0])),
+	      PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
+	      "I" (offsetof(struct stack, abt[0])),
+	      PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
+	      "I" (offsetof(struct stack, und[0])),
+	      PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
+	      "I" (offsetof(struct stack, fiq[0])),
+	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
+	    : "r14");
+#endif
+}
+
+u32 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID };
+
+void __init smp_setup_processor_id(void)
+{
+	int i;
+	u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
+	u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+
+	cpu_logical_map(0) = cpu;
+	for (i = 1; i < nr_cpu_ids; ++i)
+		cpu_logical_map(i) = i == cpu ? 0 : i;
+
+	/*
+	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
+	 * using percpu variable early, for example, lockdep will
+	 * access percpu variable inside lock_release
+	 */
+	set_my_cpu_offset(0);
+
+	pr_info("Booting Linux on physical CPU 0x%x\n", mpidr);
+}
+
+struct mpidr_hash mpidr_hash;
+#ifdef CONFIG_SMP
+/**
+ * smp_build_mpidr_hash - Pre-compute shifts required at each affinity
+ *			  level in order to build a linear index from an
+ *			  MPIDR value. Resulting algorithm is a collision
+ *			  free hash carried out through shifting and ORing
+ */
+static void __init smp_build_mpidr_hash(void)
+{
+	u32 i, affinity;
+	u32 fs[3], bits[3], ls, mask = 0;
+	/*
+	 * Pre-scan the list of MPIDRS and filter out bits that do
+	 * not contribute to affinity levels, ie they never toggle.
+	 */
+	for_each_possible_cpu(i)
+		mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));
+	pr_debug("mask of set bits 0x%x\n", mask);
+	/*
+	 * Find and stash the last and first bit set at all affinity levels to
+	 * check how many bits are required to represent them.
+	 */
+	for (i = 0; i < 3; i++) {
+		affinity = MPIDR_AFFINITY_LEVEL(mask, i);
+		/*
+		 * Find the MSB bit and LSB bits position
+		 * to determine how many bits are required
+		 * to express the affinity level.
+		 */
+		ls = fls(affinity);
+		fs[i] = affinity ? ffs(affinity) - 1 : 0;
+		bits[i] = ls - fs[i];
+	}
+	/*
+	 * An index can be created from the MPIDR by isolating the
+	 * significant bits at each affinity level and by shifting
+	 * them in order to compress the 24 bits values space to a
+	 * compressed set of values. This is equivalent to hashing
+	 * the MPIDR through shifting and ORing. It is a collision free
+	 * hash though not minimal since some levels might contain a number
+	 * of CPUs that is not an exact power of 2 and their bit
+	 * representation might contain holes, eg MPIDR[7:0] = {0x2, 0x80}.
+	 */
+	mpidr_hash.shift_aff[0] = fs[0];
+	mpidr_hash.shift_aff[1] = MPIDR_LEVEL_BITS + fs[1] - bits[0];
+	mpidr_hash.shift_aff[2] = 2*MPIDR_LEVEL_BITS + fs[2] -
+						(bits[1] + bits[0]);
+	mpidr_hash.mask = mask;
+	mpidr_hash.bits = bits[2] + bits[1] + bits[0];
+	pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] mask[0x%x] bits[%u]\n",
+				mpidr_hash.shift_aff[0],
+				mpidr_hash.shift_aff[1],
+				mpidr_hash.shift_aff[2],
+				mpidr_hash.mask,
+				mpidr_hash.bits);
+	/*
+	 * 4x is an arbitrary value used to warn on a hash table much bigger
+	 * than expected on most systems.
+	 */
+	if (mpidr_hash_size() > 4 * num_possible_cpus())
+		pr_warn("Large number of MPIDR hash buckets detected\n");
+	sync_cache_w(&mpidr_hash);
+}
+#endif
+
+static void __init setup_processor(void)
+{
+	struct proc_info_list *list;
+
+	/*
+	 * locate processor in the list of supported processor
+	 * types.  The linker builds this table for us from the
+	 * entries in arch/arm/mm/proc-*.S
+	 */
+	list = lookup_processor_type(read_cpuid_id());
+	if (!list) {
+		pr_err("CPU configuration botched (ID %08x), unable to continue.\n",
+		       read_cpuid_id());
+		while (1);
+	}
+
+	cpu_name = list->cpu_name;
+	__cpu_architecture = __get_cpu_architecture();
+
+#ifdef MULTI_CPU
+	processor = *list->proc;
+#endif
+#ifdef MULTI_TLB
+	cpu_tlb = *list->tlb;
+#endif
+#ifdef MULTI_USER
+	cpu_user = *list->user;
+#endif
+#ifdef MULTI_CACHE
+	cpu_cache = *list->cache;
+#endif
+
+	pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n",
+		cpu_name, read_cpuid_id(), read_cpuid_id() & 15,
+		proc_arch[cpu_architecture()], get_cr());
+
+	snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c",
+		 list->arch_name, ENDIANNESS);
+	snprintf(elf_platform, ELF_PLATFORM_SIZE, "%s%c",
+		 list->elf_name, ENDIANNESS);
+	elf_hwcap = list->elf_hwcap;
+
+	cpuid_init_hwcaps();
+
+#ifndef CONFIG_ARM_THUMB
+	elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);
+#endif
+#ifdef CONFIG_MMU
+	init_default_cache_policy(list->__cpu_mm_mmu_flags);
+#endif
+	erratum_a15_798181_init();
+
+	elf_hwcap_fixup();
+
+	cacheid_init();
+	cpu_init();
+}
+
+void __init dump_machine_table(void)
+{
+	const struct machine_desc *p;
+
+	early_print("Available machine support:\n\nID (hex)\tNAME\n");
+	for_each_machine_desc(p)
+		early_print("%08x\t%s\n", p->nr, p->name);
+
+	early_print("\nPlease check your kernel config and/or bootloader.\n");
+
+	while (true)
+		/* can't use cpu_relax() here as it may require MMU setup */;
+}
+
+int __init arm_add_memory(u64 start, u64 size)
+{
+	u64 aligned_start;
+
+	/*
+	 * Ensure that start/size are aligned to a page boundary.
+	 * Size is rounded down, start is rounded up.
+	 */
+	aligned_start = PAGE_ALIGN(start);
+	if (aligned_start > start + size)
+		size = 0;
+	else
+		size -= aligned_start - start;
+
+#ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT
+	if (aligned_start > ULONG_MAX) {
+		pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n",
+			(long long)start);
+		return -EINVAL;
+	}
+
+	if (aligned_start + size > ULONG_MAX) {
+		pr_crit("Truncating memory at 0x%08llx to fit in 32-bit physical address space\n",
+			(long long)start);
+		/*
+		 * To ensure bank->start + bank->size is representable in
+		 * 32 bits, we use ULONG_MAX as the upper limit rather than 4GB.
+		 * This means we lose a page after masking.
+		 */
+		size = ULONG_MAX - aligned_start;
+	}
+#endif
+
+	if (aligned_start < PHYS_OFFSET) {
+		if (aligned_start + size <= PHYS_OFFSET) {
+			pr_info("Ignoring memory below PHYS_OFFSET: 0x%08llx-0x%08llx\n",
+				aligned_start, aligned_start + size);
+			return -EINVAL;
+		}
+
+		pr_info("Ignoring memory below PHYS_OFFSET: 0x%08llx-0x%08llx\n",
+			aligned_start, (u64)PHYS_OFFSET);
+
+		size -= PHYS_OFFSET - aligned_start;
+		aligned_start = PHYS_OFFSET;
+	}
+
+	start = aligned_start;
+	size = size & ~(phys_addr_t)(PAGE_SIZE - 1);
+
+	/*
+	 * Check whether this memory region has non-zero size or
+	 * invalid node number.
+	 */
+	if (size == 0)
+		return -EINVAL;
+
+	memblock_add(start, size);
+	return 0;
+}
+
+/*
+ * Pick out the memory size.  We look for mem=size@start,
+ * where start and size are "size[KkMm]"
+ */
+
+static int __init early_mem(char *p)
+{
+	static int usermem __initdata = 0;
+	u64 size;
+	u64 start;
+	char *endp;
+
+	/*
+	 * If the user specifies memory size, we
+	 * blow away any automatically generated
+	 * size.
+	 */
+	if (usermem == 0) {
+		usermem = 1;
+		memblock_remove(memblock_start_of_DRAM(),
+			memblock_end_of_DRAM() - memblock_start_of_DRAM());
+	}
+
+	start = PHYS_OFFSET;
+	size  = memparse(p, &endp);
+	if (*endp == '@')
+		start = memparse(endp + 1, NULL);
+
+	arm_add_memory(start, size);
+
+	return 0;
+}
+early_param("mem", early_mem);
+
+static void __init request_standard_resources(const struct machine_desc *mdesc)
+{
+	struct memblock_region *region;
+	struct resource *res;
+
+	kernel_code.start   = virt_to_phys(_text);
+	kernel_code.end     = virt_to_phys(_etext - 1);
+	kernel_data.start   = virt_to_phys(_sdata);
+	kernel_data.end     = virt_to_phys(_end - 1);
+
+	for_each_memblock(memory, region) {
+		res = memblock_virt_alloc(sizeof(*res), 0);
+		res->name  = "System RAM";
+		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+		request_resource(&iomem_resource, res);
+
+		if (kernel_code.start >= res->start &&
+		    kernel_code.end <= res->end)
+			request_resource(res, &kernel_code);
+		if (kernel_data.start >= res->start &&
+		    kernel_data.end <= res->end)
+			request_resource(res, &kernel_data);
+	}
+
+	if (mdesc->video_start) {
+		video_ram.start = mdesc->video_start;
+		video_ram.end   = mdesc->video_end;
+		request_resource(&iomem_resource, &video_ram);
+	}
+
+	/*
+	 * Some machines don't have the possibility of ever
+	 * possessing lp0, lp1 or lp2
+	 */
+	if (mdesc->reserve_lp0)
+		request_resource(&ioport_resource, &lp0);
+	if (mdesc->reserve_lp1)
+		request_resource(&ioport_resource, &lp1);
+	if (mdesc->reserve_lp2)
+		request_resource(&ioport_resource, &lp2);
+}
+
+#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
+struct screen_info screen_info = {
+ .orig_video_lines	= 30,
+ .orig_video_cols	= 80,
+ .orig_video_mode	= 0,
+ .orig_video_ega_bx	= 0,
+ .orig_video_isVGA	= 1,
+ .orig_video_points	= 8
+};
+#endif
+
+static int __init customize_machine(void)
+{
+	/*
+	 * customizes platform devices, or adds new ones
+	 * On DT based machines, we fall back to populating the
+	 * machine from the device tree, if no callback is provided,
+	 * otherwise we would always need an init_machine callback.
+	 */
+	of_iommu_init();
+	if (machine_desc->init_machine)
+		machine_desc->init_machine();
+#ifdef CONFIG_OF
+	else
+		of_platform_populate(NULL, of_default_bus_match_table,
+					NULL, NULL);
+#endif
+	return 0;
+}
+arch_initcall(customize_machine);
+
+static int __init init_machine_late(void)
+{
+	if (machine_desc->init_late)
+		machine_desc->init_late();
+	return 0;
+}
+late_initcall(init_machine_late);
+
+#ifdef CONFIG_KEXEC
+static inline unsigned long long get_total_mem(void)
+{
+	unsigned long total;
+
+	total = max_low_pfn - min_low_pfn;
+	return total << PAGE_SHIFT;
+}
+
+/**
+ * reserve_crashkernel() - reserves memory are for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by a dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long crash_size, crash_base;
+	unsigned long long total_mem;
+	int ret;
+
+	total_mem = get_total_mem();
+	ret = parse_crashkernel(boot_command_line, total_mem,
+				&crash_size, &crash_base);
+	if (ret)
+		return;
+
+	ret = memblock_reserve(crash_base, crash_size);
+	if (ret < 0) {
+		pr_warn("crashkernel reservation failed - memory is in use (0x%lx)\n",
+			(unsigned long)crash_base);
+		return;
+	}
+
+	pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
+		(unsigned long)(crash_size >> 20),
+		(unsigned long)(crash_base >> 20),
+		(unsigned long)(total_mem >> 20));
+
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+	insert_resource(&iomem_resource, &crashk_res);
+}
+#else
+static inline void reserve_crashkernel(void) {}
+#endif /* CONFIG_KEXEC */
+
+void __init hyp_mode_check(void)
+{
+#ifdef CONFIG_ARM_VIRT_EXT
+	sync_boot_mode();
+
+	if (is_hyp_mode_available()) {
+		pr_info("CPU: All CPU(s) started in HYP mode.\n");
+		pr_info("CPU: Virtualization extensions available.\n");
+	} else if (is_hyp_mode_mismatched()) {
+		pr_warn("CPU: WARNING: CPU(s) started in wrong/inconsistent modes (primary CPU mode 0x%x)\n",
+			__boot_cpu_mode & MODE_MASK);
+		pr_warn("CPU: This may indicate a broken bootloader or firmware.\n");
+	} else
+		pr_info("CPU: All CPU(s) started in SVC mode.\n");
+#endif
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+	const struct machine_desc *mdesc;
+
+	setup_processor();
+	mdesc = setup_machine_fdt(__atags_pointer);
+	if (!mdesc)
+		mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type);
+	machine_desc = mdesc;
+	machine_name = mdesc->name;
+	dump_stack_set_arch_desc("%s", mdesc->name);
+
+	if (mdesc->reboot_mode != REBOOT_HARD)
+		reboot_mode = mdesc->reboot_mode;
+
+	init_mm.start_code = (unsigned long) _text;
+	init_mm.end_code   = (unsigned long) _etext;
+	init_mm.end_data   = (unsigned long) _edata;
+	init_mm.brk	   = (unsigned long) _end;
+
+	/* populate cmd_line too for later use, preserving boot_command_line */
+	strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
+	*cmdline_p = cmd_line;
+
+	parse_early_param();
+
+	early_paging_init(mdesc, lookup_processor_type(read_cpuid_id()));
+	setup_dma_zone(mdesc);
+	sanity_check_meminfo();
+	arm_memblock_init(mdesc);
+
+	paging_init(mdesc);
+	request_standard_resources(mdesc);
+
+	if (mdesc->restart)
+		arm_pm_restart = mdesc->restart;
+
+	unflatten_device_tree();
+
+	arm_dt_init_cpu_maps();
+	psci_init();
+#ifdef CONFIG_SMP
+	if (is_smp()) {
+		if (!mdesc->smp_init || !mdesc->smp_init()) {
+			if (psci_smp_available())
+				smp_set_ops(&psci_smp_ops);
+			else if (mdesc->smp)
+				smp_set_ops(mdesc->smp);
+		}
+		smp_init_cpus();
+		smp_build_mpidr_hash();
+	}
+#endif
+
+	if (!is_smp())
+		hyp_mode_check();
+
+	reserve_crashkernel();
+
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	handle_arch_irq = mdesc->handle_irq;
+#endif
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+	conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+#endif
+
+	if (mdesc->init_early)
+		mdesc->init_early();
+}
+
+
+static int __init topology_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpuinfo_arm *cpuinfo = &per_cpu(cpu_data, cpu);
+		cpuinfo->cpu.hotpluggable = 1;
+		register_cpu(&cpuinfo->cpu, cpu);
+	}
+
+	return 0;
+}
+subsys_initcall(topology_init);
+
+#ifdef CONFIG_HAVE_PROC_CPU
+static int __init proc_cpu_init(void)
+{
+	struct proc_dir_entry *res;
+
+	res = proc_mkdir("cpu", NULL);
+	if (!res)
+		return -ENOMEM;
+	return 0;
+}
+fs_initcall(proc_cpu_init);
+#endif
+
+static const char *hwcap_str[] = {
+	"swp",
+	"half",
+	"thumb",
+	"26bit",
+	"fastmult",
+	"fpa",
+	"vfp",
+	"edsp",
+	"java",
+	"iwmmxt",
+	"crunch",
+	"thumbee",
+	"neon",
+	"vfpv3",
+	"vfpv3d16",
+	"tls",
+	"vfpv4",
+	"idiva",
+	"idivt",
+	"vfpd32",
+	"lpae",
+	"evtstrm",
+	NULL
+};
+
+static const char *hwcap2_str[] = {
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	NULL
+};
+
+static int c_show(struct seq_file *m, void *v)
+{
+	int i, j;
+	u32 cpuid;
+
+	for_each_online_cpu(i) {
+		/*
+		 * glibc reads /proc/cpuinfo to determine the number of
+		 * online processors, looking for lines beginning with
+		 * "processor".  Give glibc what it expects.
+		 */
+		seq_printf(m, "processor\t: %d\n", i);
+		cpuid = is_smp() ? per_cpu(cpu_data, i).cpuid : read_cpuid_id();
+		seq_printf(m, "model name\t: %s rev %d (%s)\n",
+			   cpu_name, cpuid & 15, elf_platform);
+
+#if defined(CONFIG_SMP)
+		seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+			   per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ),
+			   (per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100);
+#else
+		seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+			   loops_per_jiffy / (500000/HZ),
+			   (loops_per_jiffy / (5000/HZ)) % 100);
+#endif
+		/* dump out the processor features */
+		seq_puts(m, "Features\t: ");
+
+		for (j = 0; hwcap_str[j]; j++)
+			if (elf_hwcap & (1 << j))
+				seq_printf(m, "%s ", hwcap_str[j]);
+
+		for (j = 0; hwcap2_str[j]; j++)
+			if (elf_hwcap2 & (1 << j))
+				seq_printf(m, "%s ", hwcap2_str[j]);
+
+		seq_printf(m, "\nCPU implementer\t: 0x%02x\n", cpuid >> 24);
+		seq_printf(m, "CPU architecture: %s\n",
+			   proc_arch[cpu_architecture()]);
+
+		if ((cpuid & 0x0008f000) == 0x00000000) {
+			/* pre-ARM7 */
+			seq_printf(m, "CPU part\t: %07x\n", cpuid >> 4);
+		} else {
+			if ((cpuid & 0x0008f000) == 0x00007000) {
+				/* ARM7 */
+				seq_printf(m, "CPU variant\t: 0x%02x\n",
+					   (cpuid >> 16) & 127);
+			} else {
+				/* post-ARM7 */
+				seq_printf(m, "CPU variant\t: 0x%x\n",
+					   (cpuid >> 20) & 15);
+			}
+			seq_printf(m, "CPU part\t: 0x%03x\n",
+				   (cpuid >> 4) & 0xfff);
+		}
+		seq_printf(m, "CPU revision\t: %d\n\n", cpuid & 15);
+	}
+
+	seq_printf(m, "Hardware\t: %s\n", machine_name);
+	seq_printf(m, "Revision\t: %04x\n", system_rev);
+	seq_printf(m, "Serial\t\t: %08x%08x\n",
+		   system_serial_high, system_serial_low);
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < 1 ? (void *)1 : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= c_show
+};
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
new file mode 100644
index 000000000..423663e23
--- /dev/null
+++ b/arch/arm/kernel/signal.c
@@ -0,0 +1,624 @@
+/*
+ *  linux/arch/arm/kernel/signal.c
+ *
+ *  Copyright (C) 1995-2009 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <linux/signal.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
+#include <linux/tracehook.h>
+#include <linux/uprobes.h>
+
+#include <asm/elf.h>
+#include <asm/cacheflush.h>
+#include <asm/traps.h>
+#include <asm/ucontext.h>
+#include <asm/unistd.h>
+#include <asm/vfp.h>
+
+extern const unsigned long sigreturn_codes[7];
+
+static unsigned long signal_return_offset;
+
+#ifdef CONFIG_CRUNCH
+static int preserve_crunch_context(struct crunch_sigframe __user *frame)
+{
+	char kbuf[sizeof(*frame) + 8];
+	struct crunch_sigframe *kframe;
+
+	/* the crunch context must be 64 bit aligned */
+	kframe = (struct crunch_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	kframe->magic = CRUNCH_MAGIC;
+	kframe->size = CRUNCH_STORAGE_SIZE;
+	crunch_task_copy(current_thread_info(), &kframe->storage);
+	return __copy_to_user(frame, kframe, sizeof(*frame));
+}
+
+static int restore_crunch_context(struct crunch_sigframe __user *frame)
+{
+	char kbuf[sizeof(*frame) + 8];
+	struct crunch_sigframe *kframe;
+
+	/* the crunch context must be 64 bit aligned */
+	kframe = (struct crunch_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	if (__copy_from_user(kframe, frame, sizeof(*frame)))
+		return -1;
+	if (kframe->magic != CRUNCH_MAGIC ||
+	    kframe->size != CRUNCH_STORAGE_SIZE)
+		return -1;
+	crunch_task_restore(current_thread_info(), &kframe->storage);
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_IWMMXT
+
+static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame)
+{
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
+
+	/* the iWMMXt context must be 64 bit aligned */
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	kframe->magic = IWMMXT_MAGIC;
+	kframe->size = IWMMXT_STORAGE_SIZE;
+	iwmmxt_task_copy(current_thread_info(), &kframe->storage);
+	return __copy_to_user(frame, kframe, sizeof(*frame));
+}
+
+static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame)
+{
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
+
+	/* the iWMMXt context must be 64 bit aligned */
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	if (__copy_from_user(kframe, frame, sizeof(*frame)))
+		return -1;
+	if (kframe->magic != IWMMXT_MAGIC ||
+	    kframe->size != IWMMXT_STORAGE_SIZE)
+		return -1;
+	iwmmxt_task_restore(current_thread_info(), &kframe->storage);
+	return 0;
+}
+
+#endif
+
+#ifdef CONFIG_VFP
+
+static int preserve_vfp_context(struct vfp_sigframe __user *frame)
+{
+	const unsigned long magic = VFP_MAGIC;
+	const unsigned long size = VFP_STORAGE_SIZE;
+	int err = 0;
+
+	__put_user_error(magic, &frame->magic, err);
+	__put_user_error(size, &frame->size, err);
+
+	if (err)
+		return -EFAULT;
+
+	return vfp_preserve_user_clear_hwstate(&frame->ufp, &frame->ufp_exc);
+}
+
+static int restore_vfp_context(struct vfp_sigframe __user *frame)
+{
+	unsigned long magic;
+	unsigned long size;
+	int err = 0;
+
+	__get_user_error(magic, &frame->magic, err);
+	__get_user_error(size, &frame->size, err);
+
+	if (err)
+		return -EFAULT;
+	if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE)
+		return -EINVAL;
+
+	return vfp_restore_user_hwstate(&frame->ufp, &frame->ufp_exc);
+}
+
+#endif
+
+/*
+ * Do a signal return; undo the signal stack.  These are aligned to 64-bit.
+ */
+struct sigframe {
+	struct ucontext uc;
+	unsigned long retcode[2];
+};
+
+struct rt_sigframe {
+	struct siginfo info;
+	struct sigframe sig;
+};
+
+static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf)
+{
+	struct aux_sigframe __user *aux;
+	sigset_t set;
+	int err;
+
+	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
+	if (err == 0)
+		set_current_blocked(&set);
+
+	__get_user_error(regs->ARM_r0, &sf->uc.uc_mcontext.arm_r0, err);
+	__get_user_error(regs->ARM_r1, &sf->uc.uc_mcontext.arm_r1, err);
+	__get_user_error(regs->ARM_r2, &sf->uc.uc_mcontext.arm_r2, err);
+	__get_user_error(regs->ARM_r3, &sf->uc.uc_mcontext.arm_r3, err);
+	__get_user_error(regs->ARM_r4, &sf->uc.uc_mcontext.arm_r4, err);
+	__get_user_error(regs->ARM_r5, &sf->uc.uc_mcontext.arm_r5, err);
+	__get_user_error(regs->ARM_r6, &sf->uc.uc_mcontext.arm_r6, err);
+	__get_user_error(regs->ARM_r7, &sf->uc.uc_mcontext.arm_r7, err);
+	__get_user_error(regs->ARM_r8, &sf->uc.uc_mcontext.arm_r8, err);
+	__get_user_error(regs->ARM_r9, &sf->uc.uc_mcontext.arm_r9, err);
+	__get_user_error(regs->ARM_r10, &sf->uc.uc_mcontext.arm_r10, err);
+	__get_user_error(regs->ARM_fp, &sf->uc.uc_mcontext.arm_fp, err);
+	__get_user_error(regs->ARM_ip, &sf->uc.uc_mcontext.arm_ip, err);
+	__get_user_error(regs->ARM_sp, &sf->uc.uc_mcontext.arm_sp, err);
+	__get_user_error(regs->ARM_lr, &sf->uc.uc_mcontext.arm_lr, err);
+	__get_user_error(regs->ARM_pc, &sf->uc.uc_mcontext.arm_pc, err);
+	__get_user_error(regs->ARM_cpsr, &sf->uc.uc_mcontext.arm_cpsr, err);
+
+	err |= !valid_user_regs(regs);
+
+	aux = (struct aux_sigframe __user *) sf->uc.uc_regspace;
+#ifdef CONFIG_CRUNCH
+	if (err == 0)
+		err |= restore_crunch_context(&aux->crunch);
+#endif
+#ifdef CONFIG_IWMMXT
+	if (err == 0 && test_thread_flag(TIF_USING_IWMMXT))
+		err |= restore_iwmmxt_context(&aux->iwmmxt);
+#endif
+#ifdef CONFIG_VFP
+	if (err == 0)
+		err |= restore_vfp_context(&aux->vfp);
+#endif
+
+	return err;
+}
+
+asmlinkage int sys_sigreturn(struct pt_regs *regs)
+{
+	struct sigframe __user *frame;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	/*
+	 * Since we stacked the signal on a 64-bit boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->ARM_sp & 7)
+		goto badframe;
+
+	frame = (struct sigframe __user *)regs->ARM_sp;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+		goto badframe;
+
+	if (restore_sigframe(regs, frame))
+		goto badframe;
+
+	return regs->ARM_r0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	/*
+	 * Since we stacked the signal on a 64-bit boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->ARM_sp & 7)
+		goto badframe;
+
+	frame = (struct rt_sigframe __user *)regs->ARM_sp;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+		goto badframe;
+
+	if (restore_sigframe(regs, &frame->sig))
+		goto badframe;
+
+	if (restore_altstack(&frame->sig.uc.uc_stack))
+		goto badframe;
+
+	return regs->ARM_r0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+static int
+setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set)
+{
+	struct aux_sigframe __user *aux;
+	int err = 0;
+
+	__put_user_error(regs->ARM_r0, &sf->uc.uc_mcontext.arm_r0, err);
+	__put_user_error(regs->ARM_r1, &sf->uc.uc_mcontext.arm_r1, err);
+	__put_user_error(regs->ARM_r2, &sf->uc.uc_mcontext.arm_r2, err);
+	__put_user_error(regs->ARM_r3, &sf->uc.uc_mcontext.arm_r3, err);
+	__put_user_error(regs->ARM_r4, &sf->uc.uc_mcontext.arm_r4, err);
+	__put_user_error(regs->ARM_r5, &sf->uc.uc_mcontext.arm_r5, err);
+	__put_user_error(regs->ARM_r6, &sf->uc.uc_mcontext.arm_r6, err);
+	__put_user_error(regs->ARM_r7, &sf->uc.uc_mcontext.arm_r7, err);
+	__put_user_error(regs->ARM_r8, &sf->uc.uc_mcontext.arm_r8, err);
+	__put_user_error(regs->ARM_r9, &sf->uc.uc_mcontext.arm_r9, err);
+	__put_user_error(regs->ARM_r10, &sf->uc.uc_mcontext.arm_r10, err);
+	__put_user_error(regs->ARM_fp, &sf->uc.uc_mcontext.arm_fp, err);
+	__put_user_error(regs->ARM_ip, &sf->uc.uc_mcontext.arm_ip, err);
+	__put_user_error(regs->ARM_sp, &sf->uc.uc_mcontext.arm_sp, err);
+	__put_user_error(regs->ARM_lr, &sf->uc.uc_mcontext.arm_lr, err);
+	__put_user_error(regs->ARM_pc, &sf->uc.uc_mcontext.arm_pc, err);
+	__put_user_error(regs->ARM_cpsr, &sf->uc.uc_mcontext.arm_cpsr, err);
+
+	__put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no, err);
+	__put_user_error(current->thread.error_code, &sf->uc.uc_mcontext.error_code, err);
+	__put_user_error(current->thread.address, &sf->uc.uc_mcontext.fault_address, err);
+	__put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err);
+
+	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
+
+	aux = (struct aux_sigframe __user *) sf->uc.uc_regspace;
+#ifdef CONFIG_CRUNCH
+	if (err == 0)
+		err |= preserve_crunch_context(&aux->crunch);
+#endif
+#ifdef CONFIG_IWMMXT
+	if (err == 0 && test_thread_flag(TIF_USING_IWMMXT))
+		err |= preserve_iwmmxt_context(&aux->iwmmxt);
+#endif
+#ifdef CONFIG_VFP
+	if (err == 0)
+		err |= preserve_vfp_context(&aux->vfp);
+#endif
+	__put_user_error(0, &aux->end_magic, err);
+
+	return err;
+}
+
+static inline void __user *
+get_sigframe(struct ksignal *ksig, struct pt_regs *regs, int framesize)
+{
+	unsigned long sp = sigsp(regs->ARM_sp, ksig);
+	void __user *frame;
+
+	/*
+	 * ATPCS B01 mandates 8-byte alignment
+	 */
+	frame = (void __user *)((sp - framesize) & ~7);
+
+	/*
+	 * Check that we can actually write to the signal frame.
+	 */
+	if (!access_ok(VERIFY_WRITE, frame, framesize))
+		frame = NULL;
+
+	return frame;
+}
+
+static int
+setup_return(struct pt_regs *regs, struct ksignal *ksig,
+	     unsigned long __user *rc, void __user *frame)
+{
+	unsigned long handler = (unsigned long)ksig->ka.sa.sa_handler;
+	unsigned long retcode;
+	int thumb = 0;
+	unsigned long cpsr = regs->ARM_cpsr & ~(PSR_f | PSR_E_BIT);
+
+	cpsr |= PSR_ENDSTATE;
+
+	/*
+	 * Maybe we need to deliver a 32-bit signal to a 26-bit task.
+	 */
+	if (ksig->ka.sa.sa_flags & SA_THIRTYTWO)
+		cpsr = (cpsr & ~MODE_MASK) | USR_MODE;
+
+#ifdef CONFIG_ARM_THUMB
+	if (elf_hwcap & HWCAP_THUMB) {
+		/*
+		 * The LSB of the handler determines if we're going to
+		 * be using THUMB or ARM mode for this signal handler.
+		 */
+		thumb = handler & 1;
+
+#if __LINUX_ARM_ARCH__ >= 7
+		/*
+		 * Clear the If-Then Thumb-2 execution state
+		 * ARM spec requires this to be all 000s in ARM mode
+		 * Snapdragon S4/Krait misbehaves on a Thumb=>ARM
+		 * signal transition without this.
+		 */
+		cpsr &= ~PSR_IT_MASK;
+#endif
+
+		if (thumb) {
+			cpsr |= PSR_T_BIT;
+		} else
+			cpsr &= ~PSR_T_BIT;
+	}
+#endif
+
+	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+		retcode = (unsigned long)ksig->ka.sa.sa_restorer;
+	} else {
+		unsigned int idx = thumb << 1;
+
+		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+			idx += 3;
+
+		/*
+		 * Put the sigreturn code on the stack no matter which return
+		 * mechanism we use in order to remain ABI compliant
+		 */
+		if (__put_user(sigreturn_codes[idx],   rc) ||
+		    __put_user(sigreturn_codes[idx+1], rc+1))
+			return 1;
+
+#ifdef CONFIG_MMU
+		if (cpsr & MODE32_BIT) {
+			struct mm_struct *mm = current->mm;
+
+			/*
+			 * 32-bit code can use the signal return page
+			 * except when the MPU has protected the vectors
+			 * page from PL0
+			 */
+			retcode = mm->context.sigpage + signal_return_offset +
+				  (idx << 2) + thumb;
+		} else
+#endif
+		{
+			/*
+			 * Ensure that the instruction cache sees
+			 * the return code written onto the stack.
+			 */
+			flush_icache_range((unsigned long)rc,
+					   (unsigned long)(rc + 2));
+
+			retcode = ((unsigned long)rc) + thumb;
+		}
+	}
+
+	regs->ARM_r0 = ksig->sig;
+	regs->ARM_sp = (unsigned long)frame;
+	regs->ARM_lr = retcode;
+	regs->ARM_pc = handler;
+	regs->ARM_cpsr = cpsr;
+
+	return 0;
+}
+
+static int
+setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
+{
+	struct sigframe __user *frame = get_sigframe(ksig, regs, sizeof(*frame));
+	int err = 0;
+
+	if (!frame)
+		return 1;
+
+	/*
+	 * Set uc.uc_flags to a value which sc.trap_no would never have.
+	 */
+	__put_user_error(0x5ac3c35a, &frame->uc.uc_flags, err);
+
+	err |= setup_sigframe(frame, regs, set);
+	if (err == 0)
+		err = setup_return(regs, ksig, frame->retcode, frame);
+
+	return err;
+}
+
+static int
+setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame = get_sigframe(ksig, regs, sizeof(*frame));
+	int err = 0;
+
+	if (!frame)
+		return 1;
+
+	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+
+	__put_user_error(0, &frame->sig.uc.uc_flags, err);
+	__put_user_error(NULL, &frame->sig.uc.uc_link, err);
+
+	err |= __save_altstack(&frame->sig.uc.uc_stack, regs->ARM_sp);
+	err |= setup_sigframe(&frame->sig, regs, set);
+	if (err == 0)
+		err = setup_return(regs, ksig, frame->sig.retcode, frame);
+
+	if (err == 0) {
+		/*
+		 * For realtime signals we must also set the second and third
+		 * arguments for the signal handler.
+		 *   -- Peter Maydell <pmaydell@chiark.greenend.org.uk> 2000-12-06
+		 */
+		regs->ARM_r1 = (unsigned long)&frame->info;
+		regs->ARM_r2 = (unsigned long)&frame->sig.uc;
+	}
+
+	return err;
+}
+
+/*
+ * OK, we're invoking a handler
+ */	
+static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+{
+	sigset_t *oldset = sigmask_to_save();
+	int ret;
+
+	/*
+	 * Set up the stack frame
+	 */
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		ret = setup_rt_frame(ksig, oldset, regs);
+	else
+		ret = setup_frame(ksig, oldset, regs);
+
+	/*
+	 * Check that the resulting registers are actually sane.
+	 */
+	ret |= !valid_user_regs(regs);
+
+	signal_setup_done(ret, ksig, 0);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+static int do_signal(struct pt_regs *regs, int syscall)
+{
+	unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
+	struct ksignal ksig;
+	int restart = 0;
+
+	/*
+	 * If we were from a system call, check for system call restarting...
+	 */
+	if (syscall) {
+		continue_addr = regs->ARM_pc;
+		restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4);
+		retval = regs->ARM_r0;
+
+		/*
+		 * Prepare for system call restart.  We do this here so that a
+		 * debugger will see the already changed PSW.
+		 */
+		switch (retval) {
+		case -ERESTART_RESTARTBLOCK:
+			restart -= 2;
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			restart++;
+			regs->ARM_r0 = regs->ARM_ORIG_r0;
+			regs->ARM_pc = restart_addr;
+			break;
+		}
+	}
+
+	/*
+	 * Get the signal to deliver.  When running under ptrace, at this
+	 * point the debugger may change all our registers ...
+	 */
+	/*
+	 * Depending on the signal settings we may need to revert the
+	 * decision to restart the system call.  But skip this if a
+	 * debugger has chosen to restart at a different PC.
+	 */
+	if (get_signal(&ksig)) {
+		/* handler */
+		if (unlikely(restart) && regs->ARM_pc == restart_addr) {
+			if (retval == -ERESTARTNOHAND ||
+			    retval == -ERESTART_RESTARTBLOCK
+			    || (retval == -ERESTARTSYS
+				&& !(ksig.ka.sa.sa_flags & SA_RESTART))) {
+				regs->ARM_r0 = -EINTR;
+				regs->ARM_pc = continue_addr;
+			}
+		}
+		handle_signal(&ksig, regs);
+	} else {
+		/* no handler */
+		restore_saved_sigmask();
+		if (unlikely(restart) && regs->ARM_pc == restart_addr) {
+			regs->ARM_pc = continue_addr;
+			return restart;
+		}
+	}
+	return 0;
+}
+
+asmlinkage int
+do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
+{
+	do {
+		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+			schedule();
+		} else {
+			if (unlikely(!user_mode(regs)))
+				return 0;
+			local_irq_enable();
+			if (thread_flags & _TIF_SIGPENDING) {
+				int restart = do_signal(regs, syscall);
+				if (unlikely(restart)) {
+					/*
+					 * Restart without handlers.
+					 * Deal with it without leaving
+					 * the kernel space.
+					 */
+					return restart;
+				}
+				syscall = 0;
+			} else if (thread_flags & _TIF_UPROBE) {
+				uprobe_notify_resume(regs);
+			} else {
+				clear_thread_flag(TIF_NOTIFY_RESUME);
+				tracehook_notify_resume(regs);
+			}
+		}
+		local_irq_disable();
+		thread_flags = current_thread_info()->flags;
+	} while (thread_flags & _TIF_WORK_MASK);
+	return 0;
+}
+
+struct page *get_signal_page(void)
+{
+	unsigned long ptr;
+	unsigned offset;
+	struct page *page;
+	void *addr;
+
+	page = alloc_pages(GFP_KERNEL, 0);
+
+	if (!page)
+		return NULL;
+
+	addr = page_address(page);
+
+	/* Give the signal return code some randomness */
+	offset = 0x200 + (get_random_int() & 0x7fc);
+	signal_return_offset = offset;
+
+	/*
+	 * Copy signal return handlers into the vector page, and
+	 * set sigreturn to be a pointer to these.
+	 */
+	memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes));
+
+	ptr = (unsigned long)addr + offset;
+	flush_icache_range(ptr, ptr + sizeof(sigreturn_codes));
+
+	return page;
+}
diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S
new file mode 100644
index 000000000..b84d0cb13
--- /dev/null
+++ b/arch/arm/kernel/sigreturn_codes.S
@@ -0,0 +1,102 @@
+/*
+ * sigreturn_codes.S - code sinpets for sigreturn syscalls
+ *
+ * Created by:	Victor Kamensky, 2013-08-13
+ * Copyright:	(C) 2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/unistd.h>
+
+/*
+ * For ARM syscalls, we encode the syscall number into the instruction.
+ * With EABI, the syscall number has to be loaded into r7. As result
+ * ARM syscall sequence snippet will have move and svc in .arm encoding
+ *
+ * For Thumb syscalls, we pass the syscall number via r7.  We therefore
+ * need two 16-bit instructions in .thumb encoding
+ *
+ * Please note sigreturn_codes code are not executed in place. Instead
+ * they just copied by kernel into appropriate places. Code inside of
+ * arch/arm/kernel/signal.c is very sensitive to layout of these code
+ * snippets.
+ */
+
+/*
+ * In CPU_THUMBONLY case kernel arm opcodes are not allowed.
+ * Note in this case codes skips those instructions but it uses .org
+ * directive to keep correct layout of sigreturn_codes array.
+ */
+#ifndef CONFIG_CPU_THUMBONLY
+#define ARM_OK(code...)	code
+#else
+#define ARM_OK(code...)
+#endif
+
+	.macro arm_slot n
+	.org	sigreturn_codes + 12 * (\n)
+ARM_OK(	.arm	)
+	.endm
+
+	.macro thumb_slot n
+	.org	sigreturn_codes + 12 * (\n) + 8
+	.thumb
+	.endm
+
+#if __LINUX_ARM_ARCH__ <= 4
+	/*
+	 * Note we manually set minimally required arch that supports
+	 * required thumb opcodes for early arch versions. It is OK
+	 * for this file to be used in combination with other
+	 * lower arch variants, since these code snippets are only
+	 * used as input data.
+	 */
+	.arch armv4t
+#endif
+
+	.section .rodata
+	.global sigreturn_codes
+	.type	sigreturn_codes, #object
+
+	.align
+
+sigreturn_codes:
+
+	/* ARM sigreturn syscall code snippet */
+	arm_slot 0
+ARM_OK(	mov	r7, #(__NR_sigreturn - __NR_SYSCALL_BASE)	)
+ARM_OK(	swi	#(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)	)
+
+	/* Thumb sigreturn syscall code snippet */
+	thumb_slot 0
+	movs	r7, #(__NR_sigreturn - __NR_SYSCALL_BASE)
+	swi	#0
+
+	/* ARM sigreturn_rt syscall code snippet */
+	arm_slot 1
+ARM_OK(	mov	r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE)	)
+ARM_OK(	swi	#(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)	)
+
+	/* Thumb sigreturn_rt syscall code snippet */
+	thumb_slot 1
+	movs	r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE)
+	swi	#0
+
+	/*
+	 * Note on addtional space: setup_return in signal.c
+	 * algorithm uses two words copy regardless whether
+	 * it is thumb case or not, so we need additional
+	 * word after real last entry.
+	 */
+	arm_slot 2
+	.space	4
+
+	.size	sigreturn_codes, . - sigreturn_codes
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
new file mode 100644
index 000000000..7d37bfc50
--- /dev/null
+++ b/arch/arm/kernel/sleep.S
@@ -0,0 +1,162 @@
+#include <linux/linkage.h>
+#include <linux/threads.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/glue-cache.h>
+#include <asm/glue-proc.h>
+	.text
+
+/*
+ * Implementation of MPIDR hash algorithm through shifting
+ * and OR'ing.
+ *
+ * @dst: register containing hash result
+ * @rs0: register containing affinity level 0 bit shift
+ * @rs1: register containing affinity level 1 bit shift
+ * @rs2: register containing affinity level 2 bit shift
+ * @mpidr: register containing MPIDR value
+ * @mask: register containing MPIDR mask
+ *
+ * Pseudo C-code:
+ *
+ *u32 dst;
+ *
+ *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) {
+ *	u32 aff0, aff1, aff2;
+ *	u32 mpidr_masked = mpidr & mask;
+ *	aff0 = mpidr_masked & 0xff;
+ *	aff1 = mpidr_masked & 0xff00;
+ *	aff2 = mpidr_masked & 0xff0000;
+ *	dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2);
+ *}
+ * Input registers: rs0, rs1, rs2, mpidr, mask
+ * Output register: dst
+ * Note: input and output registers must be disjoint register sets
+         (eg: a macro instance with mpidr = r1 and dst = r1 is invalid)
+ */
+	.macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask
+	and	\mpidr, \mpidr, \mask			@ mask out MPIDR bits
+	and	\dst, \mpidr, #0xff			@ mask=aff0
+ ARM(	mov	\dst, \dst, lsr \rs0		)	@ dst=aff0>>rs0
+ THUMB(	lsr	\dst, \dst, \rs0		)
+	and	\mask, \mpidr, #0xff00			@ mask = aff1
+ ARM(	orr	\dst, \dst, \mask, lsr \rs1	)	@ dst|=(aff1>>rs1)
+ THUMB(	lsr	\mask, \mask, \rs1		)
+ THUMB(	orr	\dst, \dst, \mask		)
+	and	\mask, \mpidr, #0xff0000		@ mask = aff2
+ ARM(	orr	\dst, \dst, \mask, lsr \rs2	)	@ dst|=(aff2>>rs2)
+ THUMB(	lsr	\mask, \mask, \rs2		)
+ THUMB(	orr	\dst, \dst, \mask		)
+	.endm
+
+/*
+ * Save CPU state for a suspend.  This saves the CPU general purpose
+ * registers, and allocates space on the kernel stack to save the CPU
+ * specific registers and some other data for resume.
+ *  r0 = suspend function arg0
+ *  r1 = suspend function
+ *  r2 = MPIDR value the resuming CPU will use
+ */
+ENTRY(__cpu_suspend)
+	stmfd	sp!, {r4 - r11, lr}
+#ifdef MULTI_CPU
+	ldr	r10, =processor
+	ldr	r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
+#else
+	ldr	r4, =cpu_suspend_size
+#endif
+	mov	r5, sp			@ current virtual SP
+	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
+	sub	sp, sp, r4		@ allocate CPU state on stack
+	ldr	r3, =sleep_save_sp
+	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
+	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
+	ALT_SMP(ldr r0, =mpidr_hash)
+	ALT_UP_B(1f)
+	/* This ldmia relies on the memory layout of the mpidr_hash struct */
+	ldmia	r0, {r1, r6-r8}	@ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts
+	compute_mpidr_hash	r0, r6, r7, r8, r2, r1
+	add	r3, r3, r0, lsl #2
+1:	mov	r2, r5			@ virtual SP
+	mov	r1, r4			@ size of save block
+	add	r0, sp, #8		@ pointer to save block
+	bl	__cpu_suspend_save
+	adr	lr, BSYM(cpu_suspend_abort)
+	ldmfd	sp!, {r0, pc}		@ call suspend fn
+ENDPROC(__cpu_suspend)
+	.ltorg
+
+cpu_suspend_abort:
+	ldmia	sp!, {r1 - r3}		@ pop phys pgd, virt SP, phys resume fn
+	teq	r0, #0
+	moveq	r0, #1			@ force non-zero value
+	mov	sp, r2
+	ldmfd	sp!, {r4 - r11, pc}
+ENDPROC(cpu_suspend_abort)
+
+/*
+ * r0 = control register value
+ */
+	.align	5
+	.pushsection	.idmap.text,"ax"
+ENTRY(cpu_resume_mmu)
+	ldr	r3, =cpu_resume_after_mmu
+	instr_sync
+	mcr	p15, 0, r0, c1, c0, 0	@ turn on MMU, I-cache, etc
+	mrc	p15, 0, r0, c0, c0, 0	@ read id reg
+	instr_sync
+	mov	r0, r0
+	mov	r0, r0
+	ret	r3			@ jump to virtual address
+ENDPROC(cpu_resume_mmu)
+	.popsection
+cpu_resume_after_mmu:
+	bl	cpu_init		@ restore the und/abt/irq banked regs
+	mov	r0, #0			@ return zero on success
+	ldmfd	sp!, {r4 - r11, pc}
+ENDPROC(cpu_resume_after_mmu)
+
+	.text
+	.align
+ENTRY(cpu_resume)
+ARM_BE8(setend be)			@ ensure we are in BE mode
+#ifdef CONFIG_ARM_VIRT_EXT
+	bl	__hyp_stub_install_secondary
+#endif
+	safe_svcmode_maskall r1
+	mov	r1, #0
+	ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
+	ALT_UP_B(1f)
+	adr	r2, mpidr_hash_ptr
+	ldr	r3, [r2]
+	add	r2, r2, r3		@ r2 = struct mpidr_hash phys address
+	/*
+	 * This ldmia relies on the memory layout of the mpidr_hash
+	 * struct mpidr_hash.
+	 */
+	ldmia	r2, { r3-r6 }	@ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts
+	compute_mpidr_hash	r1, r4, r5, r6, r0, r3
+1:
+	adr	r0, _sleep_save_sp
+	ldr	r2, [r0]
+	add	r0, r0, r2
+	ldr	r0, [r0, #SLEEP_SAVE_SP_PHYS]
+	ldr	r0, [r0, r1, lsl #2]
+
+	@ load phys pgd, stack, resume fn
+  ARM(	ldmia	r0!, {r1, sp, pc}	)
+THUMB(	ldmia	r0!, {r1, r2, r3}	)
+THUMB(	mov	sp, r2			)
+THUMB(	bx	r3			)
+ENDPROC(cpu_resume)
+
+	.align 2
+_sleep_save_sp:
+	.long	sleep_save_sp - .
+mpidr_hash_ptr:
+	.long	mpidr_hash - .			@ mpidr_hash struct offset
+
+	.data
+	.type	sleep_save_sp, #object
+ENTRY(sleep_save_sp)
+	.space	SLEEP_SAVE_SP_SZ		@ struct sleep_save_sp
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
new file mode 100644
index 000000000..f11d82527
--- /dev/null
+++ b/arch/arm/kernel/smp.c
@@ -0,0 +1,724 @@
+/*
+ *  linux/arch/arm/kernel/smp.c
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cache.h>
+#include <linux/profile.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/seq_file.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/clockchips.h>
+#include <linux/completion.h>
+#include <linux/cpufreq.h>
+#include <linux/irq_work.h>
+
+#include <linux/atomic.h>
+#include <asm/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/exception.h>
+#include <asm/idmap.h>
+#include <asm/topology.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/tlbflush.h>
+#include <asm/ptrace.h>
+#include <asm/smp_plat.h>
+#include <asm/virt.h>
+#include <asm/mach/arch.h>
+#include <asm/mpu.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/ipi.h>
+
+/*
+ * as from 2.5, kernels no longer have an init_tasks structure
+ * so we need some other way of telling a new secondary core
+ * where to place its SVC stack
+ */
+struct secondary_data secondary_data;
+
+/*
+ * control for which core is the next to come out of the secondary
+ * boot "holding pen"
+ */
+volatile int pen_release = -1;
+
+enum ipi_msg_type {
+	IPI_WAKEUP,
+	IPI_TIMER,
+	IPI_RESCHEDULE,
+	IPI_CALL_FUNC,
+	IPI_CALL_FUNC_SINGLE,
+	IPI_CPU_STOP,
+	IPI_IRQ_WORK,
+	IPI_COMPLETION,
+};
+
+static DECLARE_COMPLETION(cpu_running);
+
+static struct smp_operations smp_ops;
+
+void __init smp_set_ops(struct smp_operations *ops)
+{
+	if (ops)
+		smp_ops = *ops;
+};
+
+static unsigned long get_arch_pgd(pgd_t *pgd)
+{
+	phys_addr_t pgdir = virt_to_idmap(pgd);
+	BUG_ON(pgdir & ARCH_PGD_MASK);
+	return pgdir >> ARCH_PGD_SHIFT;
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+	int ret;
+
+	if (!smp_ops.smp_boot_secondary)
+		return -ENOSYS;
+
+	/*
+	 * We need to tell the secondary core where to find
+	 * its stack and the page tables.
+	 */
+	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+#ifdef CONFIG_ARM_MPU
+	secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr;
+#endif
+
+#ifdef CONFIG_MMU
+	secondary_data.pgdir = get_arch_pgd(idmap_pgd);
+	secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
+#endif
+	sync_cache_w(&secondary_data);
+
+	/*
+	 * Now bring the CPU into our world.
+	 */
+	ret = smp_ops.smp_boot_secondary(cpu, idle);
+	if (ret == 0) {
+		/*
+		 * CPU was successfully started, wait for it
+		 * to come online or time out.
+		 */
+		wait_for_completion_timeout(&cpu_running,
+						 msecs_to_jiffies(1000));
+
+		if (!cpu_online(cpu)) {
+			pr_crit("CPU%u: failed to come online\n", cpu);
+			ret = -EIO;
+		}
+	} else {
+		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+	}
+
+
+	memset(&secondary_data, 0, sizeof(secondary_data));
+	return ret;
+}
+
+/* platform specific SMP operations */
+void __init smp_init_cpus(void)
+{
+	if (smp_ops.smp_init_cpus)
+		smp_ops.smp_init_cpus();
+}
+
+int platform_can_secondary_boot(void)
+{
+	return !!smp_ops.smp_boot_secondary;
+}
+
+int platform_can_cpu_hotplug(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	if (smp_ops.cpu_kill)
+		return 1;
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int platform_cpu_kill(unsigned int cpu)
+{
+	if (smp_ops.cpu_kill)
+		return smp_ops.cpu_kill(cpu);
+	return 1;
+}
+
+static int platform_cpu_disable(unsigned int cpu)
+{
+	if (smp_ops.cpu_disable)
+		return smp_ops.cpu_disable(cpu);
+
+	/*
+	 * By default, allow disabling all CPUs except the first one,
+	 * since this is special on a lot of platforms, e.g. because
+	 * of clock tick interrupts.
+	 */
+	return cpu == 0 ? -EPERM : 0;
+}
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ */
+int __cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+	int ret;
+
+	ret = platform_cpu_disable(cpu);
+	if (ret)
+		return ret;
+
+	/*
+	 * Take this CPU offline.  Once we clear this, we can't return,
+	 * and we must not schedule until we're ready to give up the cpu.
+	 */
+	set_cpu_online(cpu, false);
+
+	/*
+	 * OK - migrate IRQs away from this CPU
+	 */
+	migrate_irqs();
+
+	/*
+	 * Flush user cache and TLB mappings, and then remove this CPU
+	 * from the vm mask set of all processes.
+	 *
+	 * Caches are flushed to the Level of Unification Inner Shareable
+	 * to write-back dirty lines to unified caches shared by all CPUs.
+	 */
+	flush_cache_louis();
+	local_flush_tlb_all();
+
+	clear_tasks_mm_cpumask(cpu);
+
+	return 0;
+}
+
+static DECLARE_COMPLETION(cpu_died);
+
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ */
+void __cpu_die(unsigned int cpu)
+{
+	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+		pr_err("CPU%u: cpu didn't die\n", cpu);
+		return;
+	}
+	pr_notice("CPU%u: shutdown\n", cpu);
+
+	/*
+	 * platform_cpu_kill() is generally expected to do the powering off
+	 * and/or cutting of clocks to the dying CPU.  Optionally, this may
+	 * be done by the CPU which is dying in preference to supporting
+	 * this call, but that means there is _no_ synchronisation between
+	 * the requesting CPU and the dying CPU actually losing power.
+	 */
+	if (!platform_cpu_kill(cpu))
+		pr_err("CPU%u: unable to kill\n", cpu);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shutdown.
+ *
+ * Note that we disable IRQs here, but do not re-enable them
+ * before returning to the caller. This is also the behaviour
+ * of the other hotplug-cpu capable cores, so presumably coming
+ * out of idle fixes this.
+ */
+void __ref cpu_die(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	idle_task_exit();
+
+	local_irq_disable();
+
+	/*
+	 * Flush the data out of the L1 cache for this CPU.  This must be
+	 * before the completion to ensure that data is safely written out
+	 * before platform_cpu_kill() gets called - which may disable
+	 * *this* CPU and power down its cache.
+	 */
+	flush_cache_louis();
+
+	/*
+	 * Tell __cpu_die() that this CPU is now safe to dispose of.  Once
+	 * this returns, power and/or clocks can be removed at any point
+	 * from this CPU and its cache by platform_cpu_kill().
+	 */
+	complete(&cpu_died);
+
+	/*
+	 * Ensure that the cache lines associated with that completion are
+	 * written out.  This covers the case where _this_ CPU is doing the
+	 * powering down, to ensure that the completion is visible to the
+	 * CPU waiting for this one.
+	 */
+	flush_cache_louis();
+
+	/*
+	 * The actual CPU shutdown procedure is at least platform (if not
+	 * CPU) specific.  This may remove power, or it may simply spin.
+	 *
+	 * Platforms are generally expected *NOT* to return from this call,
+	 * although there are some which do because they have no way to
+	 * power down the CPU.  These platforms are the _only_ reason we
+	 * have a return path which uses the fragment of assembly below.
+	 *
+	 * The return path should not be used for platforms which can
+	 * power off the CPU.
+	 */
+	if (smp_ops.cpu_die)
+		smp_ops.cpu_die(cpu);
+
+	pr_warn("CPU%u: smp_ops.cpu_die() returned, trying to resuscitate\n",
+		cpu);
+
+	/*
+	 * Do not return to the idle loop - jump back to the secondary
+	 * cpu initialisation.  There's some initialisation which needs
+	 * to be repeated to undo the effects of taking the CPU offline.
+	 */
+	__asm__("mov	sp, %0\n"
+	"	mov	fp, #0\n"
+	"	b	secondary_start_kernel"
+		:
+		: "r" (task_stack_page(current) + THREAD_SIZE - 8));
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Called by both boot and secondaries to move global data into
+ * per-processor storage.
+ */
+static void smp_store_cpu_info(unsigned int cpuid)
+{
+	struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid);
+
+	cpu_info->loops_per_jiffy = loops_per_jiffy;
+	cpu_info->cpuid = read_cpuid_id();
+
+	store_cpu_topology(cpuid);
+}
+
+/*
+ * This is the secondary CPU boot entry.  We're using this CPUs
+ * idle thread stack, but a set of temporary page tables.
+ */
+asmlinkage void secondary_start_kernel(void)
+{
+	struct mm_struct *mm = &init_mm;
+	unsigned int cpu;
+
+	/*
+	 * The identity mapping is uncached (strongly ordered), so
+	 * switch away from it before attempting any exclusive accesses.
+	 */
+	cpu_switch_mm(mm->pgd, mm);
+	local_flush_bp_all();
+	enter_lazy_tlb(mm, current);
+	local_flush_tlb_all();
+
+	/*
+	 * All kernel threads share the same mm context; grab a
+	 * reference and switch to it.
+	 */
+	cpu = smp_processor_id();
+	atomic_inc(&mm->mm_count);
+	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
+
+	cpu_init();
+
+	pr_debug("CPU%u: Booted secondary processor\n", cpu);
+
+	preempt_disable();
+	trace_hardirqs_off();
+
+	/*
+	 * Give the platform a chance to do its own initialisation.
+	 */
+	if (smp_ops.smp_secondary_init)
+		smp_ops.smp_secondary_init(cpu);
+
+	notify_cpu_starting(cpu);
+
+	calibrate_delay();
+
+	smp_store_cpu_info(cpu);
+
+	/*
+	 * OK, now it's safe to let the boot CPU continue.  Wait for
+	 * the CPU migration code to notice that the CPU is online
+	 * before we continue - which happens after __cpu_up returns.
+	 */
+	set_cpu_online(cpu, true);
+	complete(&cpu_running);
+
+	local_irq_enable();
+	local_fiq_enable();
+
+	/*
+	 * OK, it's off to the idle thread for us
+	 */
+	cpu_startup_entry(CPUHP_ONLINE);
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	int cpu;
+	unsigned long bogosum = 0;
+
+	for_each_online_cpu(cpu)
+		bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy;
+
+	printk(KERN_INFO "SMP: Total of %d processors activated "
+	       "(%lu.%02lu BogoMIPS).\n",
+	       num_online_cpus(),
+	       bogosum / (500000/HZ),
+	       (bogosum / (5000/HZ)) % 100);
+
+	hyp_mode_check();
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int ncores = num_possible_cpus();
+
+	init_cpu_topology();
+
+	smp_store_cpu_info(smp_processor_id());
+
+	/*
+	 * are we trying to boot more cores than exist?
+	 */
+	if (max_cpus > ncores)
+		max_cpus = ncores;
+	if (ncores > 1 && max_cpus) {
+		/*
+		 * Initialise the present map, which describes the set of CPUs
+		 * actually populated at the present time. A platform should
+		 * re-initialize the map in the platforms smp_prepare_cpus()
+		 * if present != possible (e.g. physical hotplug).
+		 */
+		init_cpu_present(cpu_possible_mask);
+
+		/*
+		 * Initialise the SCU if there are more than one CPU
+		 * and let them know where to start.
+		 */
+		if (smp_ops.smp_prepare_cpus)
+			smp_ops.smp_prepare_cpus(max_cpus);
+	}
+}
+
+static void (*__smp_cross_call)(const struct cpumask *, unsigned int);
+
+void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
+{
+	if (!__smp_cross_call)
+		__smp_cross_call = fn;
+}
+
+static const char *ipi_types[NR_IPI] __tracepoint_string = {
+#define S(x,s)	[x] = s
+	S(IPI_WAKEUP, "CPU wakeup interrupts"),
+	S(IPI_TIMER, "Timer broadcast interrupts"),
+	S(IPI_RESCHEDULE, "Rescheduling interrupts"),
+	S(IPI_CALL_FUNC, "Function call interrupts"),
+	S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
+	S(IPI_CPU_STOP, "CPU stop interrupts"),
+	S(IPI_IRQ_WORK, "IRQ work interrupts"),
+	S(IPI_COMPLETION, "completion interrupts"),
+};
+
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
+{
+	trace_ipi_raise(target, ipi_types[ipinr]);
+	__smp_cross_call(target, ipinr);
+}
+
+void show_ipi_list(struct seq_file *p, int prec)
+{
+	unsigned int cpu, i;
+
+	for (i = 0; i < NR_IPI; i++) {
+		seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
+
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10u ",
+				   __get_irq_stat(cpu, ipi_irqs[i]));
+
+		seq_printf(p, " %s\n", ipi_types[i]);
+	}
+}
+
+u64 smp_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = 0;
+	int i;
+
+	for (i = 0; i < NR_IPI; i++)
+		sum += __get_irq_stat(cpu, ipi_irqs[i]);
+
+	return sum;
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_CALL_FUNC);
+}
+
+void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_WAKEUP);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+}
+
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+	if (arch_irq_work_has_interrupt())
+		smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+}
+#endif
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_TIMER);
+}
+#endif
+
+static DEFINE_RAW_SPINLOCK(stop_lock);
+
+/*
+ * ipi_cpu_stop - handle IPI from smp_send_stop()
+ */
+static void ipi_cpu_stop(unsigned int cpu)
+{
+	if (system_state == SYSTEM_BOOTING ||
+	    system_state == SYSTEM_RUNNING) {
+		raw_spin_lock(&stop_lock);
+		pr_crit("CPU%u: stopping\n", cpu);
+		dump_stack();
+		raw_spin_unlock(&stop_lock);
+	}
+
+	set_cpu_online(cpu, false);
+
+	local_fiq_disable();
+	local_irq_disable();
+
+	while (1)
+		cpu_relax();
+}
+
+static DEFINE_PER_CPU(struct completion *, cpu_completion);
+
+int register_ipi_completion(struct completion *completion, int cpu)
+{
+	per_cpu(cpu_completion, cpu) = completion;
+	return IPI_COMPLETION;
+}
+
+static void ipi_complete(unsigned int cpu)
+{
+	complete(per_cpu(cpu_completion, cpu));
+}
+
+/*
+ * Main handler for inter-processor interrupts
+ */
+asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
+{
+	handle_IPI(ipinr, regs);
+}
+
+void handle_IPI(int ipinr, struct pt_regs *regs)
+{
+	unsigned int cpu = smp_processor_id();
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	if ((unsigned)ipinr < NR_IPI) {
+		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
+		__inc_irq_stat(cpu, ipi_irqs[ipinr]);
+	}
+
+	switch (ipinr) {
+	case IPI_WAKEUP:
+		break;
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	case IPI_TIMER:
+		irq_enter();
+		tick_receive_broadcast();
+		irq_exit();
+		break;
+#endif
+
+	case IPI_RESCHEDULE:
+		scheduler_ipi();
+		break;
+
+	case IPI_CALL_FUNC:
+		irq_enter();
+		generic_smp_call_function_interrupt();
+		irq_exit();
+		break;
+
+	case IPI_CALL_FUNC_SINGLE:
+		irq_enter();
+		generic_smp_call_function_single_interrupt();
+		irq_exit();
+		break;
+
+	case IPI_CPU_STOP:
+		irq_enter();
+		ipi_cpu_stop(cpu);
+		irq_exit();
+		break;
+
+#ifdef CONFIG_IRQ_WORK
+	case IPI_IRQ_WORK:
+		irq_enter();
+		irq_work_run();
+		irq_exit();
+		break;
+#endif
+
+	case IPI_COMPLETION:
+		irq_enter();
+		ipi_complete(cpu);
+		irq_exit();
+		break;
+
+	default:
+		pr_crit("CPU%u: Unknown IPI message 0x%x\n",
+		        cpu, ipinr);
+		break;
+	}
+
+	if ((unsigned)ipinr < NR_IPI)
+		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+	set_irq_regs(old_regs);
+}
+
+void smp_send_reschedule(int cpu)
+{
+	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
+}
+
+void smp_send_stop(void)
+{
+	unsigned long timeout;
+	struct cpumask mask;
+
+	cpumask_copy(&mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &mask);
+	if (!cpumask_empty(&mask))
+		smp_cross_call(&mask, IPI_CPU_STOP);
+
+	/* Wait up to one second for other CPUs to stop */
+	timeout = USEC_PER_SEC;
+	while (num_online_cpus() > 1 && timeout--)
+		udelay(1);
+
+	if (num_online_cpus() > 1)
+		pr_warn("SMP: failed to stop secondary CPUs\n");
+}
+
+/*
+ * not supported here
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+static DEFINE_PER_CPU(unsigned long, l_p_j_ref);
+static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq);
+static unsigned long global_l_p_j_ref;
+static unsigned long global_l_p_j_ref_freq;
+
+static int cpufreq_callback(struct notifier_block *nb,
+					unsigned long val, void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	int cpu = freq->cpu;
+
+	if (freq->flags & CPUFREQ_CONST_LOOPS)
+		return NOTIFY_OK;
+
+	if (!per_cpu(l_p_j_ref, cpu)) {
+		per_cpu(l_p_j_ref, cpu) =
+			per_cpu(cpu_data, cpu).loops_per_jiffy;
+		per_cpu(l_p_j_ref_freq, cpu) = freq->old;
+		if (!global_l_p_j_ref) {
+			global_l_p_j_ref = loops_per_jiffy;
+			global_l_p_j_ref_freq = freq->old;
+		}
+	}
+
+	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
+		loops_per_jiffy = cpufreq_scale(global_l_p_j_ref,
+						global_l_p_j_ref_freq,
+						freq->new);
+		per_cpu(cpu_data, cpu).loops_per_jiffy =
+			cpufreq_scale(per_cpu(l_p_j_ref, cpu),
+					per_cpu(l_p_j_ref_freq, cpu),
+					freq->new);
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpufreq_notifier = {
+	.notifier_call  = cpufreq_callback,
+};
+
+static int __init register_cpufreq_notifier(void)
+{
+	return cpufreq_register_notifier(&cpufreq_notifier,
+						CPUFREQ_TRANSITION_NOTIFIER);
+}
+core_initcall(register_cpufreq_notifier);
+
+#endif
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
new file mode 100644
index 000000000..72f9241ad
--- /dev/null
+++ b/arch/arm/kernel/smp_scu.c
@@ -0,0 +1,96 @@
+/*
+ *  linux/arch/arm/kernel/smp_scu.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/io.h>
+
+#include <asm/smp_plat.h>
+#include <asm/smp_scu.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+
+#define SCU_CTRL		0x00
+#define SCU_ENABLE		(1 << 0)
+#define SCU_STANDBY_ENABLE	(1 << 5)
+#define SCU_CONFIG		0x04
+#define SCU_CPU_STATUS		0x08
+#define SCU_INVALIDATE		0x0c
+#define SCU_FPGA_REVISION	0x10
+
+#ifdef CONFIG_SMP
+/*
+ * Get the number of CPU cores from the SCU configuration
+ */
+unsigned int __init scu_get_core_count(void __iomem *scu_base)
+{
+	unsigned int ncores = readl_relaxed(scu_base + SCU_CONFIG);
+	return (ncores & 0x03) + 1;
+}
+
+/*
+ * Enable the SCU
+ */
+void scu_enable(void __iomem *scu_base)
+{
+	u32 scu_ctrl;
+
+#ifdef CONFIG_ARM_ERRATA_764369
+	/* Cortex-A9 only */
+	if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) {
+		scu_ctrl = readl_relaxed(scu_base + 0x30);
+		if (!(scu_ctrl & 1))
+			writel_relaxed(scu_ctrl | 0x1, scu_base + 0x30);
+	}
+#endif
+
+	scu_ctrl = readl_relaxed(scu_base + SCU_CTRL);
+	/* already enabled? */
+	if (scu_ctrl & SCU_ENABLE)
+		return;
+
+	scu_ctrl |= SCU_ENABLE;
+
+	/* Cortex-A9 earlier than r2p0 has no standby bit in SCU */
+	if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090 &&
+	    (read_cpuid_id() & 0x00f0000f) >= 0x00200000)
+		scu_ctrl |= SCU_STANDBY_ENABLE;
+
+	writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);
+
+	/*
+	 * Ensure that the data accessed by CPU0 before the SCU was
+	 * initialised is visible to the other CPUs.
+	 */
+	flush_cache_all();
+}
+#endif
+
+/*
+ * Set the executing CPUs power mode as defined.  This will be in
+ * preparation for it executing a WFI instruction.
+ *
+ * This function must be called with preemption disabled, and as it
+ * has the side effect of disabling coherency, caches must have been
+ * flushed.  Interrupts must also have been disabled.
+ */
+int scu_power_mode(void __iomem *scu_base, unsigned int mode)
+{
+	unsigned int val;
+	int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0);
+
+	if (mode > 3 || mode == 1 || cpu > 3)
+		return -EINVAL;
+
+	val = readb_relaxed(scu_base + SCU_CPU_STATUS + cpu) & ~0x03;
+	val |= mode;
+	writeb_relaxed(val, scu_base + SCU_CPU_STATUS + cpu);
+
+	return 0;
+}
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
new file mode 100644
index 000000000..2e72be4f6
--- /dev/null
+++ b/arch/arm/kernel/smp_tlb.c
@@ -0,0 +1,213 @@
+/*
+ *  linux/arch/arm/kernel/smp_tlb.c
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/preempt.h>
+#include <linux/smp.h>
+
+#include <asm/smp_plat.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+
+/**********************************************************************/
+
+/*
+ * TLB operations
+ */
+struct tlb_args {
+	struct vm_area_struct *ta_vma;
+	unsigned long ta_start;
+	unsigned long ta_end;
+};
+
+static inline void ipi_flush_tlb_all(void *ignored)
+{
+	local_flush_tlb_all();
+}
+
+static inline void ipi_flush_tlb_mm(void *arg)
+{
+	struct mm_struct *mm = (struct mm_struct *)arg;
+
+	local_flush_tlb_mm(mm);
+}
+
+static inline void ipi_flush_tlb_page(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_kernel_page(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_kernel_page(ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+
+static inline void ipi_flush_tlb_kernel_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
+}
+
+static inline void ipi_flush_bp_all(void *ignored)
+{
+	local_flush_bp_all();
+}
+
+#ifdef CONFIG_ARM_ERRATA_798181
+bool (*erratum_a15_798181_handler)(void);
+
+static bool erratum_a15_798181_partial(void)
+{
+	asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0));
+	dsb(ish);
+	return false;
+}
+
+static bool erratum_a15_798181_broadcast(void)
+{
+	asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0));
+	dsb(ish);
+	return true;
+}
+
+void erratum_a15_798181_init(void)
+{
+	unsigned int midr = read_cpuid_id();
+	unsigned int revidr = read_cpuid(CPUID_REVIDR);
+
+	/* Brahma-B15 r0p0..r0p2 affected
+	 * Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
+	if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2)
+		erratum_a15_798181_handler = erratum_a15_798181_broadcast;
+	else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 &&
+		 (revidr & 0x210) != 0x210) {
+		if (revidr & 0x10)
+			erratum_a15_798181_handler =
+				erratum_a15_798181_partial;
+		else
+			erratum_a15_798181_handler =
+				erratum_a15_798181_broadcast;
+	}
+}
+#endif
+
+static void ipi_flush_tlb_a15_erratum(void *arg)
+{
+	dmb();
+}
+
+static void broadcast_tlb_a15_erratum(void)
+{
+	if (!erratum_a15_798181())
+		return;
+
+	smp_call_function(ipi_flush_tlb_a15_erratum, NULL, 1);
+}
+
+static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
+{
+	int this_cpu;
+	cpumask_t mask = { CPU_BITS_NONE };
+
+	if (!erratum_a15_798181())
+		return;
+
+	this_cpu = get_cpu();
+	a15_erratum_get_cpumask(this_cpu, mm, &mask);
+	smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
+	put_cpu();
+}
+
+void flush_tlb_all(void)
+{
+	if (tlb_ops_need_broadcast())
+		on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+	else
+		__flush_tlb_all();
+	broadcast_tlb_a15_erratum();
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (tlb_ops_need_broadcast())
+		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
+	else
+		__flush_tlb_mm(mm);
+	broadcast_tlb_mm_a15_erratum(mm);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_vma = vma;
+		ta.ta_start = uaddr;
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
+					&ta, 1);
+	} else
+		__flush_tlb_page(vma, uaddr);
+	broadcast_tlb_mm_a15_erratum(vma->vm_mm);
+}
+
+void flush_tlb_kernel_page(unsigned long kaddr)
+{
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_start = kaddr;
+		on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
+	} else
+		__flush_tlb_kernel_page(kaddr);
+	broadcast_tlb_a15_erratum();
+}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+                     unsigned long start, unsigned long end)
+{
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_vma = vma;
+		ta.ta_start = start;
+		ta.ta_end = end;
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+					&ta, 1);
+	} else
+		local_flush_tlb_range(vma, start, end);
+	broadcast_tlb_mm_a15_erratum(vma->vm_mm);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_start = start;
+		ta.ta_end = end;
+		on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+	} else
+		local_flush_tlb_kernel_range(start, end);
+	broadcast_tlb_a15_erratum();
+}
+
+void flush_bp_all(void)
+{
+	if (tlb_ops_need_broadcast())
+		on_each_cpu(ipi_flush_bp_all, NULL, 1);
+	else
+		__flush_bp_all();
+}
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
new file mode 100644
index 000000000..172c6a05d
--- /dev/null
+++ b/arch/arm/kernel/smp_twd.c
@@ -0,0 +1,414 @@
+/*
+ *  linux/arch/arm/kernel/smp_twd.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/smp.h>
+#include <linux/jiffies.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+
+#include <asm/smp_plat.h>
+#include <asm/smp_twd.h>
+
+/* set up by the platform code */
+static void __iomem *twd_base;
+
+static struct clk *twd_clk;
+static unsigned long twd_timer_rate;
+static DEFINE_PER_CPU(bool, percpu_setup_called);
+
+static struct clock_event_device __percpu *twd_evt;
+static int twd_ppi;
+
+static void twd_set_mode(enum clock_event_mode mode,
+			struct clock_event_device *clk)
+{
+	unsigned long ctrl;
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE
+			| TWD_TIMER_CONTROL_PERIODIC;
+		writel_relaxed(DIV_ROUND_CLOSEST(twd_timer_rate, HZ),
+			twd_base + TWD_TIMER_LOAD);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* period set, and timer enabled in 'next_event' hook */
+		ctrl = TWD_TIMER_CONTROL_IT_ENABLE | TWD_TIMER_CONTROL_ONESHOT;
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		ctrl = 0;
+	}
+
+	writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL);
+}
+
+static int twd_set_next_event(unsigned long evt,
+			struct clock_event_device *unused)
+{
+	unsigned long ctrl = readl_relaxed(twd_base + TWD_TIMER_CONTROL);
+
+	ctrl |= TWD_TIMER_CONTROL_ENABLE;
+
+	writel_relaxed(evt, twd_base + TWD_TIMER_COUNTER);
+	writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL);
+
+	return 0;
+}
+
+/*
+ * local_timer_ack: checks for a local timer interrupt.
+ *
+ * If a local timer interrupt has occurred, acknowledge and return 1.
+ * Otherwise, return 0.
+ */
+static int twd_timer_ack(void)
+{
+	if (readl_relaxed(twd_base + TWD_TIMER_INTSTAT)) {
+		writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void twd_timer_stop(void)
+{
+	struct clock_event_device *clk = raw_cpu_ptr(twd_evt);
+
+	twd_set_mode(CLOCK_EVT_MODE_UNUSED, clk);
+	disable_percpu_irq(clk->irq);
+}
+
+#ifdef CONFIG_COMMON_CLK
+
+/*
+ * Updates clockevent frequency when the cpu frequency changes.
+ * Called on the cpu that is changing frequency with interrupts disabled.
+ */
+static void twd_update_frequency(void *new_rate)
+{
+	twd_timer_rate = *((unsigned long *) new_rate);
+
+	clockevents_update_freq(raw_cpu_ptr(twd_evt), twd_timer_rate);
+}
+
+static int twd_rate_change(struct notifier_block *nb,
+	unsigned long flags, void *data)
+{
+	struct clk_notifier_data *cnd = data;
+
+	/*
+	 * The twd clock events must be reprogrammed to account for the new
+	 * frequency.  The timer is local to a cpu, so cross-call to the
+	 * changing cpu.
+	 */
+	if (flags == POST_RATE_CHANGE)
+		on_each_cpu(twd_update_frequency,
+				  (void *)&cnd->new_rate, 1);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block twd_clk_nb = {
+	.notifier_call = twd_rate_change,
+};
+
+static int twd_clk_init(void)
+{
+	if (twd_evt && raw_cpu_ptr(twd_evt) && !IS_ERR(twd_clk))
+		return clk_notifier_register(twd_clk, &twd_clk_nb);
+
+	return 0;
+}
+core_initcall(twd_clk_init);
+
+#elif defined (CONFIG_CPU_FREQ)
+
+#include <linux/cpufreq.h>
+
+/*
+ * Updates clockevent frequency when the cpu frequency changes.
+ * Called on the cpu that is changing frequency with interrupts disabled.
+ */
+static void twd_update_frequency(void *data)
+{
+	twd_timer_rate = clk_get_rate(twd_clk);
+
+	clockevents_update_freq(raw_cpu_ptr(twd_evt), twd_timer_rate);
+}
+
+static int twd_cpufreq_transition(struct notifier_block *nb,
+	unsigned long state, void *data)
+{
+	struct cpufreq_freqs *freqs = data;
+
+	/*
+	 * The twd clock events must be reprogrammed to account for the new
+	 * frequency.  The timer is local to a cpu, so cross-call to the
+	 * changing cpu.
+	 */
+	if (state == CPUFREQ_POSTCHANGE)
+		smp_call_function_single(freqs->cpu, twd_update_frequency,
+			NULL, 1);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block twd_cpufreq_nb = {
+	.notifier_call = twd_cpufreq_transition,
+};
+
+static int twd_cpufreq_init(void)
+{
+	if (twd_evt && raw_cpu_ptr(twd_evt) && !IS_ERR(twd_clk))
+		return cpufreq_register_notifier(&twd_cpufreq_nb,
+			CPUFREQ_TRANSITION_NOTIFIER);
+
+	return 0;
+}
+core_initcall(twd_cpufreq_init);
+
+#endif
+
+static void twd_calibrate_rate(void)
+{
+	unsigned long count;
+	u64 waitjiffies;
+
+	/*
+	 * If this is the first time round, we need to work out how fast
+	 * the timer ticks
+	 */
+	if (twd_timer_rate == 0) {
+		pr_info("Calibrating local timer... ");
+
+		/* Wait for a tick to start */
+		waitjiffies = get_jiffies_64() + 1;
+
+		while (get_jiffies_64() < waitjiffies)
+			udelay(10);
+
+		/* OK, now the tick has started, let's get the timer going */
+		waitjiffies += 5;
+
+				 /* enable, no interrupt or reload */
+		writel_relaxed(0x1, twd_base + TWD_TIMER_CONTROL);
+
+				 /* maximum value */
+		writel_relaxed(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
+
+		while (get_jiffies_64() < waitjiffies)
+			udelay(10);
+
+		count = readl_relaxed(twd_base + TWD_TIMER_COUNTER);
+
+		twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
+
+		pr_cont("%lu.%02luMHz.\n", twd_timer_rate / 1000000,
+			(twd_timer_rate / 10000) % 100);
+	}
+}
+
+static irqreturn_t twd_handler(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = dev_id;
+
+	if (twd_timer_ack()) {
+		evt->event_handler(evt);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static void twd_get_clock(struct device_node *np)
+{
+	int err;
+
+	if (np)
+		twd_clk = of_clk_get(np, 0);
+	else
+		twd_clk = clk_get_sys("smp_twd", NULL);
+
+	if (IS_ERR(twd_clk)) {
+		pr_err("smp_twd: clock not found %d\n", (int) PTR_ERR(twd_clk));
+		return;
+	}
+
+	err = clk_prepare_enable(twd_clk);
+	if (err) {
+		pr_err("smp_twd: clock failed to prepare+enable: %d\n", err);
+		clk_put(twd_clk);
+		return;
+	}
+
+	twd_timer_rate = clk_get_rate(twd_clk);
+}
+
+/*
+ * Setup the local clock events for a CPU.
+ */
+static void twd_timer_setup(void)
+{
+	struct clock_event_device *clk = raw_cpu_ptr(twd_evt);
+	int cpu = smp_processor_id();
+
+	/*
+	 * If the basic setup for this CPU has been done before don't
+	 * bother with the below.
+	 */
+	if (per_cpu(percpu_setup_called, cpu)) {
+		writel_relaxed(0, twd_base + TWD_TIMER_CONTROL);
+		clockevents_register_device(clk);
+		enable_percpu_irq(clk->irq, 0);
+		return;
+	}
+	per_cpu(percpu_setup_called, cpu) = true;
+
+	twd_calibrate_rate();
+
+	/*
+	 * The following is done once per CPU the first time .setup() is
+	 * called.
+	 */
+	writel_relaxed(0, twd_base + TWD_TIMER_CONTROL);
+
+	clk->name = "local_timer";
+	clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
+			CLOCK_EVT_FEAT_C3STOP;
+	clk->rating = 350;
+	clk->set_mode = twd_set_mode;
+	clk->set_next_event = twd_set_next_event;
+	clk->irq = twd_ppi;
+	clk->cpumask = cpumask_of(cpu);
+
+	clockevents_config_and_register(clk, twd_timer_rate,
+					0xf, 0xffffffff);
+	enable_percpu_irq(clk->irq, 0);
+}
+
+static int twd_timer_cpu_notify(struct notifier_block *self,
+				unsigned long action, void *hcpu)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		twd_timer_setup();
+		break;
+	case CPU_DYING:
+		twd_timer_stop();
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block twd_timer_cpu_nb = {
+	.notifier_call = twd_timer_cpu_notify,
+};
+
+static int __init twd_local_timer_common_register(struct device_node *np)
+{
+	int err;
+
+	twd_evt = alloc_percpu(struct clock_event_device);
+	if (!twd_evt) {
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	err = request_percpu_irq(twd_ppi, twd_handler, "twd", twd_evt);
+	if (err) {
+		pr_err("twd: can't register interrupt %d (%d)\n", twd_ppi, err);
+		goto out_free;
+	}
+
+	err = register_cpu_notifier(&twd_timer_cpu_nb);
+	if (err)
+		goto out_irq;
+
+	twd_get_clock(np);
+
+	/*
+	 * Immediately configure the timer on the boot CPU, unless we need
+	 * jiffies to be incrementing to calibrate the rate in which case
+	 * setup the timer in late_time_init.
+	 */
+	if (twd_timer_rate)
+		twd_timer_setup();
+	else
+		late_time_init = twd_timer_setup;
+
+	return 0;
+
+out_irq:
+	free_percpu_irq(twd_ppi, twd_evt);
+out_free:
+	iounmap(twd_base);
+	twd_base = NULL;
+	free_percpu(twd_evt);
+
+	return err;
+}
+
+int __init twd_local_timer_register(struct twd_local_timer *tlt)
+{
+	if (twd_base || twd_evt)
+		return -EBUSY;
+
+	twd_ppi	= tlt->res[1].start;
+
+	twd_base = ioremap(tlt->res[0].start, resource_size(&tlt->res[0]));
+	if (!twd_base)
+		return -ENOMEM;
+
+	return twd_local_timer_common_register(NULL);
+}
+
+#ifdef CONFIG_OF
+static void __init twd_local_timer_of_register(struct device_node *np)
+{
+	int err;
+
+	if (!is_smp() || !setup_max_cpus)
+		return;
+
+	twd_ppi = irq_of_parse_and_map(np, 0);
+	if (!twd_ppi) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	twd_base = of_iomap(np, 0);
+	if (!twd_base) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = twd_local_timer_common_register(np);
+
+out:
+	WARN(err, "twd_local_timer_of_register failed (%d)\n", err);
+}
+CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register);
+CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register);
+CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register);
+#endif
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
new file mode 100644
index 000000000..92b72375c
--- /dev/null
+++ b/arch/arm/kernel/stacktrace.c
@@ -0,0 +1,179 @@
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/stacktrace.h>
+#include <asm/traps.h>
+
+#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND)
+/*
+ * Unwind the current stack frame and store the new register values in the
+ * structure passed as argument. Unwinding is equivalent to a function return,
+ * hence the new PC value rather than LR should be used for backtrace.
+ *
+ * With framepointer enabled, a simple function prologue looks like this:
+ *	mov	ip, sp
+ *	stmdb	sp!, {fp, ip, lr, pc}
+ *	sub	fp, ip, #4
+ *
+ * A simple function epilogue looks like this:
+ *	ldm	sp, {fp, sp, pc}
+ *
+ * Note that with framepointer enabled, even the leaf functions have the same
+ * prologue and epilogue, therefore we can ignore the LR value in this case.
+ */
+int notrace unwind_frame(struct stackframe *frame)
+{
+	unsigned long high, low;
+	unsigned long fp = frame->fp;
+
+	/* only go to a higher address on the stack */
+	low = frame->sp;
+	high = ALIGN(low, THREAD_SIZE);
+
+	/* check current frame pointer is within bounds */
+	if (fp < low + 12 || fp > high - 4)
+		return -EINVAL;
+
+	/* restore the registers from the stack frame */
+	frame->fp = *(unsigned long *)(fp - 12);
+	frame->sp = *(unsigned long *)(fp - 8);
+	frame->pc = *(unsigned long *)(fp - 4);
+
+	return 0;
+}
+#endif
+
+void notrace walk_stackframe(struct stackframe *frame,
+		     int (*fn)(struct stackframe *, void *), void *data)
+{
+	while (1) {
+		int ret;
+
+		if (fn(frame, data))
+			break;
+		ret = unwind_frame(frame);
+		if (ret < 0)
+			break;
+	}
+}
+EXPORT_SYMBOL(walk_stackframe);
+
+#ifdef CONFIG_STACKTRACE
+struct stack_trace_data {
+	struct stack_trace *trace;
+	unsigned long last_pc;
+	unsigned int no_sched_functions;
+	unsigned int skip;
+};
+
+static int save_trace(struct stackframe *frame, void *d)
+{
+	struct stack_trace_data *data = d;
+	struct stack_trace *trace = data->trace;
+	struct pt_regs *regs;
+	unsigned long addr = frame->pc;
+
+	if (data->no_sched_functions && in_sched_functions(addr))
+		return 0;
+	if (data->skip) {
+		data->skip--;
+		return 0;
+	}
+
+	trace->entries[trace->nr_entries++] = addr;
+
+	if (trace->nr_entries >= trace->max_entries)
+		return 1;
+
+	/*
+	 * in_exception_text() is designed to test if the PC is one of
+	 * the functions which has an exception stack above it, but
+	 * unfortunately what is in frame->pc is the return LR value,
+	 * not the saved PC value.  So, we need to track the previous
+	 * frame PC value when doing this.
+	 */
+	addr = data->last_pc;
+	data->last_pc = frame->pc;
+	if (!in_exception_text(addr))
+		return 0;
+
+	regs = (struct pt_regs *)frame->sp;
+
+	trace->entries[trace->nr_entries++] = regs->ARM_pc;
+
+	return trace->nr_entries >= trace->max_entries;
+}
+
+/* This must be noinline to so that our skip calculation works correctly */
+static noinline void __save_stack_trace(struct task_struct *tsk,
+	struct stack_trace *trace, unsigned int nosched)
+{
+	struct stack_trace_data data;
+	struct stackframe frame;
+
+	data.trace = trace;
+	data.last_pc = ULONG_MAX;
+	data.skip = trace->skip;
+	data.no_sched_functions = nosched;
+
+	if (tsk != current) {
+#ifdef CONFIG_SMP
+		/*
+		 * What guarantees do we have here that 'tsk' is not
+		 * running on another CPU?  For now, ignore it as we
+		 * can't guarantee we won't explode.
+		 */
+		if (trace->nr_entries < trace->max_entries)
+			trace->entries[trace->nr_entries++] = ULONG_MAX;
+		return;
+#else
+		frame.fp = thread_saved_fp(tsk);
+		frame.sp = thread_saved_sp(tsk);
+		frame.lr = 0;		/* recovered from the stack */
+		frame.pc = thread_saved_pc(tsk);
+#endif
+	} else {
+		/* We don't want this function nor the caller */
+		data.skip += 2;
+		frame.fp = (unsigned long)__builtin_frame_address(0);
+		frame.sp = current_stack_pointer;
+		frame.lr = (unsigned long)__builtin_return_address(0);
+		frame.pc = (unsigned long)__save_stack_trace;
+	}
+
+	walk_stackframe(&frame, save_trace, &data);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	struct stack_trace_data data;
+	struct stackframe frame;
+
+	data.trace = trace;
+	data.skip = trace->skip;
+	data.no_sched_functions = 0;
+
+	frame.fp = regs->ARM_fp;
+	frame.sp = regs->ARM_sp;
+	frame.lr = regs->ARM_lr;
+	frame.pc = regs->ARM_pc;
+
+	walk_stackframe(&frame, save_trace, &data);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	__save_stack_trace(tsk, trace, 1);
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	__save_stack_trace(current, trace, 0);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
new file mode 100644
index 000000000..9a2f882a0
--- /dev/null
+++ b/arch/arm/kernel/suspend.c
@@ -0,0 +1,102 @@
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include <asm/cacheflush.h>
+#include <asm/idmap.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
+extern void cpu_resume_mmu(void);
+
+#ifdef CONFIG_MMU
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	struct mm_struct *mm = current->active_mm;
+	u32 __mpidr = cpu_logical_map(smp_processor_id());
+	int ret;
+
+	if (!idmap_pgd)
+		return -EINVAL;
+
+	/*
+	 * Provide a temporary page table with an identity mapping for
+	 * the MMU-enable code, required for resuming.  On successful
+	 * resume (indicated by a zero return code), we need to switch
+	 * back to the correct page tables.
+	 */
+	ret = __cpu_suspend(arg, fn, __mpidr);
+	if (ret == 0) {
+		cpu_switch_mm(mm->pgd, mm);
+		local_flush_bp_all();
+		local_flush_tlb_all();
+	}
+
+	return ret;
+}
+#else
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	u32 __mpidr = cpu_logical_map(smp_processor_id());
+	return __cpu_suspend(arg, fn, __mpidr);
+}
+#define	idmap_pgd	NULL
+#endif
+
+/*
+ * This is called by __cpu_suspend() to save the state, and do whatever
+ * flushing is required to ensure that when the CPU goes to sleep we have
+ * the necessary data available when the caches are not searched.
+ */
+void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
+{
+	u32 *ctx = ptr;
+
+	*save_ptr = virt_to_phys(ptr);
+
+	/* This must correspond to the LDM in cpu_resume() assembly */
+	*ptr++ = virt_to_phys(idmap_pgd);
+	*ptr++ = sp;
+	*ptr++ = virt_to_phys(cpu_do_resume);
+
+	cpu_do_suspend(ptr);
+
+	flush_cache_louis();
+
+	/*
+	 * flush_cache_louis does not guarantee that
+	 * save_ptr and ptr are cleaned to main memory,
+	 * just up to the Level of Unification Inner Shareable.
+	 * Since the context pointer and context itself
+	 * are to be retrieved with the MMU off that
+	 * data must be cleaned from all cache levels
+	 * to main memory using "area" cache primitives.
+	*/
+	__cpuc_flush_dcache_area(ctx, ptrsz);
+	__cpuc_flush_dcache_area(save_ptr, sizeof(*save_ptr));
+
+	outer_clean_range(*save_ptr, *save_ptr + ptrsz);
+	outer_clean_range(virt_to_phys(save_ptr),
+			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
+}
+
+extern struct sleep_save_sp sleep_save_sp;
+
+static int cpu_suspend_alloc_sp(void)
+{
+	void *ctx_ptr;
+	/* ctx_ptr is an array of physical addresses */
+	ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL);
+
+	if (WARN_ON(!ctx_ptr))
+		return -ENOMEM;
+	sleep_save_sp.save_ptr_stash = ctx_ptr;
+	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
+	sync_cache_w(&sleep_save_sp);
+	return 0;
+}
+early_initcall(cpu_suspend_alloc_sp);
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
new file mode 100644
index 000000000..136175678
--- /dev/null
+++ b/arch/arm/kernel/swp_emulate.c
@@ -0,0 +1,269 @@
+/*
+ *  linux/arch/arm/kernel/swp_emulate.c
+ *
+ *  Copyright (C) 2009 ARM Limited
+ *  __user_* functions adapted from include/asm/uaccess.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Implements emulation of the SWP/SWPB instructions using load-exclusive and
+ *  store-exclusive for processors that have them disabled (or future ones that
+ *  might not implement them).
+ *
+ *  Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ *  Where: Rt  = destination
+ *	   Rt2 = source
+ *	   Rn  = address
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/perf_event.h>
+
+#include <asm/opcodes.h>
+#include <asm/system_info.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+
+/*
+ * Error-checking SWP macros implemented using ldrex{b}/strex{b}
+ */
+#define __user_swpX_asm(data, addr, res, temp, B)		\
+	__asm__ __volatile__(					\
+	"	mov		%2, %1\n"			\
+	"0:	ldrex"B"	%1, [%3]\n"			\
+	"1:	strex"B"	%0, %2, [%3]\n"			\
+	"	cmp		%0, #0\n"			\
+	"	movne		%0, %4\n"			\
+	"2:\n"							\
+	"	.section	 .text.fixup,\"ax\"\n"		\
+	"	.align		2\n"				\
+	"3:	mov		%0, %5\n"			\
+	"	b		2b\n"				\
+	"	.previous\n"					\
+	"	.section	 __ex_table,\"a\"\n"		\
+	"	.align		3\n"				\
+	"	.long		0b, 3b\n"			\
+	"	.long		1b, 3b\n"			\
+	"	.previous"					\
+	: "=&r" (res), "+r" (data), "=&r" (temp)		\
+	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)		\
+	: "cc", "memory")
+
+#define __user_swp_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "")
+#define __user_swpb_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "b")
+
+/*
+ * Macros/defines for extracting register numbers from instruction.
+ */
+#define EXTRACT_REG_NUM(instruction, offset) \
+	(((instruction) & (0xf << (offset))) >> (offset))
+#define RN_OFFSET  16
+#define RT_OFFSET  12
+#define RT2_OFFSET  0
+/*
+ * Bit 22 of the instruction encoding distinguishes between
+ * the SWP and SWPB variants (bit set means SWPB).
+ */
+#define TYPE_SWPB (1 << 22)
+
+static unsigned long swpcounter;
+static unsigned long swpbcounter;
+static unsigned long abtcounter;
+static pid_t         previous_pid;
+
+#ifdef CONFIG_PROC_FS
+static int proc_status_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "Emulated SWP:\t\t%lu\n", swpcounter);
+	seq_printf(m, "Emulated SWPB:\t\t%lu\n", swpbcounter);
+	seq_printf(m, "Aborted SWP{B}:\t\t%lu\n", abtcounter);
+	if (previous_pid != 0)
+		seq_printf(m, "Last process:\t\t%d\n", previous_pid);
+	return 0;
+}
+
+static int proc_status_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_status_show, PDE_DATA(inode));
+}
+
+static const struct file_operations proc_status_fops = {
+	.open		= proc_status_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
+/*
+ * Set up process info to signal segmentation fault - called on access error.
+ */
+static void set_segfault(struct pt_regs *regs, unsigned long addr)
+{
+	siginfo_t info;
+
+	down_read(&current->mm->mmap_sem);
+	if (find_vma(current->mm, addr) == NULL)
+		info.si_code = SEGV_MAPERR;
+	else
+		info.si_code = SEGV_ACCERR;
+	up_read(&current->mm->mmap_sem);
+
+	info.si_signo = SIGSEGV;
+	info.si_errno = 0;
+	info.si_addr  = (void *) instruction_pointer(regs);
+
+	pr_debug("SWP{B} emulation: access caused memory abort!\n");
+	arm_notify_die("Illegal memory access", regs, &info, 0, 0);
+
+	abtcounter++;
+}
+
+static int emulate_swpX(unsigned int address, unsigned int *data,
+			unsigned int type)
+{
+	unsigned int res = 0;
+
+	if ((type != TYPE_SWPB) && (address & 0x3)) {
+		/* SWP to unaligned address not permitted */
+		pr_debug("SWP instruction on unaligned pointer!\n");
+		return -EFAULT;
+	}
+
+	while (1) {
+		unsigned long temp;
+
+		if (type == TYPE_SWPB)
+			__user_swpb_asm(*data, address, res, temp);
+		else
+			__user_swp_asm(*data, address, res, temp);
+
+		if (likely(res != -EAGAIN) || signal_pending(current))
+			break;
+
+		cond_resched();
+	}
+
+	if (res == 0) {
+		if (type == TYPE_SWPB)
+			swpbcounter++;
+		else
+			swpcounter++;
+	}
+
+	return res;
+}
+
+/*
+ * swp_handler logs the id of calling process, dissects the instruction, sanity
+ * checks the memory location, calls emulate_swpX for the actual operation and
+ * deals with fixup/error handling before returning
+ */
+static int swp_handler(struct pt_regs *regs, unsigned int instr)
+{
+	unsigned int address, destreg, data, type;
+	unsigned int res = 0;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
+
+	res = arm_check_condition(instr, regs->ARM_cpsr);
+	switch (res) {
+	case ARM_OPCODE_CONDTEST_PASS:
+		break;
+	case ARM_OPCODE_CONDTEST_FAIL:
+		/* Condition failed - return to next instruction */
+		regs->ARM_pc += 4;
+		return 0;
+	case ARM_OPCODE_CONDTEST_UNCOND:
+		/* If unconditional encoding - not a SWP, undef */
+		return -EFAULT;
+	default:
+		return -EINVAL;
+	}
+
+	if (current->pid != previous_pid) {
+		pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
+			 current->comm, (unsigned long)current->pid);
+		previous_pid = current->pid;
+	}
+
+	address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)];
+	data	= regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
+	destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);
+
+	type = instr & TYPE_SWPB;
+
+	pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n",
+		 EXTRACT_REG_NUM(instr, RN_OFFSET), address,
+		 destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data);
+
+	/* Check access in reasonable access range for both SWP and SWPB */
+	if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+		pr_debug("SWP{B} emulation: access to %p not allowed!\n",
+			 (void *)address);
+		res = -EFAULT;
+	} else {
+		res = emulate_swpX(address, &data, type);
+	}
+
+	if (res == 0) {
+		/*
+		 * On successful emulation, revert the adjustment to the PC
+		 * made in kernel/traps.c in order to resume execution at the
+		 * instruction following the SWP{B}.
+		 */
+		regs->ARM_pc += 4;
+		regs->uregs[destreg] = data;
+	} else if (res == -EFAULT) {
+		/*
+		 * Memory errors do not mean emulation failed.
+		 * Set up signal info to return SEGV, then return OK
+		 */
+		set_segfault(regs, address);
+	}
+
+	return 0;
+}
+
+/*
+ * Only emulate SWP/SWPB executed in ARM state/User mode.
+ * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE.
+ */
+static struct undef_hook swp_hook = {
+	.instr_mask = 0x0fb00ff0,
+	.instr_val  = 0x01000090,
+	.cpsr_mask  = MODE_MASK | PSR_T_BIT | PSR_J_BIT,
+	.cpsr_val   = USR_MODE,
+	.fn	    = swp_handler
+};
+
+/*
+ * Register handler and create status file in /proc/cpu
+ * Invoked as late_initcall, since not needed before init spawned.
+ */
+static int __init swp_emulation_init(void)
+{
+	if (cpu_architecture() < CPU_ARCH_ARMv7)
+		return 0;
+
+#ifdef CONFIG_PROC_FS
+	if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops))
+		return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
+
+	pr_notice("Registering SWP/SWPB emulation handler\n");
+	register_undef_hook(&swp_hook);
+
+	return 0;
+}
+
+late_initcall(swp_emulation_init);
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
new file mode 100644
index 000000000..3151f5623
--- /dev/null
+++ b/arch/arm/kernel/sys_arm.c
@@ -0,0 +1,39 @@
+/*
+ *  linux/arch/arm/kernel/sys_arm.c
+ *
+ *  Copyright (C) People who wrote linux/arch/i386/kernel/sys_i386.c
+ *  Copyright (C) 1995, 1996 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains various random system calls that
+ *  have a non-standard calling sequence on the Linux/arm
+ *  platform.
+ */
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/ipc.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+/*
+ * Since loff_t is a 64 bit type we avoid a lot of ABI hassle
+ * with a different argument ordering.
+ */
+asmlinkage long sys_arm_fadvise64_64(int fd, int advice,
+				     loff_t offset, loff_t len)
+{
+	return sys_fadvise64_64(fd, offset, len, advice);
+}
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
new file mode 100644
index 000000000..b83f3b773
--- /dev/null
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -0,0 +1,456 @@
+/*
+ *  arch/arm/kernel/sys_oabi-compat.c
+ *
+ *  Compatibility wrappers for syscalls that are used from
+ *  old ABI user space binaries with an EABI kernel.
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Oct 7, 2005
+ *  Copyright:	MontaVista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+/*
+ * The legacy ABI and the new ARM EABI have different rules making some
+ * syscalls incompatible especially with structure arguments.
+ * Most notably, Eabi says 64-bit members should be 64-bit aligned instead of
+ * simply word aligned.  EABI also pads structures to the size of the largest
+ * member it contains instead of the invariant 32-bit.
+ *
+ * The following syscalls are affected:
+ *
+ * sys_stat64:
+ * sys_lstat64:
+ * sys_fstat64:
+ * sys_fstatat64:
+ *
+ *   struct stat64 has different sizes and some members are shifted
+ *   Compatibility wrappers are needed for them and provided below.
+ *
+ * sys_fcntl64:
+ *
+ *   struct flock64 has different sizes and some members are shifted
+ *   A compatibility wrapper is needed and provided below.
+ *
+ * sys_statfs64:
+ * sys_fstatfs64:
+ *
+ *   struct statfs64 has extra padding with EABI growing its size from
+ *   84 to 88.  This struct is now __attribute__((packed,aligned(4)))
+ *   with a small assembly wrapper to force the sz argument to 84 if it is 88
+ *   to avoid copying the extra padding over user space unexpecting it.
+ *
+ * sys_newuname:
+ *
+ *   struct new_utsname has no padding with EABI.  No problem there.
+ *
+ * sys_epoll_ctl:
+ * sys_epoll_wait:
+ *
+ *   struct epoll_event has its second member shifted also affecting the
+ *   structure size. Compatibility wrappers are needed and provided below.
+ *
+ * sys_ipc:
+ * sys_semop:
+ * sys_semtimedop:
+ *
+ *   struct sembuf loses its padding with EABI.  Since arrays of them are
+ *   used they have to be copyed to remove the padding. Compatibility wrappers
+ *   provided below.
+ *
+ * sys_bind:
+ * sys_connect:
+ * sys_sendmsg:
+ * sys_sendto:
+ * sys_socketcall:
+ *
+ *   struct sockaddr_un loses its padding with EABI.  Since the size of the
+ *   structure is used as a validation test in unix_mkname(), we need to
+ *   change the length argument to 110 whenever it is 112.  Compatibility
+ *   wrappers provided below.
+ */
+
+#include <linux/syscalls.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/eventpoll.h>
+#include <linux/sem.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/ipc.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+struct oldabi_stat64 {
+	unsigned long long st_dev;
+	unsigned int	__pad1;
+	unsigned long	__st_ino;
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+
+	unsigned long	st_uid;
+	unsigned long	st_gid;
+
+	unsigned long long st_rdev;
+	unsigned int	__pad2;
+
+	long long	st_size;
+	unsigned long	st_blksize;
+	unsigned long long st_blocks;
+
+	unsigned long	st_atime;
+	unsigned long	st_atime_nsec;
+
+	unsigned long	st_mtime;
+	unsigned long	st_mtime_nsec;
+
+	unsigned long	st_ctime;
+	unsigned long	st_ctime_nsec;
+
+	unsigned long long st_ino;
+} __attribute__ ((packed,aligned(4)));
+
+static long cp_oldabi_stat64(struct kstat *stat,
+			     struct oldabi_stat64 __user *statbuf)
+{
+	struct oldabi_stat64 tmp;
+
+	tmp.st_dev = huge_encode_dev(stat->dev);
+	tmp.__pad1 = 0;
+	tmp.__st_ino = stat->ino;
+	tmp.st_mode = stat->mode;
+	tmp.st_nlink = stat->nlink;
+	tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.st_rdev = huge_encode_dev(stat->rdev);
+	tmp.st_size = stat->size;
+	tmp.st_blocks = stat->blocks;
+	tmp.__pad2 = 0;
+	tmp.st_blksize = stat->blksize;
+	tmp.st_atime = stat->atime.tv_sec;
+	tmp.st_atime_nsec = stat->atime.tv_nsec;
+	tmp.st_mtime = stat->mtime.tv_sec;
+	tmp.st_mtime_nsec = stat->mtime.tv_nsec;
+	tmp.st_ctime = stat->ctime.tv_sec;
+	tmp.st_ctime_nsec = stat->ctime.tv_nsec;
+	tmp.st_ino = stat->ino;
+	return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
+}
+
+asmlinkage long sys_oabi_stat64(const char __user * filename,
+				struct oldabi_stat64 __user * statbuf)
+{
+	struct kstat stat;
+	int error = vfs_stat(filename, &stat);
+	if (!error)
+		error = cp_oldabi_stat64(&stat, statbuf);
+	return error;
+}
+
+asmlinkage long sys_oabi_lstat64(const char __user * filename,
+				 struct oldabi_stat64 __user * statbuf)
+{
+	struct kstat stat;
+	int error = vfs_lstat(filename, &stat);
+	if (!error)
+		error = cp_oldabi_stat64(&stat, statbuf);
+	return error;
+}
+
+asmlinkage long sys_oabi_fstat64(unsigned long fd,
+				 struct oldabi_stat64 __user * statbuf)
+{
+	struct kstat stat;
+	int error = vfs_fstat(fd, &stat);
+	if (!error)
+		error = cp_oldabi_stat64(&stat, statbuf);
+	return error;
+}
+
+asmlinkage long sys_oabi_fstatat64(int dfd,
+				   const char __user *filename,
+				   struct oldabi_stat64  __user *statbuf,
+				   int flag)
+{
+	struct kstat stat;
+	int error;
+
+	error = vfs_fstatat(dfd, filename, &stat, flag);
+	if (error)
+		return error;
+	return cp_oldabi_stat64(&stat, statbuf);
+}
+
+struct oabi_flock64 {
+	short	l_type;
+	short	l_whence;
+	loff_t	l_start;
+	loff_t	l_len;
+	pid_t	l_pid;
+} __attribute__ ((packed,aligned(4)));
+
+asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct oabi_flock64 user;
+	struct flock64 kernel;
+	mm_segment_t fs = USER_DS; /* initialized to kill a warning */
+	unsigned long local_arg = arg;
+	int ret;
+
+	switch (cmd) {
+	case F_OFD_GETLK:
+	case F_OFD_SETLK:
+	case F_OFD_SETLKW:
+	case F_GETLK64:
+	case F_SETLK64:
+	case F_SETLKW64:
+		if (copy_from_user(&user, (struct oabi_flock64 __user *)arg,
+				   sizeof(user)))
+			return -EFAULT;
+		kernel.l_type	= user.l_type;
+		kernel.l_whence	= user.l_whence;
+		kernel.l_start	= user.l_start;
+		kernel.l_len	= user.l_len;
+		kernel.l_pid	= user.l_pid;
+		local_arg = (unsigned long)&kernel;
+		fs = get_fs();
+		set_fs(KERNEL_DS);
+	}
+
+	ret = sys_fcntl64(fd, cmd, local_arg);
+
+	switch (cmd) {
+	case F_GETLK64:
+		if (!ret) {
+			user.l_type	= kernel.l_type;
+			user.l_whence	= kernel.l_whence;
+			user.l_start	= kernel.l_start;
+			user.l_len	= kernel.l_len;
+			user.l_pid	= kernel.l_pid;
+			if (copy_to_user((struct oabi_flock64 __user *)arg,
+					 &user, sizeof(user)))
+				ret = -EFAULT;
+		}
+	case F_SETLK64:
+	case F_SETLKW64:
+		set_fs(fs);
+	}
+
+	return ret;
+}
+
+struct oabi_epoll_event {
+	__u32 events;
+	__u64 data;
+} __attribute__ ((packed,aligned(4)));
+
+asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
+				   struct oabi_epoll_event __user *event)
+{
+	struct oabi_epoll_event user;
+	struct epoll_event kernel;
+	mm_segment_t fs;
+	long ret;
+
+	if (op == EPOLL_CTL_DEL)
+		return sys_epoll_ctl(epfd, op, fd, NULL);
+	if (copy_from_user(&user, event, sizeof(user)))
+		return -EFAULT;
+	kernel.events = user.events;
+	kernel.data   = user.data;
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = sys_epoll_ctl(epfd, op, fd, &kernel);
+	set_fs(fs);
+	return ret;
+}
+
+asmlinkage long sys_oabi_epoll_wait(int epfd,
+				    struct oabi_epoll_event __user *events,
+				    int maxevents, int timeout)
+{
+	struct epoll_event *kbuf;
+	mm_segment_t fs;
+	long ret, err, i;
+
+	if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+		return -EINVAL;
+	kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout);
+	set_fs(fs);
+	err = 0;
+	for (i = 0; i < ret; i++) {
+		__put_user_error(kbuf[i].events, &events->events, err);
+		__put_user_error(kbuf[i].data,   &events->data,   err);
+		events++;
+	}
+	kfree(kbuf);
+	return err ? -EFAULT : ret;
+}
+
+struct oabi_sembuf {
+	unsigned short	sem_num;
+	short		sem_op;
+	short		sem_flg;
+	unsigned short	__pad;
+};
+
+asmlinkage long sys_oabi_semtimedop(int semid,
+				    struct oabi_sembuf __user *tsops,
+				    unsigned nsops,
+				    const struct timespec __user *timeout)
+{
+	struct sembuf *sops;
+	struct timespec local_timeout;
+	long err;
+	int i;
+
+	if (nsops < 1 || nsops > SEMOPM)
+		return -EINVAL;
+	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
+	if (!sops)
+		return -ENOMEM;
+	err = 0;
+	for (i = 0; i < nsops; i++) {
+		__get_user_error(sops[i].sem_num, &tsops->sem_num, err);
+		__get_user_error(sops[i].sem_op,  &tsops->sem_op,  err);
+		__get_user_error(sops[i].sem_flg, &tsops->sem_flg, err);
+		tsops++;
+	}
+	if (timeout) {
+		/* copy this as well before changing domain protection */
+		err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout));
+		timeout = &local_timeout;
+	}
+	if (err) {
+		err = -EFAULT;
+	} else {
+		mm_segment_t fs = get_fs();
+		set_fs(KERNEL_DS);
+		err = sys_semtimedop(semid, sops, nsops, timeout);
+		set_fs(fs);
+	}
+	kfree(sops);
+	return err;
+}
+
+asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops,
+			       unsigned nsops)
+{
+	return sys_oabi_semtimedop(semid, tsops, nsops, NULL);
+}
+
+asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
+			    void __user *ptr, long fifth)
+{
+	switch (call & 0xffff) {
+	case SEMOP:
+		return  sys_oabi_semtimedop(first,
+					    (struct oabi_sembuf __user *)ptr,
+					    second, NULL);
+	case SEMTIMEDOP:
+		return  sys_oabi_semtimedop(first,
+					    (struct oabi_sembuf __user *)ptr,
+					    second,
+					    (const struct timespec __user *)fifth);
+	default:
+		return sys_ipc(call, first, second, third, ptr, fifth);
+	}
+}
+
+asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen)
+{
+	sa_family_t sa_family;
+	if (addrlen == 112 &&
+	    get_user(sa_family, &addr->sa_family) == 0 &&
+	    sa_family == AF_UNIX)
+			addrlen = 110;
+	return sys_bind(fd, addr, addrlen);
+}
+
+asmlinkage long sys_oabi_connect(int fd, struct sockaddr __user *addr, int addrlen)
+{
+	sa_family_t sa_family;
+	if (addrlen == 112 &&
+	    get_user(sa_family, &addr->sa_family) == 0 &&
+	    sa_family == AF_UNIX)
+			addrlen = 110;
+	return sys_connect(fd, addr, addrlen);
+}
+
+asmlinkage long sys_oabi_sendto(int fd, void __user *buff,
+				size_t len, unsigned flags,
+				struct sockaddr __user *addr,
+				int addrlen)
+{
+	sa_family_t sa_family;
+	if (addrlen == 112 &&
+	    get_user(sa_family, &addr->sa_family) == 0 &&
+	    sa_family == AF_UNIX)
+			addrlen = 110;
+	return sys_sendto(fd, buff, len, flags, addr, addrlen);
+}
+
+asmlinkage long sys_oabi_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
+{
+	struct sockaddr __user *addr;
+	int msg_namelen;
+	sa_family_t sa_family;
+	if (msg &&
+	    get_user(msg_namelen, &msg->msg_namelen) == 0 &&
+	    msg_namelen == 112 &&
+	    get_user(addr, &msg->msg_name) == 0 &&
+	    get_user(sa_family, &addr->sa_family) == 0 &&
+	    sa_family == AF_UNIX)
+	{
+		/*
+		 * HACK ALERT: there is a limit to how much backward bending
+		 * we should do for what is actually a transitional
+		 * compatibility layer.  This already has known flaws with
+		 * a few ioctls that we don't intend to fix.  Therefore
+		 * consider this blatent hack as another one... and take care
+		 * to run for cover.  In most cases it will "just work fine".
+		 * If it doesn't, well, tough.
+		 */
+		put_user(110, &msg->msg_namelen);
+	}
+	return sys_sendmsg(fd, msg, flags);
+}
+
+asmlinkage long sys_oabi_socketcall(int call, unsigned long __user *args)
+{
+	unsigned long r = -EFAULT, a[6];
+
+	switch (call) {
+	case SYS_BIND:
+		if (copy_from_user(a, args, 3 * sizeof(long)) == 0)
+			r = sys_oabi_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
+		break;
+	case SYS_CONNECT:
+		if (copy_from_user(a, args, 3 * sizeof(long)) == 0)
+			r = sys_oabi_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
+		break;
+	case SYS_SENDTO:
+		if (copy_from_user(a, args, 6 * sizeof(long)) == 0)
+			r = sys_oabi_sendto(a[0], (void __user *)a[1], a[2], a[3],
+					    (struct sockaddr __user *)a[4], a[5]);
+		break;
+	case SYS_SENDMSG:
+		if (copy_from_user(a, args, 3 * sizeof(long)) == 0)
+			r = sys_oabi_sendmsg(a[0], (struct user_msghdr __user *)a[1], a[2]);
+		break;
+	default:
+		r = sys_socketcall(call, args);
+	}
+
+	return r;
+}
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
new file mode 100644
index 000000000..7a3be1d4d
--- /dev/null
+++ b/arch/arm/kernel/tcm.c
@@ -0,0 +1,336 @@
+/*
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * TCM memory handling for ARM systems
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Rickard Andersson <rickard.andersson@stericsson.com>
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/ioport.h>
+#include <linux/genalloc.h>
+#include <linux/string.h> /* memcpy */
+#include <asm/cputype.h>
+#include <asm/mach/map.h>
+#include <asm/memory.h>
+#include <asm/system_info.h>
+
+static struct gen_pool *tcm_pool;
+static bool dtcm_present;
+static bool itcm_present;
+
+/* TCM section definitions from the linker */
+extern char __itcm_start, __sitcm_text, __eitcm_text;
+extern char __dtcm_start, __sdtcm_data, __edtcm_data;
+
+/* These will be increased as we run */
+u32 dtcm_end = DTCM_OFFSET;
+u32 itcm_end = ITCM_OFFSET;
+
+/*
+ * TCM memory resources
+ */
+static struct resource dtcm_res = {
+	.name = "DTCM RAM",
+	.start = DTCM_OFFSET,
+	.end = DTCM_OFFSET,
+	.flags = IORESOURCE_MEM
+};
+
+static struct resource itcm_res = {
+	.name = "ITCM RAM",
+	.start = ITCM_OFFSET,
+	.end = ITCM_OFFSET,
+	.flags = IORESOURCE_MEM
+};
+
+static struct map_desc dtcm_iomap[] __initdata = {
+	{
+		.virtual	= DTCM_OFFSET,
+		.pfn		= __phys_to_pfn(DTCM_OFFSET),
+		.length		= 0,
+		.type		= MT_MEMORY_RW_DTCM
+	}
+};
+
+static struct map_desc itcm_iomap[] __initdata = {
+	{
+		.virtual	= ITCM_OFFSET,
+		.pfn		= __phys_to_pfn(ITCM_OFFSET),
+		.length		= 0,
+		.type		= MT_MEMORY_RWX_ITCM,
+	}
+};
+
+/*
+ * Allocate a chunk of TCM memory
+ */
+void *tcm_alloc(size_t len)
+{
+	unsigned long vaddr;
+
+	if (!tcm_pool)
+		return NULL;
+
+	vaddr = gen_pool_alloc(tcm_pool, len);
+	if (!vaddr)
+		return NULL;
+
+	return (void *) vaddr;
+}
+EXPORT_SYMBOL(tcm_alloc);
+
+/*
+ * Free a chunk of TCM memory
+ */
+void tcm_free(void *addr, size_t len)
+{
+	gen_pool_free(tcm_pool, (unsigned long) addr, len);
+}
+EXPORT_SYMBOL(tcm_free);
+
+bool tcm_dtcm_present(void)
+{
+	return dtcm_present;
+}
+EXPORT_SYMBOL(tcm_dtcm_present);
+
+bool tcm_itcm_present(void)
+{
+	return itcm_present;
+}
+EXPORT_SYMBOL(tcm_itcm_present);
+
+static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
+				  u32 *offset)
+{
+	const int tcm_sizes[16] = { 0, -1, -1, 4, 8, 16, 32, 64, 128,
+				    256, 512, 1024, -1, -1, -1, -1 };
+	u32 tcm_region;
+	int tcm_size;
+
+	/*
+	 * If there are more than one TCM bank of this type,
+	 * select the TCM bank to operate on in the TCM selection
+	 * register.
+	 */
+	if (banks > 1)
+		asm("mcr	p15, 0, %0, c9, c2, 0"
+		    : /* No output operands */
+		    : "r" (bank));
+
+	/* Read the special TCM region register c9, 0 */
+	if (!type)
+		asm("mrc	p15, 0, %0, c9, c1, 0"
+		    : "=r" (tcm_region));
+	else
+		asm("mrc	p15, 0, %0, c9, c1, 1"
+		    : "=r" (tcm_region));
+
+	tcm_size = tcm_sizes[(tcm_region >> 2) & 0x0f];
+	if (tcm_size < 0) {
+		pr_err("CPU: %sTCM%d of unknown size\n",
+		       type ? "I" : "D", bank);
+		return -EINVAL;
+	} else if (tcm_size > 32) {
+		pr_err("CPU: %sTCM%d larger than 32k found\n",
+		       type ? "I" : "D", bank);
+		return -EINVAL;
+	} else {
+		pr_info("CPU: found %sTCM%d %dk @ %08x, %senabled\n",
+			type ? "I" : "D",
+			bank,
+			tcm_size,
+			(tcm_region & 0xfffff000U),
+			(tcm_region & 1) ? "" : "not ");
+	}
+
+	/* Not much fun you can do with a size 0 bank */
+	if (tcm_size == 0)
+		return 0;
+
+	/* Force move the TCM bank to where we want it, enable */
+	tcm_region = *offset | (tcm_region & 0x00000ffeU) | 1;
+
+	if (!type)
+		asm("mcr	p15, 0, %0, c9, c1, 0"
+		    : /* No output operands */
+		    : "r" (tcm_region));
+	else
+		asm("mcr	p15, 0, %0, c9, c1, 1"
+		    : /* No output operands */
+		    : "r" (tcm_region));
+
+	/* Increase offset */
+	*offset += (tcm_size << 10);
+
+	pr_info("CPU: moved %sTCM%d %dk to %08x, enabled\n",
+		type ? "I" : "D",
+		bank,
+		tcm_size,
+		(tcm_region & 0xfffff000U));
+	return 0;
+}
+
+/*
+ * This initializes the TCM memory
+ */
+void __init tcm_init(void)
+{
+	u32 tcm_status;
+	u8 dtcm_banks;
+	u8 itcm_banks;
+	size_t dtcm_code_sz = &__edtcm_data - &__sdtcm_data;
+	size_t itcm_code_sz = &__eitcm_text - &__sitcm_text;
+	char *start;
+	char *end;
+	char *ram;
+	int ret;
+	int i;
+
+	/*
+	 * Prior to ARMv5 there is no TCM, and trying to read the status
+	 * register will hang the processor.
+	 */
+	if (cpu_architecture() < CPU_ARCH_ARMv5) {
+		if (dtcm_code_sz || itcm_code_sz)
+			pr_info("CPU TCM: %u bytes of DTCM and %u bytes of "
+				"ITCM code compiled in, but no TCM present "
+				"in pre-v5 CPU\n", dtcm_code_sz, itcm_code_sz);
+		return;
+	}
+
+	tcm_status = read_cpuid_tcmstatus();
+	dtcm_banks = (tcm_status >> 16) & 0x03;
+	itcm_banks = (tcm_status & 0x03);
+
+	/* Values greater than 2 for D/ITCM banks are "reserved" */
+	if (dtcm_banks > 2)
+		dtcm_banks = 0;
+	if (itcm_banks > 2)
+		itcm_banks = 0;
+
+	/* Setup DTCM if present */
+	if (dtcm_banks > 0) {
+		for (i = 0; i < dtcm_banks; i++) {
+			ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end);
+			if (ret)
+				return;
+		}
+		/* This means you compiled more code than fits into DTCM */
+		if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) {
+			pr_info("CPU DTCM: %u bytes of code compiled to "
+				"DTCM but only %lu bytes of DTCM present\n",
+				dtcm_code_sz, (dtcm_end - DTCM_OFFSET));
+			goto no_dtcm;
+		}
+		dtcm_res.end = dtcm_end - 1;
+		request_resource(&iomem_resource, &dtcm_res);
+		dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET;
+		iotable_init(dtcm_iomap, 1);
+		/* Copy data from RAM to DTCM */
+		start = &__sdtcm_data;
+		end   = &__edtcm_data;
+		ram   = &__dtcm_start;
+		memcpy(start, ram, dtcm_code_sz);
+		pr_debug("CPU DTCM: copied data from %p - %p\n",
+			 start, end);
+		dtcm_present = true;
+	} else if (dtcm_code_sz) {
+		pr_info("CPU DTCM: %u bytes of code compiled to DTCM but no "
+			"DTCM banks present in CPU\n", dtcm_code_sz);
+	}
+
+no_dtcm:
+	/* Setup ITCM if present */
+	if (itcm_banks > 0) {
+		for (i = 0; i < itcm_banks; i++) {
+			ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end);
+			if (ret)
+				return;
+		}
+		/* This means you compiled more code than fits into ITCM */
+		if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) {
+			pr_info("CPU ITCM: %u bytes of code compiled to "
+				"ITCM but only %lu bytes of ITCM present\n",
+				itcm_code_sz, (itcm_end - ITCM_OFFSET));
+			return;
+		}
+		itcm_res.end = itcm_end - 1;
+		request_resource(&iomem_resource, &itcm_res);
+		itcm_iomap[0].length = itcm_end - ITCM_OFFSET;
+		iotable_init(itcm_iomap, 1);
+		/* Copy code from RAM to ITCM */
+		start = &__sitcm_text;
+		end   = &__eitcm_text;
+		ram   = &__itcm_start;
+		memcpy(start, ram, itcm_code_sz);
+		pr_debug("CPU ITCM: copied code from %p - %p\n",
+			 start, end);
+		itcm_present = true;
+	} else if (itcm_code_sz) {
+		pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no "
+			"ITCM banks present in CPU\n", itcm_code_sz);
+	}
+}
+
+/*
+ * This creates the TCM memory pool and has to be done later,
+ * during the core_initicalls, since the allocator is not yet
+ * up and running when the first initialization runs.
+ */
+static int __init setup_tcm_pool(void)
+{
+	u32 dtcm_pool_start = (u32) &__edtcm_data;
+	u32 itcm_pool_start = (u32) &__eitcm_text;
+	int ret;
+
+	/*
+	 * Set up malloc pool, 2^2 = 4 bytes granularity since
+	 * the TCM is sometimes just 4 KiB. NB: pages and cache
+	 * line alignments does not matter in TCM!
+	 */
+	tcm_pool = gen_pool_create(2, -1);
+
+	pr_debug("Setting up TCM memory pool\n");
+
+	/* Add the rest of DTCM to the TCM pool */
+	if (dtcm_present) {
+		if (dtcm_pool_start < dtcm_end) {
+			ret = gen_pool_add(tcm_pool, dtcm_pool_start,
+					   dtcm_end - dtcm_pool_start, -1);
+			if (ret) {
+				pr_err("CPU DTCM: could not add DTCM " \
+				       "remainder to pool!\n");
+				return ret;
+			}
+			pr_debug("CPU DTCM: Added %08x bytes @ %08x to " \
+				 "the TCM memory pool\n",
+				 dtcm_end - dtcm_pool_start,
+				 dtcm_pool_start);
+		}
+	}
+
+	/* Add the rest of ITCM to the TCM pool */
+	if (itcm_present) {
+		if (itcm_pool_start < itcm_end) {
+			ret = gen_pool_add(tcm_pool, itcm_pool_start,
+					   itcm_end - itcm_pool_start, -1);
+			if (ret) {
+				pr_err("CPU ITCM: could not add ITCM " \
+				       "remainder to pool!\n");
+				return ret;
+			}
+			pr_debug("CPU ITCM: Added %08x bytes @ %08x to " \
+				 "the TCM memory pool\n",
+				 itcm_end - itcm_pool_start,
+				 itcm_pool_start);
+		}
+	}
+	return 0;
+}
+
+core_initcall(setup_tcm_pool);
diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c
new file mode 100644
index 000000000..8ff8dbfbe
--- /dev/null
+++ b/arch/arm/kernel/thumbee.c
@@ -0,0 +1,82 @@
+/*
+ * arch/arm/kernel/thumbee.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/cputype.h>
+#include <asm/system_info.h>
+#include <asm/thread_notify.h>
+
+/*
+ * Access to the ThumbEE Handler Base register
+ */
+static inline unsigned long teehbr_read(void)
+{
+	unsigned long v;
+	asm("mrc	p14, 6, %0, c1, c0, 0\n" : "=r" (v));
+	return v;
+}
+
+static inline void teehbr_write(unsigned long v)
+{
+	asm("mcr	p14, 6, %0, c1, c0, 0\n" : : "r" (v));
+}
+
+static int thumbee_notifier(struct notifier_block *self, unsigned long cmd, void *t)
+{
+	struct thread_info *thread = t;
+
+	switch (cmd) {
+	case THREAD_NOTIFY_FLUSH:
+		teehbr_write(0);
+		break;
+	case THREAD_NOTIFY_SWITCH:
+		current_thread_info()->thumbee_state = teehbr_read();
+		teehbr_write(thread->thumbee_state);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block thumbee_notifier_block = {
+	.notifier_call	= thumbee_notifier,
+};
+
+static int __init thumbee_init(void)
+{
+	unsigned long pfr0;
+	unsigned int cpu_arch = cpu_architecture();
+
+	if (cpu_arch < CPU_ARCH_ARMv7)
+		return 0;
+
+	pfr0 = read_cpuid_ext(CPUID_EXT_PFR0);
+	if ((pfr0 & 0x0000f000) != 0x00001000)
+		return 0;
+
+	pr_info("ThumbEE CPU extension supported.\n");
+	elf_hwcap |= HWCAP_THUMBEE;
+	thread_register_notifier(&thumbee_notifier_block);
+
+	return 0;
+}
+
+late_initcall(thumbee_init);
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
new file mode 100644
index 000000000..a66e37e21
--- /dev/null
+++ b/arch/arm/kernel/time.c
@@ -0,0 +1,125 @@
+/*
+ *  linux/arch/arm/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *  Modifications for ARM (C) 1994-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains the ARM-specific time handling details:
+ *  reading the RTC at bootup, etc...
+ */
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/sched_clock.h>
+#include <linux/smp.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/timer.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+#include <asm/stacktrace.h>
+#include <asm/thread_info.h>
+
+#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) || \
+    defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE)
+/* this needs a better home */
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
+#endif	/* pc-style 'CMOS' RTC support */
+
+/* change this if you have some constant time drift */
+#define USECS_PER_JIFFY	(1000000/HZ)
+
+#ifdef CONFIG_SMP
+unsigned long profile_pc(struct pt_regs *regs)
+{
+	struct stackframe frame;
+
+	if (!in_lock_functions(regs->ARM_pc))
+		return regs->ARM_pc;
+
+	arm_get_current_stackframe(regs, &frame);
+	do {
+		int ret = unwind_frame(&frame);
+		if (ret < 0)
+			return 0;
+	} while (in_lock_functions(frame.pc));
+
+	return frame.pc;
+}
+EXPORT_SYMBOL(profile_pc);
+#endif
+
+#ifndef CONFIG_GENERIC_CLOCKEVENTS
+/*
+ * Kernel system timer support.
+ */
+void timer_tick(void)
+{
+	profile_tick(CPU_PROFILING);
+	xtime_update(1);
+#ifndef CONFIG_SMP
+	update_process_times(user_mode(get_irq_regs()));
+#endif
+}
+#endif
+
+static void dummy_clock_access(struct timespec64 *ts)
+{
+	ts->tv_sec = 0;
+	ts->tv_nsec = 0;
+}
+
+static clock_access_fn __read_persistent_clock = dummy_clock_access;
+static clock_access_fn __read_boot_clock = dummy_clock_access;;
+
+void read_persistent_clock64(struct timespec64 *ts)
+{
+	__read_persistent_clock(ts);
+}
+
+void read_boot_clock64(struct timespec64 *ts)
+{
+	__read_boot_clock(ts);
+}
+
+int __init register_persistent_clock(clock_access_fn read_boot,
+				     clock_access_fn read_persistent)
+{
+	/* Only allow the clockaccess functions to be registered once */
+	if (__read_persistent_clock == dummy_clock_access &&
+	    __read_boot_clock == dummy_clock_access) {
+		if (read_boot)
+			__read_boot_clock = read_boot;
+		if (read_persistent)
+			__read_persistent_clock = read_persistent;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+void __init time_init(void)
+{
+	if (machine_desc->init_time) {
+		machine_desc->init_time();
+	} else {
+#ifdef CONFIG_COMMON_CLK
+		of_clk_init(NULL);
+#endif
+		clocksource_of_init();
+	}
+}
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
new file mode 100644
index 000000000..08b7847bf
--- /dev/null
+++ b/arch/arm/kernel/topology.c
@@ -0,0 +1,318 @@
+/*
+ * arch/arm/kernel/topology.c
+ *
+ * Copyright (C) 2011 Linaro Limited.
+ * Written by: Vincent Guittot
+ *
+ * based on arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/cputype.h>
+#include <asm/topology.h>
+
+/*
+ * cpu capacity scale management
+ */
+
+/*
+ * cpu capacity table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heteregenous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_capacity field so the scheduler
+ * can take this difference into account during load balance. A per cpu
+ * structure is preferred because each CPU updates its own cpu_capacity field
+ * during the load balance except for idle cores. One idle core is selected
+ * to run the rebalance_domains for all idle cores and the cpu_capacity can be
+ * updated during this sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+	return per_cpu(cpu_scale, cpu);
+}
+
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+{
+	per_cpu(cpu_scale, cpu) = capacity;
+}
+
+#ifdef CONFIG_OF
+struct cpu_efficiency {
+	const char *compatible;
+	unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ *   0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_CAPACITY_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+	{"arm,cortex-a15", 3891},
+	{"arm,cortex-a7",  2048},
+	{NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu)	__cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+
+/*
+ * Iterate all CPUs' descriptor in DT and compute the efficiency
+ * (as per table_efficiency). Also calculate a middle efficiency
+ * as close as possible to  (max{eff_i} - min{eff_i}) / 2
+ * This is later used to scale the cpu_capacity field such that an
+ * 'average' CPU is of middle capacity. Also see the comments near
+ * table_efficiency[] and update_cpu_capacity().
+ */
+static void __init parse_dt_topology(void)
+{
+	const struct cpu_efficiency *cpu_eff;
+	struct device_node *cn = NULL;
+	unsigned long min_capacity = ULONG_MAX;
+	unsigned long max_capacity = 0;
+	unsigned long capacity = 0;
+	int cpu = 0;
+
+	__cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+				 GFP_NOWAIT);
+
+	for_each_possible_cpu(cpu) {
+		const u32 *rate;
+		int len;
+
+		/* too early to use cpu->of_node */
+		cn = of_get_cpu_node(cpu, NULL);
+		if (!cn) {
+			pr_err("missing device node for CPU %d\n", cpu);
+			continue;
+		}
+
+		for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+			if (of_device_is_compatible(cn, cpu_eff->compatible))
+				break;
+
+		if (cpu_eff->compatible == NULL)
+			continue;
+
+		rate = of_get_property(cn, "clock-frequency", &len);
+		if (!rate || len != 4) {
+			pr_err("%s missing clock-frequency property\n",
+				cn->full_name);
+			continue;
+		}
+
+		capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+		/* Save min capacity of the system */
+		if (capacity < min_capacity)
+			min_capacity = capacity;
+
+		/* Save max capacity of the system */
+		if (capacity > max_capacity)
+			max_capacity = capacity;
+
+		cpu_capacity(cpu) = capacity;
+	}
+
+	/* If min and max capacities are equals, we bypass the update of the
+	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
+	 * compute a middle_capacity factor that will ensure that the capacity
+	 * of an 'average' CPU of the system will be as close as possible to
+	 * SCHED_CAPACITY_SCALE, which is the default value, but with the
+	 * constraint explained near table_efficiency[].
+	 */
+	if (4*max_capacity < (3*(max_capacity + min_capacity)))
+		middle_capacity = (min_capacity + max_capacity)
+				>> (SCHED_CAPACITY_SHIFT+1);
+	else
+		middle_capacity = ((max_capacity / 3)
+				>> (SCHED_CAPACITY_SHIFT-1)) + 1;
+
+}
+
+/*
+ * Look for a customed capacity of a CPU in the cpu_capacity table during the
+ * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
+ * function returns directly for SMP system.
+ */
+static void update_cpu_capacity(unsigned int cpu)
+{
+	if (!cpu_capacity(cpu))
+		return;
+
+	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+
+	pr_info("CPU%u: update cpu_capacity %lu\n",
+		cpu, arch_scale_cpu_capacity(NULL, cpu));
+}
+
+#else
+static inline void parse_dt_topology(void) {}
+static inline void update_cpu_capacity(unsigned int cpuid) {}
+#endif
+
+ /*
+ * cpu topology table
+ */
+struct cputopo_arm cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_topology[cpu].core_sibling;
+}
+
+/*
+ * The current assumption is that we can power gate each core independently.
+ * This will be superseded by DT binding once available.
+ */
+const struct cpumask *cpu_corepower_mask(int cpu)
+{
+	return &cpu_topology[cpu].thread_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+	struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+	int cpu;
+
+	/* update core and thread sibling masks */
+	for_each_possible_cpu(cpu) {
+		cpu_topo = &cpu_topology[cpu];
+
+		if (cpuid_topo->socket_id != cpu_topo->socket_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != cpu_topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+	smp_wmb();
+}
+
+/*
+ * store_cpu_topology is called at boot when only one cpu is running
+ * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
+ * which prevents simultaneous write access to cpu_topology array
+ */
+void store_cpu_topology(unsigned int cpuid)
+{
+	struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
+	unsigned int mpidr;
+
+	/* If the cpu topology has been already set, just return */
+	if (cpuid_topo->core_id != -1)
+		return;
+
+	mpidr = read_cpuid_mpidr();
+
+	/* create cpu topology mapping */
+	if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) {
+		/*
+		 * This is a multiprocessor system
+		 * multiprocessor format & multiprocessor mode field are set
+		 */
+
+		if (mpidr & MPIDR_MT_BITMASK) {
+			/* core performance interdependency */
+			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+		} else {
+			/* largely independent cores */
+			cpuid_topo->thread_id = -1;
+			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+		}
+	} else {
+		/*
+		 * This is an uniprocessor system
+		 * we are in multiprocessor format but uniprocessor system
+		 * or in the old uniprocessor format
+		 */
+		cpuid_topo->thread_id = -1;
+		cpuid_topo->core_id = 0;
+		cpuid_topo->socket_id = -1;
+	}
+
+	update_siblings_masks(cpuid);
+
+	update_cpu_capacity(cpuid);
+
+	pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
+		cpuid, cpu_topology[cpuid].thread_id,
+		cpu_topology[cpuid].core_id,
+		cpu_topology[cpuid].socket_id, mpidr);
+}
+
+static inline int cpu_corepower_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES  | SD_SHARE_POWERDOMAIN;
+}
+
+static struct sched_domain_topology_level arm_topology[] = {
+#ifdef CONFIG_SCHED_MC
+	{ cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) },
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+/*
+ * init_cpu_topology is called at boot when only one cpu is running
+ * which prevent simultaneous write access to cpu_topology array
+ */
+void __init init_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	/* init core mask and capacity */
+	for_each_possible_cpu(cpu) {
+		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
+
+		cpu_topo->thread_id = -1;
+		cpu_topo->core_id =  -1;
+		cpu_topo->socket_id = -1;
+		cpumask_clear(&cpu_topo->core_sibling);
+		cpumask_clear(&cpu_topo->thread_sibling);
+
+		set_capacity_scale(cpu, SCHED_CAPACITY_SCALE);
+	}
+	smp_wmb();
+
+	parse_dt_topology();
+
+	/* Set scheduler topology descriptor */
+	set_sched_topology(arm_topology);
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
new file mode 100644
index 000000000..3dce1a342
--- /dev/null
+++ b/arch/arm/kernel/traps.c
@@ -0,0 +1,889 @@
+/*
+ *  linux/arch/arm/kernel/traps.c
+ *
+ *  Copyright (C) 1995-2009 Russell King
+ *  Fragments that appear the same as linux/arch/i386/kernel/traps.c (C) Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  'traps.c' handles hardware exceptions after we have saved some state in
+ *  'linux/arch/arm/lib/traps.S'.  Mostly a debugging aid, but will probably
+ *  kill the offending process.
+ */
+#include <linux/signal.h>
+#include <linux/personality.h>
+#include <linux/kallsyms.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/hardirq.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/kexec.h>
+#include <linux/bug.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/irq.h>
+
+#include <linux/atomic.h>
+#include <asm/cacheflush.h>
+#include <asm/exception.h>
+#include <asm/unistd.h>
+#include <asm/traps.h>
+#include <asm/ptrace.h>
+#include <asm/unwind.h>
+#include <asm/tls.h>
+#include <asm/system_misc.h>
+#include <asm/opcodes.h>
+
+
+static const char *handler[]= {
+	"prefetch abort",
+	"data abort",
+	"address exception",
+	"interrupt",
+	"undefined instruction",
+};
+
+void *vectors_page;
+
+#ifdef CONFIG_DEBUG_USER
+unsigned int user_debug;
+
+static int __init user_debug_setup(char *str)
+{
+	get_option(&str, &user_debug);
+	return 1;
+}
+__setup("user_debug=", user_debug_setup);
+#endif
+
+static void dump_mem(const char *, const char *, unsigned long, unsigned long);
+
+void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame)
+{
+#ifdef CONFIG_KALLSYMS
+	printk("[<%08lx>] (%ps) from [<%08lx>] (%pS)\n", where, (void *)where, from, (void *)from);
+#else
+	printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from);
+#endif
+
+	if (in_exception_text(where))
+		dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs));
+}
+
+#ifndef CONFIG_ARM_UNWIND
+/*
+ * Stack pointers should always be within the kernels view of
+ * physical memory.  If it is not there, then we can't dump
+ * out any information relating to the stack.
+ */
+static int verify_stack(unsigned long sp)
+{
+	if (sp < PAGE_OFFSET ||
+	    (sp > (unsigned long)high_memory && high_memory != NULL))
+		return -EFAULT;
+
+	return 0;
+}
+#endif
+
+/*
+ * Dump out the contents of some memory nicely...
+ */
+static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
+		     unsigned long top)
+{
+	unsigned long first;
+	mm_segment_t fs;
+	int i;
+
+	/*
+	 * We need to switch to kernel mode so that we can use __get_user
+	 * to safely read from kernel space.  Note that we now dump the
+	 * code first, just in case the backtrace kills us.
+	 */
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	printk("%s%s(0x%08lx to 0x%08lx)\n", lvl, str, bottom, top);
+
+	for (first = bottom & ~31; first < top; first += 32) {
+		unsigned long p;
+		char str[sizeof(" 12345678") * 8 + 1];
+
+		memset(str, ' ', sizeof(str));
+		str[sizeof(str) - 1] = '\0';
+
+		for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
+			if (p >= bottom && p < top) {
+				unsigned long val;
+				if (__get_user(val, (unsigned long *)p) == 0)
+					sprintf(str + i * 9, " %08lx", val);
+				else
+					sprintf(str + i * 9, " ????????");
+			}
+		}
+		printk("%s%04lx:%s\n", lvl, first & 0xffff, str);
+	}
+
+	set_fs(fs);
+}
+
+static void dump_instr(const char *lvl, struct pt_regs *regs)
+{
+	unsigned long addr = instruction_pointer(regs);
+	const int thumb = thumb_mode(regs);
+	const int width = thumb ? 4 : 8;
+	mm_segment_t fs;
+	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
+	int i;
+
+	/*
+	 * We need to switch to kernel mode so that we can use __get_user
+	 * to safely read from kernel space.  Note that we now dump the
+	 * code first, just in case the backtrace kills us.
+	 */
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	for (i = -4; i < 1 + !!thumb; i++) {
+		unsigned int val, bad;
+
+		if (thumb)
+			bad = __get_user(val, &((u16 *)addr)[i]);
+		else
+			bad = __get_user(val, &((u32 *)addr)[i]);
+
+		if (!bad)
+			p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ",
+					width, val);
+		else {
+			p += sprintf(p, "bad PC value");
+			break;
+		}
+	}
+	printk("%sCode: %s\n", lvl, str);
+
+	set_fs(fs);
+}
+
+#ifdef CONFIG_ARM_UNWIND
+static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+{
+	unwind_backtrace(regs, tsk);
+}
+#else
+static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+{
+	unsigned int fp, mode;
+	int ok = 1;
+
+	printk("Backtrace: ");
+
+	if (!tsk)
+		tsk = current;
+
+	if (regs) {
+		fp = frame_pointer(regs);
+		mode = processor_mode(regs);
+	} else if (tsk != current) {
+		fp = thread_saved_fp(tsk);
+		mode = 0x10;
+	} else {
+		asm("mov %0, fp" : "=r" (fp) : : "cc");
+		mode = 0x10;
+	}
+
+	if (!fp) {
+		pr_cont("no frame pointer");
+		ok = 0;
+	} else if (verify_stack(fp)) {
+		pr_cont("invalid frame pointer 0x%08x", fp);
+		ok = 0;
+	} else if (fp < (unsigned long)end_of_stack(tsk))
+		pr_cont("frame pointer underflow");
+	pr_cont("\n");
+
+	if (ok)
+		c_backtrace(fp, mode);
+}
+#endif
+
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+	dump_backtrace(NULL, tsk);
+	barrier();
+}
+
+#ifdef CONFIG_PREEMPT
+#define S_PREEMPT " PREEMPT"
+#else
+#define S_PREEMPT ""
+#endif
+#ifdef CONFIG_SMP
+#define S_SMP " SMP"
+#else
+#define S_SMP ""
+#endif
+#ifdef CONFIG_THUMB2_KERNEL
+#define S_ISA " THUMB2"
+#else
+#define S_ISA " ARM"
+#endif
+
+static int __die(const char *str, int err, struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+	static int die_counter;
+	int ret;
+
+	pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP S_ISA "\n",
+	         str, err, ++die_counter);
+
+	/* trap and error numbers are mostly meaningless on ARM */
+	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV);
+	if (ret == NOTIFY_STOP)
+		return 1;
+
+	print_modules();
+	__show_regs(regs);
+	pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
+		 TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
+
+	if (!user_mode(regs) || in_interrupt()) {
+		dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
+			 THREAD_SIZE + (unsigned long)task_stack_page(tsk));
+		dump_backtrace(regs, tsk);
+		dump_instr(KERN_EMERG, regs);
+	}
+
+	return 0;
+}
+
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+static unsigned long oops_begin(void)
+{
+	int cpu;
+	unsigned long flags;
+
+	oops_enter();
+
+	/* racy, but better than risking deadlock. */
+	raw_local_irq_save(flags);
+	cpu = smp_processor_id();
+	if (!arch_spin_trylock(&die_lock)) {
+		if (cpu == die_owner)
+			/* nested oops. should stop eventually */;
+		else
+			arch_spin_lock(&die_lock);
+	}
+	die_nest_count++;
+	die_owner = cpu;
+	console_verbose();
+	bust_spinlocks(1);
+	return flags;
+}
+
+static void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+	if (regs && kexec_should_crash(current))
+		crash_kexec(regs);
+
+	bust_spinlocks(0);
+	die_owner = -1;
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	die_nest_count--;
+	if (!die_nest_count)
+		/* Nest count reaches zero, release the lock. */
+		arch_spin_unlock(&die_lock);
+	raw_local_irq_restore(flags);
+	oops_exit();
+
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+	if (panic_on_oops)
+		panic("Fatal exception");
+	if (signr)
+		do_exit(signr);
+}
+
+/*
+ * This function is protected against re-entrancy.
+ */
+void die(const char *str, struct pt_regs *regs, int err)
+{
+	enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE;
+	unsigned long flags = oops_begin();
+	int sig = SIGSEGV;
+
+	if (!user_mode(regs))
+		bug_type = report_bug(regs->ARM_pc, regs);
+	if (bug_type != BUG_TRAP_TYPE_NONE)
+		str = "Oops - BUG";
+
+	if (__die(str, err, regs))
+		sig = 0;
+
+	oops_end(flags, regs, sig);
+}
+
+void arm_notify_die(const char *str, struct pt_regs *regs,
+		struct siginfo *info, unsigned long err, unsigned long trap)
+{
+	if (user_mode(regs)) {
+		current->thread.error_code = err;
+		current->thread.trap_no = trap;
+
+		force_sig_info(info->si_signo, info, current);
+	} else {
+		die(str, regs, err);
+	}
+}
+
+#ifdef CONFIG_GENERIC_BUG
+
+int is_valid_bugaddr(unsigned long pc)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	u16 bkpt;
+	u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE);
+#else
+	u32 bkpt;
+	u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE);
+#endif
+
+	if (probe_kernel_address((unsigned *)pc, bkpt))
+		return 0;
+
+	return bkpt == insn;
+}
+
+#endif
+
+static LIST_HEAD(undef_hook);
+static DEFINE_RAW_SPINLOCK(undef_lock);
+
+void register_undef_hook(struct undef_hook *hook)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_add(&hook->node, &undef_hook);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+}
+
+void unregister_undef_hook(struct undef_hook *hook)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_del(&hook->node);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+}
+
+static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+{
+	struct undef_hook *hook;
+	unsigned long flags;
+	int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL;
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_for_each_entry(hook, &undef_hook, node)
+		if ((instr & hook->instr_mask) == hook->instr_val &&
+		    (regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val)
+			fn = hook->fn;
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+
+	return fn ? fn(regs, instr) : 1;
+}
+
+asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+{
+	unsigned int instr;
+	siginfo_t info;
+	void __user *pc;
+
+	pc = (void __user *)instruction_pointer(regs);
+
+	if (processor_mode(regs) == SVC_MODE) {
+#ifdef CONFIG_THUMB2_KERNEL
+		if (thumb_mode(regs)) {
+			instr = __mem_to_opcode_thumb16(((u16 *)pc)[0]);
+			if (is_wide_instruction(instr)) {
+				u16 inst2;
+				inst2 = __mem_to_opcode_thumb16(((u16 *)pc)[1]);
+				instr = __opcode_thumb32_compose(instr, inst2);
+			}
+		} else
+#endif
+			instr = __mem_to_opcode_arm(*(u32 *) pc);
+	} else if (thumb_mode(regs)) {
+		if (get_user(instr, (u16 __user *)pc))
+			goto die_sig;
+		instr = __mem_to_opcode_thumb16(instr);
+		if (is_wide_instruction(instr)) {
+			unsigned int instr2;
+			if (get_user(instr2, (u16 __user *)pc+1))
+				goto die_sig;
+			instr2 = __mem_to_opcode_thumb16(instr2);
+			instr = __opcode_thumb32_compose(instr, instr2);
+		}
+	} else {
+		if (get_user(instr, (u32 __user *)pc))
+			goto die_sig;
+		instr = __mem_to_opcode_arm(instr);
+	}
+
+	if (call_undef_hook(regs, instr) == 0)
+		return;
+
+die_sig:
+#ifdef CONFIG_DEBUG_USER
+	if (user_debug & UDBG_UNDEFINED) {
+		pr_info("%s (%d): undefined instruction: pc=%p\n",
+			current->comm, task_pid_nr(current), pc);
+		__show_regs(regs);
+		dump_instr(KERN_INFO, regs);
+	}
+#endif
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLOPC;
+	info.si_addr  = pc;
+
+	arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
+}
+
+/*
+ * Handle FIQ similarly to NMI on x86 systems.
+ *
+ * The runtime environment for NMIs is extremely restrictive
+ * (NMIs can pre-empt critical sections meaning almost all locking is
+ * forbidden) meaning this default FIQ handling must only be used in
+ * circumstances where non-maskability improves robustness, such as
+ * watchdog or debug logic.
+ *
+ * This handler is not appropriate for general purpose use in drivers
+ * platform code and can be overrideen using set_fiq_handler.
+ */
+asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	nmi_enter();
+
+	/* nop. FIQ handlers for special arch/arm features can be added here. */
+
+	nmi_exit();
+
+	set_irq_regs(old_regs);
+}
+
+/*
+ * bad_mode handles the impossible case in the vectors.  If you see one of
+ * these, then it's extremely serious, and could mean you have buggy hardware.
+ * It never returns, and never tries to sync.  We hope that we can at least
+ * dump out some state information...
+ */
+asmlinkage void bad_mode(struct pt_regs *regs, int reason)
+{
+	console_verbose();
+
+	pr_crit("Bad mode in %s handler detected\n", handler[reason]);
+
+	die("Oops - bad mode", regs, 0);
+	local_irq_disable();
+	panic("bad mode");
+}
+
+static int bad_syscall(int n, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	if ((current->personality & PER_MASK) != PER_LINUX) {
+		send_sig(SIGSEGV, current, 1);
+		return regs->ARM_r0;
+	}
+
+#ifdef CONFIG_DEBUG_USER
+	if (user_debug & UDBG_SYSCALL) {
+		pr_err("[%d] %s: obsolete system call %08x.\n",
+			task_pid_nr(current), current->comm, n);
+		dump_instr(KERN_ERR, regs);
+	}
+#endif
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLTRP;
+	info.si_addr  = (void __user *)instruction_pointer(regs) -
+			 (thumb_mode(regs) ? 2 : 4);
+
+	arm_notify_die("Oops - bad syscall", regs, &info, n, 0);
+
+	return regs->ARM_r0;
+}
+
+static inline int
+__do_cache_op(unsigned long start, unsigned long end)
+{
+	int ret;
+
+	do {
+		unsigned long chunk = min(PAGE_SIZE, end - start);
+
+		if (fatal_signal_pending(current))
+			return 0;
+
+		ret = flush_cache_user_range(start, start + chunk);
+		if (ret)
+			return ret;
+
+		cond_resched();
+		start += chunk;
+	} while (start < end);
+
+	return 0;
+}
+
+static inline int
+do_cache_op(unsigned long start, unsigned long end, int flags)
+{
+	if (end < start || flags)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_READ, start, end - start))
+		return -EFAULT;
+
+	return __do_cache_op(start, end);
+}
+
+/*
+ * Handle all unrecognised system calls.
+ *  0x9f0000 - 0x9fffff are some more esoteric system calls
+ */
+#define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE)
+asmlinkage int arm_syscall(int no, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	if ((no >> 16) != (__ARM_NR_BASE>> 16))
+		return bad_syscall(no, regs);
+
+	switch (no & 0xffff) {
+	case 0: /* branch through 0 */
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code  = SEGV_MAPERR;
+		info.si_addr  = NULL;
+
+		arm_notify_die("branch through zero", regs, &info, 0, 0);
+		return 0;
+
+	case NR(breakpoint): /* SWI BREAK_POINT */
+		regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
+		ptrace_break(current, regs);
+		return regs->ARM_r0;
+
+	/*
+	 * Flush a region from virtual address 'r0' to virtual address 'r1'
+	 * _exclusive_.  There is no alignment requirement on either address;
+	 * user space does not need to know the hardware cache layout.
+	 *
+	 * r2 contains flags.  It should ALWAYS be passed as ZERO until it
+	 * is defined to be something else.  For now we ignore it, but may
+	 * the fires of hell burn in your belly if you break this rule. ;)
+	 *
+	 * (at a later date, we may want to allow this call to not flush
+	 * various aspects of the cache.  Passing '0' will guarantee that
+	 * everything necessary gets flushed to maintain consistency in
+	 * the specified region).
+	 */
+	case NR(cacheflush):
+		return do_cache_op(regs->ARM_r0, regs->ARM_r1, regs->ARM_r2);
+
+	case NR(usr26):
+		if (!(elf_hwcap & HWCAP_26BIT))
+			break;
+		regs->ARM_cpsr &= ~MODE32_BIT;
+		return regs->ARM_r0;
+
+	case NR(usr32):
+		if (!(elf_hwcap & HWCAP_26BIT))
+			break;
+		regs->ARM_cpsr |= MODE32_BIT;
+		return regs->ARM_r0;
+
+	case NR(set_tls):
+		set_tls(regs->ARM_r0);
+		return 0;
+
+#ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG
+	/*
+	 * Atomically store r1 in *r2 if *r2 is equal to r0 for user space.
+	 * Return zero in r0 if *MEM was changed or non-zero if no exchange
+	 * happened.  Also set the user C flag accordingly.
+	 * If access permissions have to be fixed up then non-zero is
+	 * returned and the operation has to be re-attempted.
+	 *
+	 * *NOTE*: This is a ghost syscall private to the kernel.  Only the
+	 * __kuser_cmpxchg code in entry-armv.S should be aware of its
+	 * existence.  Don't ever use this from user code.
+	 */
+	case NR(cmpxchg):
+	for (;;) {
+		extern void do_DataAbort(unsigned long addr, unsigned int fsr,
+					 struct pt_regs *regs);
+		unsigned long val;
+		unsigned long addr = regs->ARM_r2;
+		struct mm_struct *mm = current->mm;
+		pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+		spinlock_t *ptl;
+
+		regs->ARM_cpsr &= ~PSR_C_BIT;
+		down_read(&mm->mmap_sem);
+		pgd = pgd_offset(mm, addr);
+		if (!pgd_present(*pgd))
+			goto bad_access;
+		pmd = pmd_offset(pgd, addr);
+		if (!pmd_present(*pmd))
+			goto bad_access;
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte_present(*pte) || !pte_write(*pte) || !pte_dirty(*pte)) {
+			pte_unmap_unlock(pte, ptl);
+			goto bad_access;
+		}
+		val = *(unsigned long *)addr;
+		val -= regs->ARM_r0;
+		if (val == 0) {
+			*(unsigned long *)addr = regs->ARM_r1;
+			regs->ARM_cpsr |= PSR_C_BIT;
+		}
+		pte_unmap_unlock(pte, ptl);
+		up_read(&mm->mmap_sem);
+		return val;
+
+		bad_access:
+		up_read(&mm->mmap_sem);
+		/* simulate a write access fault */
+		do_DataAbort(addr, 15 + (1 << 11), regs);
+	}
+#endif
+
+	default:
+		/* Calls 9f00xx..9f07ff are defined to return -ENOSYS
+		   if not implemented, rather than raising SIGILL.  This
+		   way the calling program can gracefully determine whether
+		   a feature is supported.  */
+		if ((no & 0xffff) <= 0x7ff)
+			return -ENOSYS;
+		break;
+	}
+#ifdef CONFIG_DEBUG_USER
+	/*
+	 * experience shows that these seem to indicate that
+	 * something catastrophic has happened
+	 */
+	if (user_debug & UDBG_SYSCALL) {
+		pr_err("[%d] %s: arm syscall %d\n",
+		       task_pid_nr(current), current->comm, no);
+		dump_instr("", regs);
+		if (user_mode(regs)) {
+			__show_regs(regs);
+			c_backtrace(frame_pointer(regs), processor_mode(regs));
+		}
+	}
+#endif
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLTRP;
+	info.si_addr  = (void __user *)instruction_pointer(regs) -
+			 (thumb_mode(regs) ? 2 : 4);
+
+	arm_notify_die("Oops - bad syscall(2)", regs, &info, no, 0);
+	return 0;
+}
+
+#ifdef CONFIG_TLS_REG_EMUL
+
+/*
+ * We might be running on an ARMv6+ processor which should have the TLS
+ * register but for some reason we can't use it, or maybe an SMP system
+ * using a pre-ARMv6 processor (there are apparently a few prototypes like
+ * that in existence) and therefore access to that register must be
+ * emulated.
+ */
+
+static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
+{
+	int reg = (instr >> 12) & 15;
+	if (reg == 15)
+		return 1;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
+	regs->ARM_pc += 4;
+	return 0;
+}
+
+static struct undef_hook arm_mrc_hook = {
+	.instr_mask	= 0x0fff0fff,
+	.instr_val	= 0x0e1d0f70,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= 0,
+	.fn		= get_tp_trap,
+};
+
+static int __init arm_mrc_hook_init(void)
+{
+	register_undef_hook(&arm_mrc_hook);
+	return 0;
+}
+
+late_initcall(arm_mrc_hook_init);
+
+#endif
+
+void __bad_xchg(volatile void *ptr, int size)
+{
+	pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
+	       __builtin_return_address(0), ptr, size);
+	BUG();
+}
+EXPORT_SYMBOL(__bad_xchg);
+
+/*
+ * A data abort trap was taken, but we did not handle the instruction.
+ * Try to abort the user program, or panic if it was the kernel.
+ */
+asmlinkage void
+baddataabort(int code, unsigned long instr, struct pt_regs *regs)
+{
+	unsigned long addr = instruction_pointer(regs);
+	siginfo_t info;
+
+#ifdef CONFIG_DEBUG_USER
+	if (user_debug & UDBG_BADABORT) {
+		pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n",
+		       task_pid_nr(current), current->comm, code, instr);
+		dump_instr(KERN_ERR, regs);
+		show_pte(current->mm, addr);
+	}
+#endif
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLOPC;
+	info.si_addr  = (void __user *)addr;
+
+	arm_notify_die("unknown data abort code", regs, &info, instr, 0);
+}
+
+void __readwrite_bug(const char *fn)
+{
+	pr_err("%s called, but not implemented\n", fn);
+	BUG();
+}
+EXPORT_SYMBOL(__readwrite_bug);
+
+void __pte_error(const char *file, int line, pte_t pte)
+{
+	pr_err("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte));
+}
+
+void __pmd_error(const char *file, int line, pmd_t pmd)
+{
+	pr_err("%s:%d: bad pmd %08llx.\n", file, line, (long long)pmd_val(pmd));
+}
+
+void __pgd_error(const char *file, int line, pgd_t pgd)
+{
+	pr_err("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd));
+}
+
+asmlinkage void __div0(void)
+{
+	pr_err("Division by zero in kernel.\n");
+	dump_stack();
+}
+EXPORT_SYMBOL(__div0);
+
+void abort(void)
+{
+	BUG();
+
+	/* if that doesn't kill us, halt */
+	panic("Oops failed to kill thread");
+}
+EXPORT_SYMBOL(abort);
+
+void __init trap_init(void)
+{
+	return;
+}
+
+#ifdef CONFIG_KUSER_HELPERS
+static void __init kuser_init(void *vectors)
+{
+	extern char __kuser_helper_start[], __kuser_helper_end[];
+	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+
+	memcpy(vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
+
+	/*
+	 * vectors + 0xfe0 = __kuser_get_tls
+	 * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
+	 */
+	if (tls_emu || has_tls_reg)
+		memcpy(vectors + 0xfe0, vectors + 0xfe8, 4);
+}
+#else
+static inline void __init kuser_init(void *vectors)
+{
+}
+#endif
+
+void __init early_trap_init(void *vectors_base)
+{
+#ifndef CONFIG_CPU_V7M
+	unsigned long vectors = (unsigned long)vectors_base;
+	extern char __stubs_start[], __stubs_end[];
+	extern char __vectors_start[], __vectors_end[];
+	unsigned i;
+
+	vectors_page = vectors_base;
+
+	/*
+	 * Poison the vectors page with an undefined instruction.  This
+	 * instruction is chosen to be undefined for both ARM and Thumb
+	 * ISAs.  The Thumb version is an undefined instruction with a
+	 * branch back to the undefined instruction.
+	 */
+	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+		((u32 *)vectors_base)[i] = 0xe7fddef1;
+
+	/*
+	 * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
+	 * into the vector page, mapped at 0xffff0000, and ensure these
+	 * are visible to the instruction stream.
+	 */
+	memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
+	memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start);
+
+	kuser_init(vectors_base);
+
+	flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
+	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
+#else /* ifndef CONFIG_CPU_V7M */
+	/*
+	 * on V7-M there is no need to copy the vector table to a dedicated
+	 * memory area. The address is configurable and so a table in the kernel
+	 * image can be used.
+	 */
+#endif
+}
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
new file mode 100644
index 000000000..0bee233fe
--- /dev/null
+++ b/arch/arm/kernel/unwind.c
@@ -0,0 +1,551 @@
+/*
+ * arch/arm/kernel/unwind.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * Stack unwinding support for ARM
+ *
+ * An ARM EABI version of gcc is required to generate the unwind
+ * tables. For information about the structure of the unwind tables,
+ * see "Exception Handling ABI for the ARM Architecture" at:
+ *
+ * http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html
+ */
+
+#ifndef __CHECKER__
+#if !defined (__ARM_EABI__)
+#warning Your compiler does not have EABI support.
+#warning    ARM unwind is known to compile only with EABI compilers.
+#warning    Change compiler or disable ARM_UNWIND option.
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ <= 2) && !defined(__clang__)
+#warning Your compiler is too buggy; it is known to not compile ARM unwind support.
+#warning    Change compiler or disable ARM_UNWIND option.
+#endif
+#endif /* __CHECKER__ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#include <asm/stacktrace.h>
+#include <asm/traps.h>
+#include <asm/unwind.h>
+
+/* Dummy functions to avoid linker complaints */
+void __aeabi_unwind_cpp_pr0(void)
+{
+};
+EXPORT_SYMBOL(__aeabi_unwind_cpp_pr0);
+
+void __aeabi_unwind_cpp_pr1(void)
+{
+};
+EXPORT_SYMBOL(__aeabi_unwind_cpp_pr1);
+
+void __aeabi_unwind_cpp_pr2(void)
+{
+};
+EXPORT_SYMBOL(__aeabi_unwind_cpp_pr2);
+
+struct unwind_ctrl_block {
+	unsigned long vrs[16];		/* virtual register set */
+	const unsigned long *insn;	/* pointer to the current instructions word */
+	unsigned long sp_high;		/* highest value of sp allowed */
+	/*
+	 * 1 : check for stack overflow for each register pop.
+	 * 0 : save overhead if there is plenty of stack remaining.
+	 */
+	int check_each_pop;
+	int entries;			/* number of entries left to interpret */
+	int byte;			/* current byte number in the instructions word */
+};
+
+enum regs {
+#ifdef CONFIG_THUMB2_KERNEL
+	FP = 7,
+#else
+	FP = 11,
+#endif
+	SP = 13,
+	LR = 14,
+	PC = 15
+};
+
+extern const struct unwind_idx __start_unwind_idx[];
+static const struct unwind_idx *__origin_unwind_idx;
+extern const struct unwind_idx __stop_unwind_idx[];
+
+static DEFINE_SPINLOCK(unwind_lock);
+static LIST_HEAD(unwind_tables);
+
+/* Convert a prel31 symbol to an absolute address */
+#define prel31_to_addr(ptr)				\
+({							\
+	/* sign-extend to 32 bits */			\
+	long offset = (((long)*(ptr)) << 1) >> 1;	\
+	(unsigned long)(ptr) + offset;			\
+})
+
+/*
+ * Binary search in the unwind index. The entries are
+ * guaranteed to be sorted in ascending order by the linker.
+ *
+ * start = first entry
+ * origin = first entry with positive offset (or stop if there is no such entry)
+ * stop - 1 = last entry
+ */
+static const struct unwind_idx *search_index(unsigned long addr,
+				       const struct unwind_idx *start,
+				       const struct unwind_idx *origin,
+				       const struct unwind_idx *stop)
+{
+	unsigned long addr_prel31;
+
+	pr_debug("%s(%08lx, %p, %p, %p)\n",
+			__func__, addr, start, origin, stop);
+
+	/*
+	 * only search in the section with the matching sign. This way the
+	 * prel31 numbers can be compared as unsigned longs.
+	 */
+	if (addr < (unsigned long)start)
+		/* negative offsets: [start; origin) */
+		stop = origin;
+	else
+		/* positive offsets: [origin; stop) */
+		start = origin;
+
+	/* prel31 for address relavive to start */
+	addr_prel31 = (addr - (unsigned long)start) & 0x7fffffff;
+
+	while (start < stop - 1) {
+		const struct unwind_idx *mid = start + ((stop - start) >> 1);
+
+		/*
+		 * As addr_prel31 is relative to start an offset is needed to
+		 * make it relative to mid.
+		 */
+		if (addr_prel31 - ((unsigned long)mid - (unsigned long)start) <
+				mid->addr_offset)
+			stop = mid;
+		else {
+			/* keep addr_prel31 relative to start */
+			addr_prel31 -= ((unsigned long)mid -
+					(unsigned long)start);
+			start = mid;
+		}
+	}
+
+	if (likely(start->addr_offset <= addr_prel31))
+		return start;
+	else {
+		pr_warn("unwind: Unknown symbol address %08lx\n", addr);
+		return NULL;
+	}
+}
+
+static const struct unwind_idx *unwind_find_origin(
+		const struct unwind_idx *start, const struct unwind_idx *stop)
+{
+	pr_debug("%s(%p, %p)\n", __func__, start, stop);
+	while (start < stop) {
+		const struct unwind_idx *mid = start + ((stop - start) >> 1);
+
+		if (mid->addr_offset >= 0x40000000)
+			/* negative offset */
+			start = mid + 1;
+		else
+			/* positive offset */
+			stop = mid;
+	}
+	pr_debug("%s -> %p\n", __func__, stop);
+	return stop;
+}
+
+static const struct unwind_idx *unwind_find_idx(unsigned long addr)
+{
+	const struct unwind_idx *idx = NULL;
+	unsigned long flags;
+
+	pr_debug("%s(%08lx)\n", __func__, addr);
+
+	if (core_kernel_text(addr)) {
+		if (unlikely(!__origin_unwind_idx))
+			__origin_unwind_idx =
+				unwind_find_origin(__start_unwind_idx,
+						__stop_unwind_idx);
+
+		/* main unwind table */
+		idx = search_index(addr, __start_unwind_idx,
+				   __origin_unwind_idx,
+				   __stop_unwind_idx);
+	} else {
+		/* module unwind tables */
+		struct unwind_table *table;
+
+		spin_lock_irqsave(&unwind_lock, flags);
+		list_for_each_entry(table, &unwind_tables, list) {
+			if (addr >= table->begin_addr &&
+			    addr < table->end_addr) {
+				idx = search_index(addr, table->start,
+						   table->origin,
+						   table->stop);
+				/* Move-to-front to exploit common traces */
+				list_move(&table->list, &unwind_tables);
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&unwind_lock, flags);
+	}
+
+	pr_debug("%s: idx = %p\n", __func__, idx);
+	return idx;
+}
+
+static unsigned long unwind_get_byte(struct unwind_ctrl_block *ctrl)
+{
+	unsigned long ret;
+
+	if (ctrl->entries <= 0) {
+		pr_warn("unwind: Corrupt unwind table\n");
+		return 0;
+	}
+
+	ret = (*ctrl->insn >> (ctrl->byte * 8)) & 0xff;
+
+	if (ctrl->byte == 0) {
+		ctrl->insn++;
+		ctrl->entries--;
+		ctrl->byte = 3;
+	} else
+		ctrl->byte--;
+
+	return ret;
+}
+
+/* Before poping a register check whether it is feasible or not */
+static int unwind_pop_register(struct unwind_ctrl_block *ctrl,
+				unsigned long **vsp, unsigned int reg)
+{
+	if (unlikely(ctrl->check_each_pop))
+		if (*vsp >= (unsigned long *)ctrl->sp_high)
+			return -URC_FAILURE;
+
+	ctrl->vrs[reg] = *(*vsp)++;
+	return URC_OK;
+}
+
+/* Helper functions to execute the instructions */
+static int unwind_exec_pop_subset_r4_to_r13(struct unwind_ctrl_block *ctrl,
+						unsigned long mask)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int load_sp, reg = 4;
+
+	load_sp = mask & (1 << (13 - 4));
+	while (mask) {
+		if (mask & 1)
+			if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+		mask >>= 1;
+		reg++;
+	}
+	if (!load_sp)
+		ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
+static int unwind_exec_pop_r4_to_rN(struct unwind_ctrl_block *ctrl,
+					unsigned long insn)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int reg;
+
+	/* pop R4-R[4+bbb] */
+	for (reg = 4; reg <= 4 + (insn & 7); reg++)
+		if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+
+	if (insn & 0x8)
+		if (unwind_pop_register(ctrl, &vsp, 14))
+				return -URC_FAILURE;
+
+	ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
+static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
+						unsigned long mask)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int reg = 0;
+
+	/* pop R0-R3 according to mask */
+	while (mask) {
+		if (mask & 1)
+			if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+		mask >>= 1;
+		reg++;
+	}
+	ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
+/*
+ * Execute the current unwind instruction.
+ */
+static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
+{
+	unsigned long insn = unwind_get_byte(ctrl);
+	int ret = URC_OK;
+
+	pr_debug("%s: insn = %08lx\n", __func__, insn);
+
+	if ((insn & 0xc0) == 0x00)
+		ctrl->vrs[SP] += ((insn & 0x3f) << 2) + 4;
+	else if ((insn & 0xc0) == 0x40)
+		ctrl->vrs[SP] -= ((insn & 0x3f) << 2) + 4;
+	else if ((insn & 0xf0) == 0x80) {
+		unsigned long mask;
+
+		insn = (insn << 8) | unwind_get_byte(ctrl);
+		mask = insn & 0x0fff;
+		if (mask == 0) {
+			pr_warn("unwind: 'Refuse to unwind' instruction %04lx\n",
+				insn);
+			return -URC_FAILURE;
+		}
+
+		ret = unwind_exec_pop_subset_r4_to_r13(ctrl, mask);
+		if (ret)
+			goto error;
+	} else if ((insn & 0xf0) == 0x90 &&
+		   (insn & 0x0d) != 0x0d)
+		ctrl->vrs[SP] = ctrl->vrs[insn & 0x0f];
+	else if ((insn & 0xf0) == 0xa0) {
+		ret = unwind_exec_pop_r4_to_rN(ctrl, insn);
+		if (ret)
+			goto error;
+	} else if (insn == 0xb0) {
+		if (ctrl->vrs[PC] == 0)
+			ctrl->vrs[PC] = ctrl->vrs[LR];
+		/* no further processing */
+		ctrl->entries = 0;
+	} else if (insn == 0xb1) {
+		unsigned long mask = unwind_get_byte(ctrl);
+
+		if (mask == 0 || mask & 0xf0) {
+			pr_warn("unwind: Spare encoding %04lx\n",
+				(insn << 8) | mask);
+			return -URC_FAILURE;
+		}
+
+		ret = unwind_exec_pop_subset_r0_to_r3(ctrl, mask);
+		if (ret)
+			goto error;
+	} else if (insn == 0xb2) {
+		unsigned long uleb128 = unwind_get_byte(ctrl);
+
+		ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
+	} else {
+		pr_warn("unwind: Unhandled instruction %02lx\n", insn);
+		return -URC_FAILURE;
+	}
+
+	pr_debug("%s: fp = %08lx sp = %08lx lr = %08lx pc = %08lx\n", __func__,
+		 ctrl->vrs[FP], ctrl->vrs[SP], ctrl->vrs[LR], ctrl->vrs[PC]);
+
+error:
+	return ret;
+}
+
+/*
+ * Unwind a single frame starting with *sp for the symbol at *pc. It
+ * updates the *pc and *sp with the new values.
+ */
+int unwind_frame(struct stackframe *frame)
+{
+	unsigned long low;
+	const struct unwind_idx *idx;
+	struct unwind_ctrl_block ctrl;
+
+	/* store the highest address on the stack to avoid crossing it*/
+	low = frame->sp;
+	ctrl.sp_high = ALIGN(low, THREAD_SIZE);
+
+	pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__,
+		 frame->pc, frame->lr, frame->sp);
+
+	if (!kernel_text_address(frame->pc))
+		return -URC_FAILURE;
+
+	idx = unwind_find_idx(frame->pc);
+	if (!idx) {
+		pr_warn("unwind: Index not found %08lx\n", frame->pc);
+		return -URC_FAILURE;
+	}
+
+	ctrl.vrs[FP] = frame->fp;
+	ctrl.vrs[SP] = frame->sp;
+	ctrl.vrs[LR] = frame->lr;
+	ctrl.vrs[PC] = 0;
+
+	if (idx->insn == 1)
+		/* can't unwind */
+		return -URC_FAILURE;
+	else if ((idx->insn & 0x80000000) == 0)
+		/* prel31 to the unwind table */
+		ctrl.insn = (unsigned long *)prel31_to_addr(&idx->insn);
+	else if ((idx->insn & 0xff000000) == 0x80000000)
+		/* only personality routine 0 supported in the index */
+		ctrl.insn = &idx->insn;
+	else {
+		pr_warn("unwind: Unsupported personality routine %08lx in the index at %p\n",
+			idx->insn, idx);
+		return -URC_FAILURE;
+	}
+
+	/* check the personality routine */
+	if ((*ctrl.insn & 0xff000000) == 0x80000000) {
+		ctrl.byte = 2;
+		ctrl.entries = 1;
+	} else if ((*ctrl.insn & 0xff000000) == 0x81000000) {
+		ctrl.byte = 1;
+		ctrl.entries = 1 + ((*ctrl.insn & 0x00ff0000) >> 16);
+	} else {
+		pr_warn("unwind: Unsupported personality routine %08lx at %p\n",
+			*ctrl.insn, ctrl.insn);
+		return -URC_FAILURE;
+	}
+
+	ctrl.check_each_pop = 0;
+
+	while (ctrl.entries > 0) {
+		int urc;
+		if ((ctrl.sp_high - ctrl.vrs[SP]) < sizeof(ctrl.vrs))
+			ctrl.check_each_pop = 1;
+		urc = unwind_exec_insn(&ctrl);
+		if (urc < 0)
+			return urc;
+		if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= ctrl.sp_high)
+			return -URC_FAILURE;
+	}
+
+	if (ctrl.vrs[PC] == 0)
+		ctrl.vrs[PC] = ctrl.vrs[LR];
+
+	/* check for infinite loop */
+	if (frame->pc == ctrl.vrs[PC])
+		return -URC_FAILURE;
+
+	frame->fp = ctrl.vrs[FP];
+	frame->sp = ctrl.vrs[SP];
+	frame->lr = ctrl.vrs[LR];
+	frame->pc = ctrl.vrs[PC];
+
+	return URC_OK;
+}
+
+void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+{
+	struct stackframe frame;
+
+	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+
+	if (!tsk)
+		tsk = current;
+
+	if (regs) {
+		arm_get_current_stackframe(regs, &frame);
+		/* PC might be corrupted, use LR in that case. */
+		if (!kernel_text_address(regs->ARM_pc))
+			frame.pc = regs->ARM_lr;
+	} else if (tsk == current) {
+		frame.fp = (unsigned long)__builtin_frame_address(0);
+		frame.sp = current_stack_pointer;
+		frame.lr = (unsigned long)__builtin_return_address(0);
+		frame.pc = (unsigned long)unwind_backtrace;
+	} else {
+		/* task blocked in __switch_to */
+		frame.fp = thread_saved_fp(tsk);
+		frame.sp = thread_saved_sp(tsk);
+		/*
+		 * The function calling __switch_to cannot be a leaf function
+		 * so LR is recovered from the stack.
+		 */
+		frame.lr = 0;
+		frame.pc = thread_saved_pc(tsk);
+	}
+
+	while (1) {
+		int urc;
+		unsigned long where = frame.pc;
+
+		urc = unwind_frame(&frame);
+		if (urc < 0)
+			break;
+		dump_backtrace_entry(where, frame.pc, frame.sp - 4);
+	}
+}
+
+struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
+				      unsigned long text_addr,
+				      unsigned long text_size)
+{
+	unsigned long flags;
+	struct unwind_table *tab = kmalloc(sizeof(*tab), GFP_KERNEL);
+
+	pr_debug("%s(%08lx, %08lx, %08lx, %08lx)\n", __func__, start, size,
+		 text_addr, text_size);
+
+	if (!tab)
+		return tab;
+
+	tab->start = (const struct unwind_idx *)start;
+	tab->stop = (const struct unwind_idx *)(start + size);
+	tab->origin = unwind_find_origin(tab->start, tab->stop);
+	tab->begin_addr = text_addr;
+	tab->end_addr = text_addr + text_size;
+
+	spin_lock_irqsave(&unwind_lock, flags);
+	list_add_tail(&tab->list, &unwind_tables);
+	spin_unlock_irqrestore(&unwind_lock, flags);
+
+	return tab;
+}
+
+void unwind_table_del(struct unwind_table *tab)
+{
+	unsigned long flags;
+
+	if (!tab)
+		return;
+
+	spin_lock_irqsave(&unwind_lock, flags);
+	list_del(&tab->list);
+	spin_unlock_irqrestore(&unwind_lock, flags);
+
+	kfree(tab);
+}
diff --git a/arch/arm/kernel/v7m.c b/arch/arm/kernel/v7m.c
new file mode 100644
index 000000000..4d2cba94f
--- /dev/null
+++ b/arch/arm/kernel/v7m.c
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2013 Uwe Kleine-Koenig for Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ */
+#include <linux/io.h>
+#include <linux/reboot.h>
+#include <asm/barrier.h>
+#include <asm/v7m.h>
+
+void armv7m_restart(enum reboot_mode mode, const char *cmd)
+{
+	dsb();
+	__raw_writel(V7M_SCB_AIRCR_VECTKEY | V7M_SCB_AIRCR_SYSRESETREQ,
+			BASEADDR_V7M_SCB + V7M_SCB_AIRCR);
+	dsb();
+}
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
new file mode 100644
index 000000000..efe17dd9b
--- /dev/null
+++ b/arch/arm/kernel/vdso.c
@@ -0,0 +1,337 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * Copyright (C) 2012 ARM Limited
+ * Copyright (C) 2015 Mentor Graphics Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/elf.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/vmalloc.h>
+#include <asm/arch_timer.h>
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <clocksource/arm_arch_timer.h>
+
+#define MAX_SYMNAME	64
+
+static struct page **vdso_text_pagelist;
+
+/* Total number of pages needed for the data and text portions of the VDSO. */
+unsigned int vdso_total_pages __read_mostly;
+
+/*
+ * The VDSO data page.
+ */
+static union vdso_data_store vdso_data_store __page_aligned_data;
+static struct vdso_data *vdso_data = &vdso_data_store.data;
+
+static struct page *vdso_data_page;
+static struct vm_special_mapping vdso_data_mapping = {
+	.name = "[vvar]",
+	.pages = &vdso_data_page,
+};
+
+static struct vm_special_mapping vdso_text_mapping = {
+	.name = "[vdso]",
+};
+
+struct elfinfo {
+	Elf32_Ehdr	*hdr;		/* ptr to ELF */
+	Elf32_Sym	*dynsym;	/* ptr to .dynsym section */
+	unsigned long	dynsymsize;	/* size of .dynsym section */
+	char		*dynstr;	/* ptr to .dynstr section */
+};
+
+/* Cached result of boot-time check for whether the arch timer exists,
+ * and if so, whether the virtual counter is useable.
+ */
+static bool cntvct_ok __read_mostly;
+
+static bool __init cntvct_functional(void)
+{
+	struct device_node *np;
+	bool ret = false;
+
+	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
+		goto out;
+
+	/* The arm_arch_timer core should export
+	 * arch_timer_use_virtual or similar so we don't have to do
+	 * this.
+	 */
+	np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
+	if (!np)
+		goto out_put;
+
+	if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
+		goto out_put;
+
+	ret = true;
+
+out_put:
+	of_node_put(np);
+out:
+	return ret;
+}
+
+static void * __init find_section(Elf32_Ehdr *ehdr, const char *name,
+				  unsigned long *size)
+{
+	Elf32_Shdr *sechdrs;
+	unsigned int i;
+	char *secnames;
+
+	/* Grab section headers and strings so we can tell who is who */
+	sechdrs = (void *)ehdr + ehdr->e_shoff;
+	secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+	/* Find the section they want */
+	for (i = 1; i < ehdr->e_shnum; i++) {
+		if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) {
+			if (size)
+				*size = sechdrs[i].sh_size;
+			return (void *)ehdr + sechdrs[i].sh_offset;
+		}
+	}
+
+	if (size)
+		*size = 0;
+	return NULL;
+}
+
+static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname)
+{
+	unsigned int i;
+
+	for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+		char name[MAX_SYMNAME], *c;
+
+		if (lib->dynsym[i].st_name == 0)
+			continue;
+		strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+			MAX_SYMNAME);
+		c = strchr(name, '@');
+		if (c)
+			*c = 0;
+		if (strcmp(symname, name) == 0)
+			return &lib->dynsym[i];
+	}
+	return NULL;
+}
+
+static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname)
+{
+	Elf32_Sym *sym;
+
+	sym = find_symbol(lib, symname);
+	if (!sym)
+		return;
+
+	sym->st_name = 0;
+}
+
+static void __init patch_vdso(void *ehdr)
+{
+	struct elfinfo einfo;
+
+	einfo = (struct elfinfo) {
+		.hdr = ehdr,
+	};
+
+	einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize);
+	einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL);
+
+	/* If the virtual counter is absent or non-functional we don't
+	 * want programs to incur the slight additional overhead of
+	 * dispatching through the VDSO only to fall back to syscalls.
+	 */
+	if (!cntvct_ok) {
+		vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
+		vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
+	}
+}
+
+static int __init vdso_init(void)
+{
+	unsigned int text_pages;
+	int i;
+
+	if (memcmp(&vdso_start, "\177ELF", 4)) {
+		pr_err("VDSO is not a valid ELF object!\n");
+		return -ENOEXEC;
+	}
+
+	text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
+	pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start);
+
+	/* Allocate the VDSO text pagelist */
+	vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *),
+				     GFP_KERNEL);
+	if (vdso_text_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the VDSO data page. */
+	vdso_data_page = virt_to_page(vdso_data);
+
+	/* Grab the VDSO text pages. */
+	for (i = 0; i < text_pages; i++) {
+		struct page *page;
+
+		page = virt_to_page(&vdso_start + i * PAGE_SIZE);
+		vdso_text_pagelist[i] = page;
+	}
+
+	vdso_text_mapping.pages = vdso_text_pagelist;
+
+	vdso_total_pages = 1; /* for the data/vvar page */
+	vdso_total_pages += text_pages;
+
+	cntvct_ok = cntvct_functional();
+
+	patch_vdso(&vdso_start);
+
+	return 0;
+}
+arch_initcall(vdso_init);
+
+static int install_vvar(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+
+	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
+				       VM_READ | VM_MAYREAD,
+				       &vdso_data_mapping);
+
+	return IS_ERR(vma) ? PTR_ERR(vma) : 0;
+}
+
+/* assumes mmap_sem is write-locked */
+void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+	unsigned long len;
+
+	mm->context.vdso = 0;
+
+	if (vdso_text_pagelist == NULL)
+		return;
+
+	if (install_vvar(mm, addr))
+		return;
+
+	/* Account for vvar page. */
+	addr += PAGE_SIZE;
+	len = (vdso_total_pages - 1) << PAGE_SHIFT;
+
+	vma = _install_special_mapping(mm, addr, len,
+		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+		&vdso_text_mapping);
+
+	if (!IS_ERR(vma))
+		mm->context.vdso = addr;
+}
+
+static void vdso_write_begin(struct vdso_data *vdata)
+{
+	++vdso_data->seq_count;
+	smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */
+}
+
+static void vdso_write_end(struct vdso_data *vdata)
+{
+	smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */
+	++vdso_data->seq_count;
+}
+
+static bool tk_is_cntvct(const struct timekeeper *tk)
+{
+	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
+		return false;
+
+	if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0)
+		return false;
+
+	return true;
+}
+
+/**
+ * update_vsyscall - update the vdso data page
+ *
+ * Increment the sequence counter, making it odd, indicating to
+ * userspace that an update is in progress.  Update the fields used
+ * for coarse clocks and, if the architected system timer is in use,
+ * the fields used for high precision clocks.  Increment the sequence
+ * counter again, making it even, indicating to userspace that the
+ * update is finished.
+ *
+ * Userspace is expected to sample seq_count before reading any other
+ * fields from the data page.  If seq_count is odd, userspace is
+ * expected to wait until it becomes even.  After copying data from
+ * the page, userspace must sample seq_count again; if it has changed
+ * from its previous value, userspace must retry the whole sequence.
+ *
+ * Calls to update_vsyscall are serialized by the timekeeping core.
+ */
+void update_vsyscall(struct timekeeper *tk)
+{
+	struct timespec xtime_coarse;
+	struct timespec64 *wtm = &tk->wall_to_monotonic;
+
+	if (!cntvct_ok) {
+		/* The entry points have been zeroed, so there is no
+		 * point in updating the data page.
+		 */
+		return;
+	}
+
+	vdso_write_begin(vdso_data);
+
+	xtime_coarse = __current_kernel_time();
+	vdso_data->tk_is_cntvct			= tk_is_cntvct(tk);
+	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
+	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->wtm_clock_sec		= wtm->tv_sec;
+	vdso_data->wtm_clock_nsec		= wtm->tv_nsec;
+
+	if (vdso_data->tk_is_cntvct) {
+		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
+		vdso_data->xtime_clock_sec	= tk->xtime_sec;
+		vdso_data->xtime_clock_snsec	= tk->tkr_mono.xtime_nsec;
+		vdso_data->cs_mult		= tk->tkr_mono.mult;
+		vdso_data->cs_shift		= tk->tkr_mono.shift;
+		vdso_data->cs_mask		= tk->tkr_mono.mask;
+	}
+
+	vdso_write_end(vdso_data);
+
+	flush_dcache_page(virt_to_page(vdso_data));
+}
+
+void update_vsyscall_tz(void)
+{
+	vdso_data->tz_minuteswest	= sys_tz.tz_minuteswest;
+	vdso_data->tz_dsttime		= sys_tz.tz_dsttime;
+	flush_dcache_page(virt_to_page(vdso_data));
+}
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
new file mode 100644
index 000000000..8b60fde5c
--- /dev/null
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -0,0 +1,353 @@
+/* ld script to make ARM Linux kernel
+ * taken from the i386 version by Russell King
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ */
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+#include <asm/pgtable.h>
+#endif
+
+#define PROC_INFO							\
+	. = ALIGN(4);							\
+	VMLINUX_SYMBOL(__proc_info_begin) = .;				\
+	*(.proc.info.init)						\
+	VMLINUX_SYMBOL(__proc_info_end) = .;
+
+#define IDMAP_TEXT							\
+	ALIGN_FUNCTION();						\
+	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
+	*(.idmap.text)							\
+	VMLINUX_SYMBOL(__idmap_text_end) = .;				\
+	. = ALIGN(PAGE_SIZE);						\
+	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;			\
+	*(.hyp.idmap.text)						\
+	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define ARM_CPU_DISCARD(x)
+#define ARM_CPU_KEEP(x)		x
+#else
+#define ARM_CPU_DISCARD(x)	x
+#define ARM_CPU_KEEP(x)
+#endif
+
+#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
+	defined(CONFIG_GENERIC_BUG)
+#define ARM_EXIT_KEEP(x)	x
+#define ARM_EXIT_DISCARD(x)
+#else
+#define ARM_EXIT_KEEP(x)
+#define ARM_EXIT_DISCARD(x)	x
+#endif
+
+OUTPUT_ARCH(arm)
+ENTRY(stext)
+
+#ifndef __ARMEB__
+jiffies = jiffies_64;
+#else
+jiffies = jiffies_64 + 4;
+#endif
+
+SECTIONS
+{
+	/*
+	 * XXX: The linker does not define how output sections are
+	 * assigned to input sections when there are multiple statements
+	 * matching the same input section name.  There is no documented
+	 * order of matching.
+	 *
+	 * unwind exit sections must be discarded before the rest of the
+	 * unwind sections get included.
+	 */
+	/DISCARD/ : {
+		*(.ARM.exidx.exit.text)
+		*(.ARM.extab.exit.text)
+		ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))
+		ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))
+		ARM_EXIT_DISCARD(EXIT_TEXT)
+		ARM_EXIT_DISCARD(EXIT_DATA)
+		EXIT_CALL
+#ifndef CONFIG_MMU
+		*(.text.fixup)
+		*(__ex_table)
+#endif
+#ifndef CONFIG_SMP_ON_UP
+		*(.alt.smp.init)
+#endif
+		*(.discard)
+		*(.discard.*)
+	}
+
+#ifdef CONFIG_XIP_KERNEL
+	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
+#else
+	. = PAGE_OFFSET + TEXT_OFFSET;
+#endif
+	.head.text : {
+		_text = .;
+		HEAD_TEXT
+	}
+
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
+
+	.text : {			/* Real text segment		*/
+		_stext = .;		/* Text and read-only data	*/
+			IDMAP_TEXT
+			__exception_text_start = .;
+			*(.exception.text)
+			__exception_text_end = .;
+			IRQENTRY_TEXT
+			TEXT_TEXT
+			SCHED_TEXT
+			LOCK_TEXT
+			KPROBES_TEXT
+			*(.gnu.warning)
+			*(.glue_7)
+			*(.glue_7t)
+		. = ALIGN(4);
+		*(.got)			/* Global offset table		*/
+			ARM_CPU_KEEP(PROC_INFO)
+	}
+
+#ifdef CONFIG_DEBUG_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
+	RO_DATA(PAGE_SIZE)
+
+	. = ALIGN(4);
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+#ifdef CONFIG_MMU
+		*(__ex_table)
+#endif
+		__stop___ex_table = .;
+	}
+
+#ifdef CONFIG_ARM_UNWIND
+	/*
+	 * Stack unwinding tables
+	 */
+	. = ALIGN(8);
+	.ARM.unwind_idx : {
+		__start_unwind_idx = .;
+		*(.ARM.exidx*)
+		__stop_unwind_idx = .;
+	}
+	.ARM.unwind_tab : {
+		__start_unwind_tab = .;
+		*(.ARM.extab*)
+		__stop_unwind_tab = .;
+	}
+#endif
+
+	NOTES
+
+	_etext = .;			/* End of text and rodata section */
+
+#ifndef CONFIG_XIP_KERNEL
+# ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+# else
+	. = ALIGN(PAGE_SIZE);
+# endif
+	__init_begin = .;
+#endif
+	/*
+	 * The vectors and stubs are relocatable code, and the
+	 * only thing that matters is their relative offsets
+	 */
+	__vectors_start = .;
+	.vectors 0 : AT(__vectors_start) {
+		*(.vectors)
+	}
+	. = __vectors_start + SIZEOF(.vectors);
+	__vectors_end = .;
+
+	__stubs_start = .;
+	.stubs 0x1000 : AT(__stubs_start) {
+		*(.stubs)
+	}
+	. = __stubs_start + SIZEOF(.stubs);
+	__stubs_end = .;
+
+	INIT_TEXT_SECTION(8)
+	.exit.text : {
+		ARM_EXIT_KEEP(EXIT_TEXT)
+	}
+	.init.proc.info : {
+		ARM_CPU_DISCARD(PROC_INFO)
+	}
+	.init.arch.info : {
+		__arch_info_begin = .;
+		*(.arch.info.init)
+		__arch_info_end = .;
+	}
+	.init.tagtable : {
+		__tagtable_begin = .;
+		*(.taglist.init)
+		__tagtable_end = .;
+	}
+#ifdef CONFIG_SMP_ON_UP
+	.init.smpalt : {
+		__smpalt_begin = .;
+		*(.alt.smp.init)
+		__smpalt_end = .;
+	}
+#endif
+	.init.pv_table : {
+		__pv_table_begin = .;
+		*(.pv_table)
+		__pv_table_end = .;
+	}
+	.init.data : {
+#ifndef CONFIG_XIP_KERNEL
+		INIT_DATA
+#endif
+		INIT_SETUP(16)
+		INIT_CALLS
+		CON_INITCALL
+		SECURITY_INITCALL
+		INIT_RAM_FS
+	}
+#ifndef CONFIG_XIP_KERNEL
+	.exit.data : {
+		ARM_EXIT_KEEP(EXIT_DATA)
+	}
+#endif
+
+#ifdef CONFIG_SMP
+	PERCPU_SECTION(L1_CACHE_BYTES)
+#endif
+
+#ifdef CONFIG_XIP_KERNEL
+	__data_loc = ALIGN(4);		/* location in binary */
+	. = PAGE_OFFSET + TEXT_OFFSET;
+#else
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+#else
+	. = ALIGN(THREAD_SIZE);
+#endif
+	__init_end = .;
+	__data_loc = .;
+#endif
+
+	.data : AT(__data_loc) {
+		_data = .;		/* address in memory */
+		_sdata = .;
+
+		/*
+		 * first, the init task union, aligned
+		 * to an 8192 byte boundary.
+		 */
+		INIT_TASK_DATA(THREAD_SIZE)
+
+#ifdef CONFIG_XIP_KERNEL
+		. = ALIGN(PAGE_SIZE);
+		__init_begin = .;
+		INIT_DATA
+		ARM_EXIT_KEEP(EXIT_DATA)
+		. = ALIGN(PAGE_SIZE);
+		__init_end = .;
+#endif
+
+		NOSAVE_DATA
+		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
+		READ_MOSTLY_DATA(L1_CACHE_BYTES)
+
+		/*
+		 * and the usual data section
+		 */
+		DATA_DATA
+		CONSTRUCTORS
+
+		_edata = .;
+	}
+	_edata_loc = __data_loc + SIZEOF(.data);
+
+#ifdef CONFIG_HAVE_TCM
+        /*
+	 * We align everything to a page boundary so we can
+	 * free it after init has commenced and TCM contents have
+	 * been copied to its destination.
+	 */
+	.tcm_start : {
+		. = ALIGN(PAGE_SIZE);
+		__tcm_start = .;
+		__itcm_start = .;
+	}
+
+	/*
+	 * Link these to the ITCM RAM
+	 * Put VMA to the TCM address and LMA to the common RAM
+	 * and we'll upload the contents from RAM to TCM and free
+	 * the used RAM after that.
+	 */
+	.text_itcm ITCM_OFFSET : AT(__itcm_start)
+	{
+		__sitcm_text = .;
+		*(.tcm.text)
+		*(.tcm.rodata)
+		. = ALIGN(4);
+		__eitcm_text = .;
+	}
+
+	/*
+	 * Reset the dot pointer, this is needed to create the
+	 * relative __dtcm_start below (to be used as extern in code).
+	 */
+	. = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
+
+	.dtcm_start : {
+		__dtcm_start = .;
+	}
+
+	/* TODO: add remainder of ITCM as well, that can be used for data! */
+	.data_dtcm DTCM_OFFSET : AT(__dtcm_start)
+	{
+		. = ALIGN(4);
+		__sdtcm_data = .;
+		*(.tcm.data)
+		. = ALIGN(4);
+		__edtcm_data = .;
+	}
+
+	/* Reset the dot pointer or the linker gets confused */
+	. = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
+
+	/* End marker for freeing TCM copy in linked object */
+	.tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
+		. = ALIGN(PAGE_SIZE);
+		__tcm_end = .;
+	}
+#endif
+
+	BSS_SECTION(0, 0, 0)
+	_end = .;
+
+	STABS_DEBUG
+}
+
+/*
+ * These must never be empty
+ * If you have to comment these two assert statements out, your
+ * binutils is too old (for other reasons as well)
+ */
+ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
+ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
+
+/*
+ * The HYP init code can't be more than a page long,
+ * and should not cross a page boundary.
+ * The above comment applies as well.
+ */
+ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
+	"HYP init code too big or misaligned")
diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c
new file mode 100644
index 000000000..bdbb8853a
--- /dev/null
+++ b/arch/arm/kernel/xscale-cp0.c
@@ -0,0 +1,177 @@
+/*
+ * linux/arch/arm/kernel/xscale-cp0.c
+ *
+ * XScale DSP and iWMMXt coprocessor context switching and handling
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <asm/thread_notify.h>
+
+static inline void dsp_save_state(u32 *state)
+{
+	__asm__ __volatile__ (
+		"mrrc	p0, 0, %0, %1, c0\n"
+		: "=r" (state[0]), "=r" (state[1]));
+}
+
+static inline void dsp_load_state(u32 *state)
+{
+	__asm__ __volatile__ (
+		"mcrr	p0, 0, %0, %1, c0\n"
+		: : "r" (state[0]), "r" (state[1]));
+}
+
+static int dsp_do(struct notifier_block *self, unsigned long cmd, void *t)
+{
+	struct thread_info *thread = t;
+
+	switch (cmd) {
+	case THREAD_NOTIFY_FLUSH:
+		thread->cpu_context.extra[0] = 0;
+		thread->cpu_context.extra[1] = 0;
+		break;
+
+	case THREAD_NOTIFY_SWITCH:
+		dsp_save_state(current_thread_info()->cpu_context.extra);
+		dsp_load_state(thread->cpu_context.extra);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dsp_notifier_block = {
+	.notifier_call	= dsp_do,
+};
+
+
+#ifdef CONFIG_IWMMXT
+static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t)
+{
+	struct thread_info *thread = t;
+
+	switch (cmd) {
+	case THREAD_NOTIFY_FLUSH:
+		/*
+		 * flush_thread() zeroes thread->fpstate, so no need
+		 * to do anything here.
+		 *
+		 * FALLTHROUGH: Ensure we don't try to overwrite our newly
+		 * initialised state information on the first fault.
+		 */
+
+	case THREAD_NOTIFY_EXIT:
+		iwmmxt_task_release(thread);
+		break;
+
+	case THREAD_NOTIFY_SWITCH:
+		iwmmxt_task_switch(thread);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block iwmmxt_notifier_block = {
+	.notifier_call	= iwmmxt_do,
+};
+#endif
+
+
+static u32 __init xscale_cp_access_read(void)
+{
+	u32 value;
+
+	__asm__ __volatile__ (
+		"mrc	p15, 0, %0, c15, c1, 0\n\t"
+		: "=r" (value));
+
+	return value;
+}
+
+static void __init xscale_cp_access_write(u32 value)
+{
+	u32 temp;
+
+	__asm__ __volatile__ (
+		"mcr	p15, 0, %1, c15, c1, 0\n\t"
+		"mrc	p15, 0, %0, c15, c1, 0\n\t"
+		"mov	%0, %0\n\t"
+		"sub	pc, pc, #4\n\t"
+		: "=r" (temp) : "r" (value));
+}
+
+/*
+ * Detect whether we have a MAC coprocessor (40 bit register) or an
+ * iWMMXt coprocessor (64 bit registers) by loading 00000100:00000000
+ * into a coprocessor register and reading it back, and checking
+ * whether the upper word survived intact.
+ */
+static int __init cpu_has_iwmmxt(void)
+{
+	u32 lo;
+	u32 hi;
+
+	/*
+	 * This sequence is interpreted by the DSP coprocessor as:
+	 *	mar	acc0, %2, %3
+	 *	mra	%0, %1, acc0
+	 *
+	 * And by the iWMMXt coprocessor as:
+	 *	tmcrr	wR0, %2, %3
+	 *	tmrrc	%0, %1, wR0
+	 */
+	__asm__ __volatile__ (
+		"mcrr	p0, 0, %2, %3, c0\n"
+		"mrrc	p0, 0, %0, %1, c0\n"
+		: "=r" (lo), "=r" (hi)
+		: "r" (0), "r" (0x100));
+
+	return !!hi;
+}
+
+
+/*
+ * If we detect that the CPU has iWMMXt (and CONFIG_IWMMXT=y), we
+ * disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy
+ * switch code handle iWMMXt context switching.  If on the other
+ * hand the CPU has a DSP coprocessor, we keep access to CP0 enabled
+ * all the time, and save/restore acc0 on context switch in non-lazy
+ * fashion.
+ */
+static int __init xscale_cp0_init(void)
+{
+	u32 cp_access;
+
+	cp_access = xscale_cp_access_read() & ~3;
+	xscale_cp_access_write(cp_access | 1);
+
+	if (cpu_has_iwmmxt()) {
+#ifndef CONFIG_IWMMXT
+		pr_warn("CAUTION: XScale iWMMXt coprocessor detected, but kernel support is missing.\n");
+#else
+		pr_info("XScale iWMMXt coprocessor detected.\n");
+		elf_hwcap |= HWCAP_IWMMXT;
+		thread_register_notifier(&iwmmxt_notifier_block);
+#endif
+	} else {
+		pr_info("XScale DSP coprocessor detected.\n");
+		thread_register_notifier(&dsp_notifier_block);
+		cp_access |= 1;
+	}
+
+	xscale_cp_access_write(cp_access);
+
+	return 0;
+}
+
+late_initcall(xscale_cp0_init);