331 files changed, 52357 insertions, 0 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
new file mode 100644
index 000000000..7796af4b1
--- /dev/null
+++ b/arch/arm64/Kconfig
@@ -0,0 +1,779 @@
+config ARM64
+	def_bool y
+	select ACPI_GENERIC_GSI if ACPI
+	select ACPI_REDUCED_HARDWARE_ONLY if ACPI
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_SG_CHAIN
+	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+	select ARCH_WANT_FRAME_POINTERS
+	select ARM_AMBA
+	select ARM_ARCH_TIMER
+	select ARM_GIC
+	select AUDIT_ARCH_COMPAT_GENERIC
+	select ARM_GIC_V2M if PCI_MSI
+	select ARM_GIC_V3
+	select ARM_GIC_V3_ITS if PCI_MSI
+	select BUILDTIME_EXTABLE_SORT
+	select CLONE_BACKWARDS
+	select COMMON_CLK
+	select CPU_PM if (SUSPEND || CPU_IDLE)
+	select DCACHE_WORD_ACCESS
+	select GENERIC_ALLOCATOR
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_EARLY_IOREMAP
+	select GENERIC_IRQ_PROBE
+	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_SHOW_LEVEL
+	select GENERIC_PCI_IOMAP
+	select GENERIC_SCHED_CLOCK
+	select GENERIC_SMP_IDLE_THREAD
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
+	select GENERIC_TIME_VSYSCALL
+	select HANDLE_DOMAIN_IRQ
+	select HARDIRQS_SW_RESEND
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_BITREVERSE
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_BPF_JIT
+	select HAVE_C_RECORDMCOUNT
+	select HAVE_CC_STACKPROTECTOR
+	select HAVE_CMPXCHG_DOUBLE
+	select HAVE_DEBUG_BUGVERBOSE
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DMA_API_DEBUG
+	select HAVE_DMA_ATTRS
+	select HAVE_DMA_CONTIGUOUS
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_HW_BREAKPOINT if PERF_EVENTS
+	select HAVE_MEMBLOCK
+	select HAVE_PATA_PLATFORM
+	select HAVE_PERF_EVENTS
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_RCU_TABLE_FREE
+	select HAVE_SYSCALL_TRACEPOINTS
+	select IRQ_DOMAIN
+	select MODULES_USE_ELF_RELA
+	select NO_BOOTMEM
+	select OF
+	select OF_EARLY_FLATTREE
+	select OF_RESERVED_MEM
+	select PERF_USE_VMALLOC
+	select POWER_RESET
+	select POWER_SUPPLY
+	select RTC_LIB
+	select SPARSE_IRQ
+	select SYSCTL_EXCEPTION_TRACE
+	select HAVE_CONTEXT_TRACKING
+	help
+	  ARM 64-bit (AArch64) Linux support.
+
+config 64BIT
+	def_bool y
+
+config ARCH_PHYS_ADDR_T_64BIT
+	def_bool y
+
+config MMU
+	def_bool y
+
+config NO_IOPORT_MAP
+	def_bool y if !PCI
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
+config RWSEM_XCHGADD_ALGORITHM
+	def_bool y
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_CSUM
+        def_bool y
+
+config GENERIC_CALIBRATE_DELAY
+	def_bool y
+
+config ZONE_DMA
+	def_bool y
+
+config HAVE_GENERIC_RCU_GUP
+	def_bool y
+
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool y
+
+config NEED_DMA_MAP_STATE
+	def_bool y
+
+config NEED_SG_DMA_LENGTH
+	def_bool y
+
+config SWIOTLB
+	def_bool y
+
+config IOMMU_HELPER
+	def_bool SWIOTLB
+
+config KERNEL_MODE_NEON
+	def_bool y
+
+config FIX_EARLYCON_MEM
+	def_bool y
+
+config PGTABLE_LEVELS
+	int
+	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
+	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
+	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
+	default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+
+source "init/Kconfig"
+
+source "kernel/Kconfig.freezer"
+
+menu "Platform selection"
+
+config ARCH_EXYNOS
+	bool
+	help
+	  This enables support for Samsung Exynos SoC family
+
+config ARCH_EXYNOS7
+	bool "ARMv8 based Samsung Exynos7"
+	select ARCH_EXYNOS
+	select COMMON_CLK_SAMSUNG
+	select HAVE_S3C2410_WATCHDOG if WATCHDOG
+	select HAVE_S3C_RTC if RTC_CLASS
+	select PINCTRL
+	select PINCTRL_EXYNOS
+
+	help
+	  This enables support for Samsung Exynos7 SoC family
+
+config ARCH_FSL_LS2085A
+	bool "Freescale LS2085A SOC"
+	help
+	  This enables support for Freescale LS2085A SOC.
+
+config ARCH_MEDIATEK
+	bool "Mediatek MT65xx & MT81xx ARMv8 SoC"
+	select ARM_GIC
+	select PINCTRL
+	help
+	  Support for Mediatek MT65xx & MT81xx ARMv8 SoCs
+
+config ARCH_QCOM
+	bool "Qualcomm Platforms"
+	select PINCTRL
+	help
+	  This enables support for the ARMv8 based Qualcomm chipsets.
+
+config ARCH_SEATTLE
+	bool "AMD Seattle SoC Family"
+	help
+	  This enables support for AMD Seattle SOC Family
+
+config ARCH_TEGRA
+	bool "NVIDIA Tegra SoC Family"
+	select ARCH_HAS_RESET_CONTROLLER
+	select ARCH_REQUIRE_GPIOLIB
+	select CLKDEV_LOOKUP
+	select CLKSRC_MMIO
+	select CLKSRC_OF
+	select GENERIC_CLOCKEVENTS
+	select HAVE_CLK
+	select PINCTRL
+	select RESET_CONTROLLER
+	help
+	  This enables support for the NVIDIA Tegra SoC family.
+
+config ARCH_TEGRA_132_SOC
+	bool "NVIDIA Tegra132 SoC"
+	depends on ARCH_TEGRA
+	select PINCTRL_TEGRA124
+	select USB_ULPI if USB_PHY
+	select USB_ULPI_VIEWPORT if USB_PHY
+	help
+	  Enable support for NVIDIA Tegra132 SoC, based on the Denver
+	  ARMv8 CPU.  The Tegra132 SoC is similar to the Tegra124 SoC,
+	  but contains an NVIDIA Denver CPU complex in place of
+	  Tegra124's "4+1" Cortex-A15 CPU complex.
+
+config ARCH_SPRD
+	bool "Spreadtrum SoC platform"
+	help
+	  Support for Spreadtrum ARM based SoCs
+
+config ARCH_THUNDER
+	bool "Cavium Inc. Thunder SoC Family"
+	help
+	  This enables support for Cavium's Thunder Family of SoCs.
+
+config ARCH_VEXPRESS
+	bool "ARMv8 software model (Versatile Express)"
+	select ARCH_REQUIRE_GPIOLIB
+	select COMMON_CLK_VERSATILE
+	select POWER_RESET_VEXPRESS
+	select VEXPRESS_CONFIG
+	help
+	  This enables support for the ARMv8 software model (Versatile
+	  Express).
+
+config ARCH_XGENE
+	bool "AppliedMicro X-Gene SOC Family"
+	help
+	  This enables support for AppliedMicro X-Gene SOC Family
+
+config ARCH_ZYNQMP
+	bool "Xilinx ZynqMP Family"
+	help
+	  This enables support for Xilinx ZynqMP Family
+
+endmenu
+
+menu "Bus support"
+
+config PCI
+	bool "PCI support"
+	help
+	  This feature enables support for PCI bus system. If you say Y
+	  here, the kernel will include drivers and infrastructure code
+	  to support PCI bus devices.
+
+config PCI_DOMAINS
+	def_bool PCI
+
+config PCI_DOMAINS_GENERIC
+	def_bool PCI
+
+config PCI_SYSCALL
+	def_bool PCI
+
+source "drivers/pci/Kconfig"
+source "drivers/pci/pcie/Kconfig"
+source "drivers/pci/hotplug/Kconfig"
+
+endmenu
+
+menu "Kernel Features"
+
+menu "ARM errata workarounds via the alternatives framework"
+
+config ARM64_ERRATUM_826319
+	bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 826319 on Cortex-A53 parts up to r0p2 with an AMBA 4 ACE or
+	  AXI master interface and an L2 cache.
+
+	  If a Cortex-A53 uses an AMBA AXI4 ACE interface to other processors
+	  and is unable to accept a certain write via this interface, it will
+	  not progress on read data presented on the read data channel and the
+	  system can deadlock.
+
+	  The workaround promotes data cache clean instructions to
+	  data cache clean-and-invalidate.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_827319
+	bool "Cortex-A53: 827319: Data cache clean instructions might cause overlapping transactions to the interconnect"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 827319 on Cortex-A53 parts up to r0p2 with an AMBA 5 CHI
+	  master interface and an L2 cache.
+
+	  Under certain conditions this erratum can cause a clean line eviction
+	  to occur at the same time as another transaction to the same address
+	  on the AMBA 5 CHI interface, which can cause data corruption if the
+	  interconnect reorders the two transactions.
+
+	  The workaround promotes data cache clean instructions to
+	  data cache clean-and-invalidate.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_824069
+	bool "Cortex-A53: 824069: Cache line might not be marked as clean after a CleanShared snoop"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 824069 on Cortex-A53 parts up to r0p2 when it is connected
+	  to a coherent interconnect.
+
+	  If a Cortex-A53 processor is executing a store or prefetch for
+	  write instruction at the same time as a processor in another
+	  cluster is executing a cache maintenance operation to the same
+	  address, then this erratum might cause a clean cache line to be
+	  incorrectly marked as dirty.
+
+	  The workaround promotes data cache clean instructions to
+	  data cache clean-and-invalidate.
+	  Please note that this option does not necessarily enable the
+	  workaround, as it depends on the alternative framework, which will
+	  only patch the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_819472
+	bool "Cortex-A53: 819472: Store exclusive instructions might cause data corruption"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 819472 on Cortex-A53 parts up to r0p1 with an L2 cache
+	  present when it is connected to a coherent interconnect.
+
+	  If the processor is executing a load and store exclusive sequence at
+	  the same time as a processor in another cluster is executing a cache
+	  maintenance operation to the same address, then this erratum might
+	  cause data corruption.
+
+	  The workaround promotes data cache clean instructions to
+	  data cache clean-and-invalidate.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_832075
+	bool "Cortex-A57: 832075: possible deadlock on mixing exclusive memory accesses with device loads"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 832075 on Cortex-A57 parts up to r1p2.
+
+	  Affected Cortex-A57 parts might deadlock when exclusive load/store
+	  instructions to Write-Back memory are mixed with Device loads.
+
+	  The workaround is to promote device loads to use Load-Acquire
+	  semantics.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_845719
+	bool "Cortex-A53: 845719: a load might read incorrect data"
+	depends on COMPAT
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 845719 on Cortex-A53 parts up to r0p4.
+
+	  When running a compat (AArch32) userspace on an affected Cortex-A53
+	  part, a load at EL0 from a virtual address that matches the bottom 32
+	  bits of the virtual address used by a recent load at (AArch64) EL1
+	  might return incorrect data.
+
+	  The workaround is to write the contextidr_el1 register on exception
+	  return to a 32-bit task.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+endmenu
+
+
+choice
+	prompt "Page size"
+	default ARM64_4K_PAGES
+	help
+	  Page size (translation granule) configuration.
+
+config ARM64_4K_PAGES
+	bool "4KB"
+	help
+	  This feature enables 4KB pages support.
+
+config ARM64_64K_PAGES
+	bool "64KB"
+	help
+	  This feature enables 64KB pages support (4KB by default)
+	  allowing only two levels of page tables and faster TLB
+	  look-up. AArch32 emulation is not available when this feature
+	  is enabled.
+
+endchoice
+
+choice
+	prompt "Virtual address space size"
+	default ARM64_VA_BITS_39 if ARM64_4K_PAGES
+	default ARM64_VA_BITS_42 if ARM64_64K_PAGES
+	help
+	  Allows choosing one of multiple possible virtual address
+	  space sizes. The level of translation table is determined by
+	  a combination of page size and virtual address space size.
+
+config ARM64_VA_BITS_39
+	bool "39-bit"
+	depends on ARM64_4K_PAGES
+
+config ARM64_VA_BITS_42
+	bool "42-bit"
+	depends on ARM64_64K_PAGES
+
+config ARM64_VA_BITS_48
+	bool "48-bit"
+
+endchoice
+
+config ARM64_VA_BITS
+	int
+	default 39 if ARM64_VA_BITS_39
+	default 42 if ARM64_VA_BITS_42
+	default 48 if ARM64_VA_BITS_48
+
+config CPU_BIG_ENDIAN
+       bool "Build big-endian kernel"
+       help
+         Say Y if you plan on running a kernel in big-endian mode.
+
+config SMP
+	bool "Symmetric Multi-Processing"
+	help
+	  This enables support for systems with more than one CPU.  If
+	  you say N here, the kernel will run on single and
+	  multiprocessor machines, but will use only one CPU of a
+	  multiprocessor machine. If you say Y here, the kernel will run
+	  on many, but not all, single processor machines. On a single
+	  processor machine, the kernel will run faster if you say N
+	  here.
+
+	  If you don't know what to do here, say N.
+
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+	bool "SMT scheduler support"
+	depends on SMP
+	help
+	  Improves the CPU scheduler's decision making when dealing with
+	  MultiThreading at a cost of slightly increased overhead in some
+	  places. If unsure say N here.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
+	depends on SMP
+	# These have to remain sorted largest to smallest
+	default "64"
+
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP
+	help
+	  Say Y here to experiment with turning CPUs off and on.  CPUs
+	  can be controlled through /sys/devices/system/cpu.
+
+source kernel/Kconfig.preempt
+
+config UP_LATE_INIT
+       def_bool y
+       depends on !SMP
+
+config HZ
+	int
+	default 100
+
+config ARCH_HAS_HOLES_MEMORYMODEL
+	def_bool y if SPARSEMEM
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool ARCH_SPARSEMEM_ENABLE
+
+config ARCH_SELECT_MEMORY_MODEL
+	def_bool ARCH_SPARSEMEM_ENABLE
+
+config HAVE_ARCH_PFN_VALID
+	def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
+
+config HW_PERF_EVENTS
+	bool "Enable hardware performance counter support for perf events"
+	depends on PERF_EVENTS
+	default y
+	help
+	  Enable hardware performance counter support for perf events. If
+	  disabled, perf events will use software events only.
+
+config SYS_SUPPORTS_HUGETLBFS
+	def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+	def_bool y
+
+config ARCH_WANT_HUGE_PMD_SHARE
+	def_bool y if !ARM64_64K_PAGES
+
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	def_bool y
+
+config ARCH_HAS_CACHE_LINE_SIZE
+	def_bool y
+
+source "mm/Kconfig"
+
+config SECCOMP
+	bool "Enable seccomp to safely compute untrusted bytecode"
+	---help---
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
+config XEN_DOM0
+	def_bool y
+	depends on XEN
+
+config XEN
+	bool "Xen guest support on ARM64"
+	depends on ARM64 && OF
+	select SWIOTLB_XEN
+	help
+	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
+
+config FORCE_MAX_ZONEORDER
+	int
+	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+	default "11"
+
+menuconfig ARMV8_DEPRECATED
+	bool "Emulate deprecated/obsolete ARMv8 instructions"
+	depends on COMPAT
+	help
+	  Legacy software support may require certain instructions
+	  that have been deprecated or obsoleted in the architecture.
+
+	  Enable this config to enable selective emulation of these
+	  features.
+
+	  If unsure, say Y
+
+if ARMV8_DEPRECATED
+
+config SWP_EMULATION
+	bool "Emulate SWP/SWPB instructions"
+	help
+	  ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that
+	  they are always undefined. Say Y here to enable software
+	  emulation of these instructions for userspace using LDXR/STXR.
+
+	  In some older versions of glibc [<=2.8] SWP is used during futex
+	  trylock() operations with the assumption that the code will not
+	  be preempted. This invalid assumption may be more likely to fail
+	  with SWP emulation enabled, leading to deadlock of the user
+	  application.
+
+	  NOTE: when accessing uncached shared regions, LDXR/STXR rely
+	  on an external transaction monitoring block called a global
+	  monitor to maintain update atomicity. If your system does not
+	  implement a global monitor, this option can cause programs that
+	  perform SWP operations to uncached memory to deadlock.
+
+	  If unsure, say Y
+
+config CP15_BARRIER_EMULATION
+	bool "Emulate CP15 Barrier instructions"
+	help
+	  The CP15 barrier instructions - CP15ISB, CP15DSB, and
+	  CP15DMB - are deprecated in ARMv8 (and ARMv7). It is
+	  strongly recommended to use the ISB, DSB, and DMB
+	  instructions instead.
+
+	  Say Y here to enable software emulation of these
+	  instructions for AArch32 userspace code. When this option is
+	  enabled, CP15 barrier usage is traced which can help
+	  identify software that needs updating.
+
+	  If unsure, say Y
+
+config SETEND_EMULATION
+	bool "Emulate SETEND instruction"
+	help
+	  The SETEND instruction alters the data-endianness of the
+	  AArch32 EL0, and is deprecated in ARMv8.
+
+	  Say Y here to enable software emulation of the instruction
+	  for AArch32 userspace code.
+
+	  Note: All the cpus on the system must have mixed endian support at EL0
+	  for this feature to be enabled. If a new CPU - which doesn't support mixed
+	  endian - is hotplugged in after this feature has been enabled, there could
+	  be unexpected results in the applications.
+
+	  If unsure, say Y
+endif
+
+endmenu
+
+menu "Boot options"
+
+config CMDLINE
+	string "Default kernel command string"
+	default ""
+	help
+	  Provide a set of default command-line options at build time by
+	  entering them here. As a minimum, you should specify the the
+	  root device (e.g. root=/dev/nfs).
+
+config CMDLINE_FORCE
+	bool "Always use the default kernel command string"
+	help
+	  Always use the default kernel command string, even if the boot
+	  loader passes other arguments to the kernel.
+	  This is useful if you cannot or don't want to change the
+	  command-line options your boot loader passes to the kernel.
+
+config EFI_STUB
+	bool
+
+config EFI
+	bool "UEFI runtime support"
+	depends on OF && !CPU_BIG_ENDIAN
+	select LIBFDT
+	select UCS2_STRING
+	select EFI_PARAMS_FROM_FDT
+	select EFI_RUNTIME_WRAPPERS
+	select EFI_STUB
+	select EFI_ARMSTUB
+	default y
+	help
+	  This option provides support for runtime services provided
+	  by UEFI firmware (such as non-volatile variables, realtime
+          clock, and platform reset). A UEFI stub is also provided to
+	  allow the kernel to be booted as an EFI application. This
+	  is only useful on systems that have UEFI firmware.
+
+config DMI
+	bool "Enable support for SMBIOS (DMI) tables"
+	depends on EFI
+	default y
+	help
+	  This enables SMBIOS/DMI feature for systems.
+
+	  This option is only useful on systems that have UEFI firmware.
+	  However, even with this option, the resultant kernel should
+	  continue to boot on existing non-UEFI platforms.
+
+endmenu
+
+menu "Userspace binary formats"
+
+source "fs/Kconfig.binfmt"
+
+config COMPAT
+	bool "Kernel support for 32-bit EL0"
+	depends on !ARM64_64K_PAGES || EXPERT
+	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
+	select OLD_SIGSUSPEND3
+	select COMPAT_OLD_SIGACTION
+	help
+	  This option enables support for a 32-bit EL0 running under a 64-bit
+	  kernel at EL1. AArch32-specific components such as system calls,
+	  the user helper functions, VFP support and the ptrace interface are
+	  handled appropriately by the kernel.
+
+	  If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
+	  will only be able to execute AArch32 binaries that were compiled with
+	  64k aligned segments.
+
+	  If you want to execute 32-bit userspace applications, say Y.
+
+config SYSVIPC_COMPAT
+	def_bool y
+	depends on COMPAT && SYSVIPC
+
+endmenu
+
+menu "Power management options"
+
+source "kernel/power/Kconfig"
+
+config ARCH_SUSPEND_POSSIBLE
+	def_bool y
+
+endmenu
+
+menu "CPU Power Management"
+
+source "drivers/cpuidle/Kconfig"
+
+source "drivers/cpufreq/Kconfig"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "drivers/firmware/Kconfig"
+
+source "drivers/acpi/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/arm64/kvm/Kconfig"
+
+source "arch/arm64/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm64/crypto/Kconfig"
+endif
+
+source "lib/Kconfig"
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
new file mode 100644
index 000000000..d6285ef9b
--- /dev/null
+++ b/arch/arm64/Kconfig.debug
@@ -0,0 +1,94 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config FRAME_POINTER
+	bool
+	default y
+
+config ARM64_PTDUMP
+	bool "Export kernel pagetable layout to userspace via debugfs"
+	depends on DEBUG_KERNEL
+	select DEBUG_FS
+        help
+	  Say Y here if you want to show the kernel pagetable layout in a
+	  debugfs file. This information is only useful for kernel developers
+	  who are working in architecture specific areas of the kernel.
+	  It is probably not a good idea to enable this feature in a production
+	  kernel.
+	  If in doubt, say "N"
+
+config STRICT_DEVMEM
+	bool "Filter access to /dev/mem"
+	depends on MMU
+	help
+	  If this option is disabled, you allow userspace (root) access to all
+	  of memory, including kernel and userspace memory. Accidental
+	  access to this is obviously disastrous, but specific access can
+	  be used by people debugging the kernel.
+
+	  If this option is switched on, the /dev/mem file only allows
+	  userspace access to memory mapped peripherals.
+
+	  If in doubt, say Y.
+
+config PID_IN_CONTEXTIDR
+	bool "Write the current PID to the CONTEXTIDR register"
+	help
+	  Enabling this option causes the kernel to write the current PID to
+	  the CONTEXTIDR register, at the expense of some additional
+	  instructions during context switch. Say Y here only if you are
+	  planning to use hardware trace tools with this kernel.
+
+config ARM64_RANDOMIZE_TEXT_OFFSET
+	bool "Randomize TEXT_OFFSET at build time"
+	help
+	  Say Y here if you want the image load offset (AKA TEXT_OFFSET)
+	  of the kernel to be randomized at build-time. When selected,
+	  this option will cause TEXT_OFFSET to be randomized upon any
+	  build of the kernel, and the offset will be reflected in the
+	  text_offset field of the resulting Image. This can be used to
+	  fuzz-test bootloaders which respect text_offset.
+
+	  This option is intended for bootloader and/or kernel testing
+	  only. Bootloaders must make no assumptions regarding the value
+	  of TEXT_OFFSET and platforms must not require a specific
+	  value.
+
+config DEBUG_SET_MODULE_RONX
+        bool "Set loadable kernel module data as NX and text as RO"
+        depends on MODULES
+        help
+          This option helps catch unintended modifications to loadable
+          kernel module's text and read-only data. It also prevents execution
+          of module data. Such protection may interfere with run-time code
+          patching and dynamic kernel tracing - and they might also protect
+          against certain classes of kernel exploits.
+          If in doubt, say "N".
+
+config DEBUG_RODATA
+	bool "Make kernel text and rodata read-only"
+	help
+	  If this is set, kernel text and rodata will be made read-only. This
+	  is to help catch accidental or malicious attempts to change the
+	  kernel's executable code. Additionally splits rodata from kernel
+	  text so it can be made explicitly non-executable.
+
+          If in doubt, say Y
+
+config DEBUG_ALIGN_RODATA
+	depends on DEBUG_RODATA && !ARM64_64K_PAGES
+	bool "Align linker sections up to SECTION_SIZE"
+	help
+	  If this option is enabled, sections that may potentially be marked as
+	  read only or non-executable will be aligned up to the section size of
+	  the kernel. This prevents sections from being split into pages and
+	  avoids a potential TLB penalty. The downside is an increase in
+	  alignment and potentially wasted space. Turn on this option if
+	  performance is more important than memory pressure.
+
+	  If in doubt, say N
+
+source "drivers/hwtracing/coresight/Kconfig"
+
+endmenu
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
new file mode 100644
index 000000000..4d2a92599
--- /dev/null
+++ b/arch/arm64/Makefile
@@ -0,0 +1,97 @@
+#
+# arch/arm64/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995-2001 by Russell King
+
+LDFLAGS_vmlinux	:=-p --no-undefined -X
+CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
+OBJCOPYFLAGS	:=-O binary -R .note -R .note.gnu.build-id -R .comment -S
+GZFLAGS		:=-9
+
+KBUILD_DEFCONFIG := defconfig
+
+KBUILD_CFLAGS	+= -mgeneral-regs-only
+ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
+KBUILD_CPPFLAGS	+= -mbig-endian
+AS		+= -EB
+LD		+= -EB
+else
+KBUILD_CPPFLAGS	+= -mlittle-endian
+AS		+= -EL
+LD		+= -EL
+endif
+
+CHECKFLAGS	+= -D__aarch64__
+
+# Default value
+head-y		:= arch/arm64/kernel/head.o
+
+# The byte offset of the kernel image in RAM from the start of RAM.
+ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y)
+TEXT_OFFSET := $(shell awk 'BEGIN {srand(); printf "0x%03x000\n", int(512 * rand())}')
+else
+TEXT_OFFSET := 0x00080000
+endif
+
+export	TEXT_OFFSET GZFLAGS
+
+core-y		+= arch/arm64/kernel/ arch/arm64/mm/
+core-$(CONFIG_NET) += arch/arm64/net/
+core-$(CONFIG_KVM) += arch/arm64/kvm/
+core-$(CONFIG_XEN) += arch/arm64/xen/
+core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
+libs-y		:= arch/arm64/lib/ $(libs-y)
+core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+
+# Default target when executing plain make
+KBUILD_IMAGE	:= Image.gz
+KBUILD_DTBS	:= dtbs
+
+all:	$(KBUILD_IMAGE) $(KBUILD_DTBS)
+
+boot := arch/arm64/boot
+
+Image Image.gz: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+zinstall install: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+%.dtb: scripts
+	$(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
+
+PHONY += dtbs dtbs_install
+
+dtbs: prepare scripts
+	$(Q)$(MAKE) $(build)=$(boot)/dts
+
+dtbs_install:
+	$(Q)$(MAKE) $(dtbinst)=$(boot)/dts
+
+PHONY += vdso_install
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
+
+# We use MRPROPER_FILES and CLEAN_FILES now
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)$(MAKE) $(clean)=$(boot)/dts
+
+define archhelp
+  echo  '* Image.gz      - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
+  echo  '  Image         - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
+  echo  '* dtbs          - Build device tree blobs for enabled boards'
+  echo  '  dtbs_install  - Install dtbs to $(INSTALL_DTBS_PATH)'
+  echo  '  install       - Install uncompressed kernel'
+  echo  '  zinstall      - Install compressed kernel'
+  echo  '                  Install using (your) ~/bin/installkernel or'
+  echo  '                  (distribution) /sbin/installkernel or'
+  echo  '                  install to $$(INSTALL_PATH) and run lilo'
+endef
diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore
new file mode 100644
index 000000000..8dab0bb6a
--- /dev/null
+++ b/arch/arm64/boot/.gitignore
@@ -0,0 +1,2 @@
+Image
+Image.gz
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
new file mode 100644
index 000000000..5a0e3ab85
--- /dev/null
+++ b/arch/arm64/boot/Makefile
@@ -0,0 +1,31 @@
+#
+# arch/arm64/boot/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2012, ARM Ltd.
+# Author: Will Deacon <will.deacon@arm.com>
+#
+# Based on the ia64 boot/Makefile.
+#
+
+targets := Image Image.gz
+
+$(obj)/Image: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/Image.gz: $(obj)/Image FORCE
+	$(call if_changed,gzip)
+
+install: $(obj)/Image
+	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+	$(obj)/Image System.map "$(INSTALL_PATH)"
+
+zinstall: $(obj)/Image.gz
+	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+	$(obj)/Image.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/arm64/boot/dts/.gitignore b/arch/arm64/boot/dts/.gitignore
new file mode 100644
index 000000000..b60ed208c
--- /dev/null
+++ b/arch/arm64/boot/dts/.gitignore
@@ -0,0 +1 @@
+*.dtb
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
new file mode 100644
index 000000000..ad26a752b
--- /dev/null
+++ b/arch/arm64/boot/dts/Makefile
@@ -0,0 +1,12 @@
+dts-dirs += amd
+dts-dirs += apm
+dts-dirs += arm
+dts-dirs += cavium
+dts-dirs += exynos
+dts-dirs += freescale
+dts-dirs += mediatek
+dts-dirs += qcom
+dts-dirs += sprd
+dts-dirs += xilinx
+
+subdir-y	:= $(dts-dirs)
diff --git a/arch/arm64/boot/dts/amd/Makefile b/arch/arm64/boot/dts/amd/Makefile
new file mode 100644
index 000000000..cfdf701e0
--- /dev/null
+++ b/arch/arm64/boot/dts/amd/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_SEATTLE) += amd-overdrive.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/amd/amd-overdrive.dts b/arch/arm64/boot/dts/amd/amd-overdrive.dts
new file mode 100644
index 000000000..564a3f7df
--- /dev/null
+++ b/arch/arm64/boot/dts/amd/amd-overdrive.dts
@@ -0,0 +1,66 @@
+/*
+ * DTS file for AMD Seattle Overdrive Development Board
+ *
+ * Copyright (C) 2014 Advanced Micro Devices, Inc.
+ */
+
+/dts-v1/;
+
+/include/ "amd-seattle-soc.dtsi"
+
+/ {
+	model = "AMD Seattle Development Board (Overdrive)";
+	compatible = "amd,seattle-overdrive", "amd,seattle";
+
+	chosen {
+		stdout-path = &serial0;
+		linux,pci-probe-only;
+	};
+};
+
+&ccp0 {
+	status = "ok";
+};
+
+&gpio0 {
+	status = "ok";
+};
+
+&gpio1 {
+	status = "ok";
+};
+
+&i2c0 {
+	status = "ok";
+};
+
+&pcie0 {
+	status = "ok";
+};
+
+&spi0 {
+	status = "ok";
+};
+
+&spi1 {
+	status = "ok";
+	sdcard0: sdcard@0 {
+		compatible = "mmc-spi-slot";
+		reg = <0>;
+		spi-max-frequency = <20000000>;
+		voltage-ranges = <3200 3400>;
+		gpios = <&gpio0 7 0>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <7 3>;
+		pl022,hierarchy = <0>;
+		pl022,interface = <0>;
+		pl022,com-mode = <0x0>;
+		pl022,rx-level-trig = <0>;
+		pl022,tx-level-trig = <0>;
+	};
+};
+
+&v2m0 {
+	arm,msi-base-spi = <64>;
+	arm,msi-num-spis = <256>;
+};
diff --git a/arch/arm64/boot/dts/amd/amd-seattle-clks.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-clks.dtsi
new file mode 100644
index 000000000..f623c4652
--- /dev/null
+++ b/arch/arm64/boot/dts/amd/amd-seattle-clks.dtsi
@@ -0,0 +1,54 @@
+/*
+ * DTS file for AMD Seattle Clocks
+ *
+ * Copyright (C) 2014 Advanced Micro Devices, Inc.
+ */
+
+	adl3clk_100mhz: clk100mhz_0 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <100000000>;
+		clock-output-names = "adl3clk_100mhz";
+	};
+
+	ccpclk_375mhz: clk375mhz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <375000000>;
+		clock-output-names = "ccpclk_375mhz";
+	};
+
+	sataclk_333mhz: clk333mhz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <333000000>;
+		clock-output-names = "sataclk_333mhz";
+	};
+
+	pcieclk_500mhz: clk500mhz_0 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <500000000>;
+		clock-output-names = "pcieclk_500mhz";
+	};
+
+	dmaclk_500mhz: clk500mhz_1 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <500000000>;
+		clock-output-names = "dmaclk_500mhz";
+	};
+
+	miscclk_250mhz: clk250mhz_4 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <250000000>;
+		clock-output-names = "miscclk_250mhz";
+	};
+
+	uartspiclk_100mhz: clk100mhz_1 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <100000000>;
+		clock-output-names = "uartspiclk_100mhz";
+	};
diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi
new file mode 100644
index 000000000..2874d9288
--- /dev/null
+++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi
@@ -0,0 +1,172 @@
+/*
+ * DTS file for AMD Seattle SoC
+ *
+ * Copyright (C) 2014 Advanced Micro Devices, Inc.
+ */
+
+/ {
+	compatible = "amd,seattle";
+	interrupt-parent = <&gic0>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	gic0: interrupt-controller@e1101000 {
+		compatible = "arm,gic-400", "arm,cortex-a15-gic";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		reg = <0x0 0xe1110000 0 0x1000>,
+		      <0x0 0xe112f000 0 0x2000>,
+		      <0x0 0xe1140000 0 0x10000>,
+		      <0x0 0xe1160000 0 0x10000>;
+		interrupts = <1 9 0xf04>;
+		ranges = <0 0 0 0xe1100000 0 0x100000>;
+		v2m0: v2m@e0080000 {
+			compatible = "arm,gic-v2m-frame";
+			msi-controller;
+			reg = <0x0 0x00080000 0 0x1000>;
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 13 0xff04>,
+			     <1 14 0xff04>,
+			     <1 11 0xff04>,
+			     <1 10 0xff04>;
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <0 7 4>,
+			     <0 8 4>,
+			     <0 9 4>,
+			     <0 10 4>,
+			     <0 11 4>,
+			     <0 12 4>,
+			     <0 13 4>,
+			     <0 14 4>;
+	};
+
+	smb0: smb {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		/* DDR range is 40-bit addressing */
+		dma-ranges = <0x80 0x0 0x80 0x0 0x7f 0xffffffff>;
+
+		/include/ "amd-seattle-clks.dtsi"
+
+		sata0: sata@e0300000 {
+			compatible = "snps,dwc-ahci";
+			reg = <0 0xe0300000 0 0x800>;
+			interrupts = <0 355 4>;
+			clocks = <&sataclk_333mhz>;
+			dma-coherent;
+		};
+
+		i2c0: i2c@e1000000 {
+			status = "disabled";
+			compatible = "snps,designware-i2c";
+			reg = <0 0xe1000000 0 0x1000>;
+			interrupts = <0 357 4>;
+			clocks = <&uartspiclk_100mhz>;
+		};
+
+		serial0: serial@e1010000 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0 0xe1010000 0 0x1000>;
+			interrupts = <0 328 4>;
+			clocks = <&uartspiclk_100mhz>, <&uartspiclk_100mhz>;
+			clock-names = "uartclk", "apb_pclk";
+		};
+
+		spi0: ssp@e1020000 {
+			status = "disabled";
+			compatible = "arm,pl022", "arm,primecell";
+			#gpio-cells = <2>;
+			reg = <0 0xe1020000 0 0x1000>;
+			spi-controller;
+			interrupts = <0 330 4>;
+			clocks = <&uartspiclk_100mhz>;
+			clock-names = "apb_pclk";
+		};
+
+		spi1: ssp@e1030000 {
+			status = "disabled";
+			compatible = "arm,pl022", "arm,primecell";
+			#gpio-cells = <2>;
+			reg = <0 0xe1030000 0 0x1000>;
+			spi-controller;
+			interrupts = <0 329 4>;
+			clocks = <&uartspiclk_100mhz>;
+			clock-names = "apb_pclk";
+			num-cs = <1>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		gpio0: gpio@e1040000 {
+			status = "disabled";
+			compatible = "arm,pl061", "arm,primecell";
+			#gpio-cells = <2>;
+			reg = <0 0xe1040000 0 0x1000>;
+			gpio-controller;
+			interrupts = <0 359 4>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			clocks = <&uartspiclk_100mhz>;
+			clock-names = "apb_pclk";
+		};
+
+		gpio1: gpio@e1050000 {
+			status = "disabled";
+			compatible = "arm,pl061", "arm,primecell";
+			#gpio-cells = <2>;
+			reg = <0 0xe1050000 0 0x1000>;
+			gpio-controller;
+			interrupts = <0 358 4>;
+			clocks = <&uartspiclk_100mhz>;
+			clock-names = "apb_pclk";
+		};
+
+		ccp0: ccp@e0100000 {
+			status = "disabled";
+			compatible = "amd,ccp-seattle-v1a";
+			reg = <0 0xe0100000 0 0x10000>;
+			interrupts = <0 3 4>;
+			dma-coherent;
+		};
+
+		pcie0: pcie@f0000000 {
+			compatible = "pci-host-ecam-generic";
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			device_type = "pci";
+			bus-range = <0 0x7f>;
+			msi-parent = <&v2m0>;
+			reg = <0 0xf0000000 0 0x10000000>;
+
+			interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+			interrupt-map =
+				<0x1000 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x120 0x1>,
+				<0x1000 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x121 0x1>,
+				<0x1000 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x122 0x1>,
+				<0x1000 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x123 0x1>;
+
+			dma-coherent;
+			dma-ranges = <0x43000000 0x80 0x0 0x80 0x0 0x7f 0xffffffff>;
+			ranges =
+				/* I/O Memory (size=64K) */
+				<0x01000000 0x00 0x00000000 0x00 0xefff0000 0x00 0x00010000>,
+				/* 32-bit MMIO (size=2G) */
+				<0x02000000 0x00 0x40000000 0x00 0x40000000 0x00 0x80000000>,
+				/* 64-bit MMIO (size= 124G) */
+				<0x03000000 0x01 0x00000000 0x01 0x00000000 0x7f 0x00000000>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/apm/Makefile b/arch/arm64/boot/dts/apm/Makefile
new file mode 100644
index 000000000..a2afabbc1
--- /dev/null
+++ b/arch/arm64/boot/dts/apm/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts
new file mode 100644
index 000000000..83578e766
--- /dev/null
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -0,0 +1,54 @@
+/*
+ * dts file for AppliedMicro (APM) Mustang Board
+ *
+ * Copyright (C) 2013, Applied Micro Circuits Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/dts-v1/;
+
+/include/ "apm-storm.dtsi"
+
+/ {
+	model = "APM X-Gene Mustang board";
+	compatible = "apm,mustang", "apm,xgene-storm";
+
+	chosen { };
+
+	memory {
+		device_type = "memory";
+		reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */
+	};
+};
+
+&pcie0clk {
+	status = "ok";
+};
+
+&pcie0 {
+	status = "ok";
+};
+
+&serial0 {
+	status = "ok";
+};
+
+&menet {
+	status = "ok";
+};
+
+&sgenet0 {
+	status = "ok";
+};
+
+&sgenet1 {
+	status = "ok";
+};
+
+&xgenet {
+	status = "ok";
+};
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
new file mode 100644
index 000000000..c8d3e0e86
--- /dev/null
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -0,0 +1,714 @@
+/*
+ * dts file for AppliedMicro (APM) X-Gene Storm SOC
+ *
+ * Copyright (C) 2013, Applied Micro Circuits Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/ {
+	compatible = "apm,xgene-storm";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@000 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x000>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@001 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x001>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@100 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x100>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@101 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x101>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@200 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x200>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@201 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x201>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@300 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x300>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@301 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x301>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+	};
+
+	gic: interrupt-controller@78010000 {
+		compatible = "arm,cortex-a15-gic";
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		reg = <0x0 0x78010000 0x0 0x1000>,	/* GIC Dist */
+		      <0x0 0x78020000 0x0 0x1000>,	/* GIC CPU */
+		      <0x0 0x78040000 0x0 0x2000>,	/* GIC VCPU Control */
+		      <0x0 0x78060000 0x0 0x2000>;	/* GIC VCPU */
+		interrupts = <1 9 0xf04>;	/* GIC Maintenence IRQ */
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 0 0xff01>,	/* Secure Phys IRQ */
+			     <1 13 0xff01>,	/* Non-secure Phys IRQ */
+			     <1 14 0xff01>,	/* Virt IRQ */
+			     <1 15 0xff01>;	/* Hyp IRQ */
+		clock-frequency = <50000000>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+		dma-ranges = <0x0 0x0 0x0 0x0 0x400 0x0>;
+
+		clocks {
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
+			refclk: refclk {
+				compatible = "fixed-clock";
+				#clock-cells = <1>;
+				clock-frequency = <100000000>;
+				clock-output-names = "refclk";
+			};
+
+			pcppll: pcppll@17000100 {
+				compatible = "apm,xgene-pcppll-clock";
+				#clock-cells = <1>;
+				clocks = <&refclk 0>;
+				clock-names = "pcppll";
+				reg = <0x0 0x17000100 0x0 0x1000>;
+				clock-output-names = "pcppll";
+				type = <0>;
+			};
+
+			socpll: socpll@17000120 {
+				compatible = "apm,xgene-socpll-clock";
+				#clock-cells = <1>;
+				clocks = <&refclk 0>;
+				clock-names = "socpll";
+				reg = <0x0 0x17000120 0x0 0x1000>;
+				clock-output-names = "socpll";
+				type = <1>;
+			};
+
+			socplldiv2: socplldiv2  {
+				compatible = "fixed-factor-clock";
+				#clock-cells = <1>;
+				clocks = <&socpll 0>;
+				clock-names = "socplldiv2";
+				clock-mult = <1>;
+				clock-div = <2>;
+				clock-output-names = "socplldiv2";
+			};
+
+			qmlclk: qmlclk {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				clock-names = "qmlclk";
+				reg = <0x0 0x1703C000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "qmlclk";
+			};
+
+			ethclk: ethclk {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				clock-names = "ethclk";
+				reg = <0x0 0x17000000 0x0 0x1000>;
+				reg-names = "div-reg";
+				divider-offset = <0x238>;
+				divider-width = <0x9>;
+				divider-shift = <0x0>;
+				clock-output-names = "ethclk";
+			};
+
+			menetclk: menetclk {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&ethclk 0>;
+				reg = <0x0 0x1702C000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "menetclk";
+			};
+
+			sge0clk: sge0clk@1f21c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f21c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				csr-mask = <0x3>;
+				clock-output-names = "sge0clk";
+			};
+
+			sge1clk: sge1clk@1f21c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f21c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				csr-mask = <0xc>;
+				clock-output-names = "sge1clk";
+			};
+
+			xge0clk: xge0clk@1f61c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f61c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				csr-mask = <0x3>;
+				clock-output-names = "xge0clk";
+			};
+
+			sataphy1clk: sataphy1clk@1f21c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f21c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "sataphy1clk";
+				status = "disabled";
+				csr-offset = <0x4>;
+				csr-mask = <0x00>;
+				enable-offset = <0x0>;
+				enable-mask = <0x06>;
+			};
+
+			sataphy2clk: sataphy1clk@1f22c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f22c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "sataphy2clk";
+				status = "ok";
+				csr-offset = <0x4>;
+				csr-mask = <0x3a>;
+				enable-offset = <0x0>;
+				enable-mask = <0x06>;
+			};
+
+			sataphy3clk: sataphy1clk@1f23c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f23c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "sataphy3clk";
+				status = "ok";
+				csr-offset = <0x4>;
+				csr-mask = <0x3a>;
+				enable-offset = <0x0>;
+				enable-mask = <0x06>;
+			};
+
+			sata01clk: sata01clk@1f21c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f21c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "sata01clk";
+				csr-offset = <0x4>;
+				csr-mask = <0x05>;
+				enable-offset = <0x0>;
+				enable-mask = <0x39>;
+			};
+
+			sata23clk: sata23clk@1f22c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f22c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "sata23clk";
+				csr-offset = <0x4>;
+				csr-mask = <0x05>;
+				enable-offset = <0x0>;
+				enable-mask = <0x39>;
+			};
+
+			sata45clk: sata45clk@1f23c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f23c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "sata45clk";
+				csr-offset = <0x4>;
+				csr-mask = <0x05>;
+				enable-offset = <0x0>;
+				enable-mask = <0x39>;
+			};
+
+			rtcclk: rtcclk@17000000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x17000000 0x0 0x2000>;
+				reg-names = "csr-reg";
+				csr-offset = <0xc>;
+				csr-mask = <0x2>;
+				enable-offset = <0x10>;
+				enable-mask = <0x2>;
+				clock-output-names = "rtcclk";
+			};
+
+			rngpkaclk: rngpkaclk@17000000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x17000000 0x0 0x2000>;
+				reg-names = "csr-reg";
+				csr-offset = <0xc>;
+				csr-mask = <0x10>;
+				enable-offset = <0x10>;
+				enable-mask = <0x10>;
+				clock-output-names = "rngpkaclk";
+			};
+
+			pcie0clk: pcie0clk@1f2bc000 {
+				status = "disabled";
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f2bc000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "pcie0clk";
+			};
+
+			pcie1clk: pcie1clk@1f2cc000 {
+				status = "disabled";
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f2cc000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "pcie1clk";
+			};
+
+			pcie2clk: pcie2clk@1f2dc000 {
+				status = "disabled";
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f2dc000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "pcie2clk";
+			};
+
+			pcie3clk: pcie3clk@1f50c000 {
+				status = "disabled";
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f50c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "pcie3clk";
+			};
+
+			pcie4clk: pcie4clk@1f51c000 {
+				status = "disabled";
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f51c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "pcie4clk";
+			};
+
+			dmaclk: dmaclk@1f27c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f27c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "dmaclk";
+			};
+		};
+
+		pcie0: pcie@1f2b0000 {
+			status = "disabled";
+			device_type = "pci";
+			compatible = "apm,xgene-storm-pcie", "apm,xgene-pcie";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = < 0x00 0x1f2b0000 0x0 0x00010000   /* Controller registers */
+				0xe0 0xd0000000 0x0 0x00040000>; /* PCI config space */
+			reg-names = "csr", "cfg";
+			ranges = <0x01000000 0x00 0x00000000 0xe0 0x10000000 0x00 0x00010000   /* io */
+				  0x02000000 0x00 0x80000000 0xe1 0x80000000 0x00 0x80000000>; /* mem */
+			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
+				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc2 0x1
+					 0x0 0x0 0x0 0x2 &gic 0x0 0xc3 0x1
+					 0x0 0x0 0x0 0x3 &gic 0x0 0xc4 0x1
+					 0x0 0x0 0x0 0x4 &gic 0x0 0xc5 0x1>;
+			dma-coherent;
+			clocks = <&pcie0clk 0>;
+		};
+
+		pcie1: pcie@1f2c0000 {
+			status = "disabled";
+			device_type = "pci";
+			compatible = "apm,xgene-storm-pcie", "apm,xgene-pcie";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = < 0x00 0x1f2c0000 0x0 0x00010000   /* Controller registers */
+				0xd0 0xd0000000 0x0 0x00040000>; /* PCI config space */
+			reg-names = "csr", "cfg";
+			ranges = <0x01000000 0x0 0x00000000 0xd0 0x10000000 0x00 0x00010000   /* io  */
+				  0x02000000 0x0 0x80000000 0xd1 0x80000000 0x00 0x80000000>; /* mem */
+			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
+				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc8 0x1
+					 0x0 0x0 0x0 0x2 &gic 0x0 0xc9 0x1
+					 0x0 0x0 0x0 0x3 &gic 0x0 0xca 0x1
+					 0x0 0x0 0x0 0x4 &gic 0x0 0xcb 0x1>;
+			dma-coherent;
+			clocks = <&pcie1clk 0>;
+		};
+
+		pcie2: pcie@1f2d0000 {
+			status = "disabled";
+			device_type = "pci";
+			compatible = "apm,xgene-storm-pcie", "apm,xgene-pcie";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg =  < 0x00 0x1f2d0000 0x0 0x00010000   /* Controller registers */
+				 0x90 0xd0000000 0x0 0x00040000>; /* PCI config space */
+			reg-names = "csr", "cfg";
+			ranges = <0x01000000 0x0 0x00000000 0x90 0x10000000 0x0 0x00010000   /* io  */
+				  0x02000000 0x0 0x80000000 0x91 0x80000000 0x0 0x80000000>; /* mem */
+			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
+				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xce 0x1
+					 0x0 0x0 0x0 0x2 &gic 0x0 0xcf 0x1
+					 0x0 0x0 0x0 0x3 &gic 0x0 0xd0 0x1
+					 0x0 0x0 0x0 0x4 &gic 0x0 0xd1 0x1>;
+			dma-coherent;
+			clocks = <&pcie2clk 0>;
+		};
+
+		pcie3: pcie@1f500000 {
+			status = "disabled";
+			device_type = "pci";
+			compatible = "apm,xgene-storm-pcie", "apm,xgene-pcie";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = < 0x00 0x1f500000 0x0 0x00010000   /* Controller registers */
+				0xa0 0xd0000000 0x0 0x00040000>; /* PCI config space */
+			reg-names = "csr", "cfg";
+			ranges = <0x01000000 0x0 0x00000000 0xa0 0x10000000 0x0 0x00010000   /* io   */
+				  0x02000000 0x0 0x80000000 0xa1 0x80000000 0x0 0x80000000>; /* mem  */
+			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
+				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xd4 0x1
+					 0x0 0x0 0x0 0x2 &gic 0x0 0xd5 0x1
+					 0x0 0x0 0x0 0x3 &gic 0x0 0xd6 0x1
+					 0x0 0x0 0x0 0x4 &gic 0x0 0xd7 0x1>;
+			dma-coherent;
+			clocks = <&pcie3clk 0>;
+		};
+
+		pcie4: pcie@1f510000 {
+			status = "disabled";
+			device_type = "pci";
+			compatible = "apm,xgene-storm-pcie", "apm,xgene-pcie";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = < 0x00 0x1f510000 0x0 0x00010000   /* Controller registers */
+				0xc0 0xd0000000 0x0 0x00200000>; /* PCI config space */
+			reg-names = "csr", "cfg";
+			ranges = <0x01000000 0x0 0x00000000 0xc0 0x10000000 0x0 0x00010000   /* io  */
+				  0x02000000 0x0 0x80000000 0xc1 0x80000000 0x0 0x80000000>; /* mem */
+			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
+				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xda 0x1
+					 0x0 0x0 0x0 0x2 &gic 0x0 0xdb 0x1
+					 0x0 0x0 0x0 0x3 &gic 0x0 0xdc 0x1
+					 0x0 0x0 0x0 0x4 &gic 0x0 0xdd 0x1>;
+			dma-coherent;
+			clocks = <&pcie4clk 0>;
+		};
+
+		serial0: serial@1c020000 {
+			status = "disabled";
+			device_type = "serial";
+			compatible = "ns16550a";
+			reg = <0 0x1c020000 0x0 0x1000>;
+			reg-shift = <2>;
+			clock-frequency = <10000000>; /* Updated by bootloader */
+			interrupt-parent = <&gic>;
+			interrupts = <0x0 0x4c 0x4>;
+		};
+
+		serial1: serial@1c021000 {
+			status = "disabled";
+			device_type = "serial";
+			compatible = "ns16550a";
+			reg = <0 0x1c021000 0x0 0x1000>;
+			reg-shift = <2>;
+			clock-frequency = <10000000>; /* Updated by bootloader */
+			interrupt-parent = <&gic>;
+			interrupts = <0x0 0x4d 0x4>;
+		};
+
+		serial2: serial@1c022000 {
+			status = "disabled";
+			device_type = "serial";
+			compatible = "ns16550a";
+			reg = <0 0x1c022000 0x0 0x1000>;
+			reg-shift = <2>;
+			clock-frequency = <10000000>; /* Updated by bootloader */
+			interrupt-parent = <&gic>;
+			interrupts = <0x0 0x4e 0x4>;
+		};
+
+		serial3: serial@1c023000 {
+			status = "disabled";
+			device_type = "serial";
+			compatible = "ns16550a";
+			reg = <0 0x1c023000 0x0 0x1000>;
+			reg-shift = <2>;
+			clock-frequency = <10000000>; /* Updated by bootloader */
+			interrupt-parent = <&gic>;
+			interrupts = <0x0 0x4f 0x4>;
+		};
+
+		phy1: phy@1f21a000 {
+			compatible = "apm,xgene-phy";
+			reg = <0x0 0x1f21a000 0x0 0x100>;
+			#phy-cells = <1>;
+			clocks = <&sataphy1clk 0>;
+			status = "disabled";
+			apm,tx-boost-gain = <30 30 30 30 30 30>;
+			apm,tx-eye-tuning = <2 10 10 2 10 10>;
+		};
+
+		phy2: phy@1f22a000 {
+			compatible = "apm,xgene-phy";
+			reg = <0x0 0x1f22a000 0x0 0x100>;
+			#phy-cells = <1>;
+			clocks = <&sataphy2clk 0>;
+			status = "ok";
+			apm,tx-boost-gain = <30 30 30 30 30 30>;
+			apm,tx-eye-tuning = <1 10 10 2 10 10>;
+		};
+
+		phy3: phy@1f23a000 {
+			compatible = "apm,xgene-phy";
+			reg = <0x0 0x1f23a000 0x0 0x100>;
+			#phy-cells = <1>;
+			clocks = <&sataphy3clk 0>;
+			status = "ok";
+			apm,tx-boost-gain = <31 31 31 31 31 31>;
+			apm,tx-eye-tuning = <2 10 10 2 10 10>;
+		};
+
+		sata1: sata@1a000000 {
+			compatible = "apm,xgene-ahci";
+			reg = <0x0 0x1a000000 0x0 0x1000>,
+			      <0x0 0x1f210000 0x0 0x1000>,
+			      <0x0 0x1f21d000 0x0 0x1000>,
+			      <0x0 0x1f21e000 0x0 0x1000>,
+			      <0x0 0x1f217000 0x0 0x1000>;
+			interrupts = <0x0 0x86 0x4>;
+			dma-coherent;
+			status = "disabled";
+			clocks = <&sata01clk 0>;
+			phys = <&phy1 0>;
+			phy-names = "sata-phy";
+		};
+
+		sata2: sata@1a400000 {
+			compatible = "apm,xgene-ahci";
+			reg = <0x0 0x1a400000 0x0 0x1000>,
+			      <0x0 0x1f220000 0x0 0x1000>,
+			      <0x0 0x1f22d000 0x0 0x1000>,
+			      <0x0 0x1f22e000 0x0 0x1000>,
+			      <0x0 0x1f227000 0x0 0x1000>;
+			interrupts = <0x0 0x87 0x4>;
+			dma-coherent;
+			status = "ok";
+			clocks = <&sata23clk 0>;
+			phys = <&phy2 0>;
+			phy-names = "sata-phy";
+		};
+
+		sata3: sata@1a800000 {
+			compatible = "apm,xgene-ahci";
+			reg = <0x0 0x1a800000 0x0 0x1000>,
+			      <0x0 0x1f230000 0x0 0x1000>,
+			      <0x0 0x1f23d000 0x0 0x1000>,
+			      <0x0 0x1f23e000 0x0 0x1000>;
+			interrupts = <0x0 0x88 0x4>;
+			dma-coherent;
+			status = "ok";
+			clocks = <&sata45clk 0>;
+			phys = <&phy3 0>;
+			phy-names = "sata-phy";
+		};
+
+		rtc: rtc@10510000 {
+			compatible = "apm,xgene-rtc";
+			reg = <0x0 0x10510000 0x0 0x400>;
+			interrupts = <0x0 0x46 0x4>;
+			#clock-cells = <1>;
+			clocks = <&rtcclk 0>;
+		};
+
+		menet: ethernet@17020000 {
+			compatible = "apm,xgene-enet";
+			status = "disabled";
+			reg = <0x0 0x17020000 0x0 0xd100>,
+			      <0x0 0X17030000 0x0 0Xc300>,
+			      <0x0 0X10000000 0x0 0X200>;
+			reg-names = "enet_csr", "ring_csr", "ring_cmd";
+			interrupts = <0x0 0x3c 0x4>;
+			dma-coherent;
+			clocks = <&menetclk 0>;
+			/* mac address will be overwritten by the bootloader */
+			local-mac-address = [00 00 00 00 00 00];
+			phy-connection-type = "rgmii";
+			phy-handle = <&menetphy>;
+			mdio {
+				compatible = "apm,xgene-mdio";
+				#address-cells = <1>;
+				#size-cells = <0>;
+				menetphy: menetphy@3 {
+					compatible = "ethernet-phy-id001c.c915";
+					reg = <0x3>;
+				};
+
+			};
+		};
+
+		sgenet0: ethernet@1f210000 {
+			compatible = "apm,xgene1-sgenet";
+			status = "disabled";
+			reg = <0x0 0x1f210000 0x0 0xd100>,
+			      <0x0 0x1f200000 0x0 0Xc300>,
+			      <0x0 0x1B000000 0x0 0X200>;
+			reg-names = "enet_csr", "ring_csr", "ring_cmd";
+			interrupts = <0x0 0xA0 0x4>,
+				     <0x0 0xA1 0x4>;
+			dma-coherent;
+			clocks = <&sge0clk 0>;
+			local-mac-address = [00 00 00 00 00 00];
+			phy-connection-type = "sgmii";
+		};
+
+		sgenet1: ethernet@1f210030 {
+			compatible = "apm,xgene1-sgenet";
+			status = "disabled";
+			reg = <0x0 0x1f210030 0x0 0xd100>,
+			      <0x0 0x1f200000 0x0 0Xc300>,
+			      <0x0 0x1B000000 0x0 0X8000>;
+			reg-names = "enet_csr", "ring_csr", "ring_cmd";
+			interrupts = <0x0 0xAC 0x4>,
+				     <0x0 0xAD 0x4>;
+			port-id = <1>;
+			dma-coherent;
+			clocks = <&sge1clk 0>;
+			local-mac-address = [00 00 00 00 00 00];
+			phy-connection-type = "sgmii";
+		};
+
+		xgenet: ethernet@1f610000 {
+			compatible = "apm,xgene1-xgenet";
+			status = "disabled";
+			reg = <0x0 0x1f610000 0x0 0xd100>,
+			      <0x0 0x1f600000 0x0 0Xc300>,
+			      <0x0 0x18000000 0x0 0X200>;
+			reg-names = "enet_csr", "ring_csr", "ring_cmd";
+			interrupts = <0x0 0x60 0x4>,
+				     <0x0 0x61 0x4>;
+			dma-coherent;
+			clocks = <&xge0clk 0>;
+			/* mac address will be overwritten by the bootloader */
+			local-mac-address = [00 00 00 00 00 00];
+			phy-connection-type = "xgmii";
+		};
+
+		rng: rng@10520000 {
+			compatible = "apm,xgene-rng";
+			reg = <0x0 0x10520000 0x0 0x100>;
+			interrupts = <0x0 0x41 0x4>;
+			clocks = <&rngpkaclk 0>;
+		};
+
+		dma: dma@1f270000 {
+			compatible = "apm,xgene-storm-dma";
+			device_type = "dma";
+			reg = <0x0 0x1f270000 0x0 0x10000>,
+			      <0x0 0x1f200000 0x0 0x10000>,
+			      <0x0 0x1b008000 0x0 0x2000>,
+			      <0x0 0x1054a000 0x0 0x100>;
+			interrupts = <0x0 0x82 0x4>,
+				     <0x0 0xb8 0x4>,
+				     <0x0 0xb9 0x4>,
+				     <0x0 0xba 0x4>,
+				     <0x0 0xbb 0x4>;
+			dma-coherent;
+			clocks = <&dmaclk 0>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/arm/Makefile b/arch/arm64/boot/dts/arm/Makefile
new file mode 100644
index 000000000..301a0dada
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/Makefile
@@ -0,0 +1,7 @@
+dtb-$(CONFIG_ARCH_VEXPRESS) += foundation-v8.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/arm/foundation-v8.dts b/arch/arm64/boot/dts/arm/foundation-v8.dts
new file mode 100644
index 000000000..4eac8dcea
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/foundation-v8.dts
@@ -0,0 +1,240 @@
+/*
+ * ARM Ltd.
+ *
+ * ARMv8 Foundation model DTS
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x80000000 0x00010000;
+
+/ {
+	model = "Foundation-v8A";
+	compatible = "arm,foundation-aarch64", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x0>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x8000fff8>;
+			next-level-cache = <&L2_0>;
+		};
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x1>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x8000fff8>;
+			next-level-cache = <&L2_0>;
+		};
+		cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x2>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x8000fff8>;
+			next-level-cache = <&L2_0>;
+		};
+		cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x3>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x8000fff8>;
+			next-level-cache = <&L2_0>;
+		};
+
+		L2_0: l2-cache0 {
+			compatible = "cache";
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x00000000 0x80000000 0 0x80000000>,
+		      <0x00000008 0x80000000 0 0x80000000>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0x0 0x2c001000 0 0x1000>,
+		      <0x0 0x2c002000 0 0x1000>,
+		      <0x0 0x2c004000 0 0x2000>,
+		      <0x0 0x2c006000 0 0x2000>;
+		interrupts = <1 9 0xf04>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 13 0xf08>,
+			     <1 14 0xf08>,
+			     <1 11 0xf08>,
+			     <1 10 0xf08>;
+		clock-frequency = <100000000>;
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <0 60 4>,
+			     <0 61 4>,
+			     <0 62 4>,
+			     <0 63 4>;
+	};
+
+	smb {
+		compatible = "arm,vexpress,v2m-p1", "simple-bus";
+		arm,v2m-memory-map = "rs1";
+		#address-cells = <2>; /* SMB chipselect number and offset */
+		#size-cells = <1>;
+
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		ethernet@2,02000000 {
+			compatible = "smsc,lan91c111";
+			reg = <2 0x02000000 0x10000>;
+			interrupts = <15>;
+		};
+
+		v2m_clk24mhz: clk24mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <24000000>;
+			clock-output-names = "v2m:clk24mhz";
+		};
+
+		v2m_refclk1mhz: refclk1mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <1000000>;
+			clock-output-names = "v2m:refclk1mhz";
+		};
+
+		v2m_refclk32khz: refclk32khz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <32768>;
+			clock-output-names = "v2m:refclk32khz";
+		};
+
+		iofpga@3,00000000 {
+			compatible = "arm,amba-bus", "simple-bus";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0 3 0 0x200000>;
+
+			v2m_sysreg: sysreg@010000 {
+				compatible = "arm,vexpress-sysreg";
+				reg = <0x010000 0x1000>;
+			};
+
+			v2m_serial0: uart@090000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x090000 0x1000>;
+				interrupts = <5>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial1: uart@0a0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0a0000 0x1000>;
+				interrupts = <6>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial2: uart@0b0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0b0000 0x1000>;
+				interrupts = <7>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial3: uart@0c0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0c0000 0x1000>;
+				interrupts = <8>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			virtio_block@0130000 {
+				compatible = "virtio,mmio";
+				reg = <0x130000 0x200>;
+				interrupts = <42>;
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi
new file mode 100644
index 000000000..c9b89efe0
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi
@@ -0,0 +1,44 @@
+/*
+ * ARM Juno Platform clocks
+ *
+ * Copyright (c) 2013-2014 ARM Ltd
+ *
+ * This file is licensed under a dual GPLv2 or BSD license.
+ *
+ */
+
+	/* SoC fixed clocks */
+	soc_uartclk: refclk7273800hz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <7273800>;
+		clock-output-names = "juno:uartclk";
+	};
+
+	soc_usb48mhz: clk48mhz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <48000000>;
+		clock-output-names = "clk48mhz";
+	};
+
+	soc_smc50mhz: clk50mhz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <50000000>;
+		clock-output-names = "smc_clk";
+	};
+
+	soc_refclk100mhz: refclk100mhz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <100000000>;
+		clock-output-names = "apb_pclk";
+	};
+
+	soc_faxiclk: refclk533mhz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <533000000>;
+		clock-output-names = "faxi_clk";
+	};
diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
new file mode 100644
index 000000000..351c95bda
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
@@ -0,0 +1,152 @@
+/*
+ * ARM Juno Platform motherboard peripherals
+ *
+ * Copyright (c) 2013-2014 ARM Ltd
+ *
+ * This file is licensed under a dual GPLv2 or BSD license.
+ *
+ */
+
+		mb_clk24mhz: clk24mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <24000000>;
+			clock-output-names = "juno_mb:clk24mhz";
+		};
+
+		mb_clk25mhz: clk25mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <25000000>;
+			clock-output-names = "juno_mb:clk25mhz";
+		};
+
+		v2m_refclk1mhz: refclk1mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <1000000>;
+			clock-output-names = "juno_mb:refclk1mhz";
+		};
+
+		v2m_refclk32khz: refclk32khz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <32768>;
+			clock-output-names = "juno_mb:refclk32khz";
+		};
+
+		motherboard {
+			compatible = "arm,vexpress,v2p-p1", "simple-bus";
+			#address-cells = <2>;  /* SMB chipselect number and offset */
+			#size-cells = <1>;
+			#interrupt-cells = <1>;
+			ranges;
+			model = "V2M-Juno";
+			arm,hbi = <0x252>;
+			arm,vexpress,site = <0>;
+			arm,v2m-memory-map = "rs1";
+
+			mb_fixed_3v3: fixedregulator@0 {
+				compatible = "regulator-fixed";
+				regulator-name = "MCC_SB_3V3";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+
+			ethernet@2,00000000 {
+				compatible = "smsc,lan9118", "smsc,lan9115";
+				reg = <2 0x00000000 0x10000>;
+				interrupts = <3>;
+				phy-mode = "mii";
+				reg-io-width = <4>;
+				smsc,irq-active-high;
+				smsc,irq-push-pull;
+				clocks = <&mb_clk25mhz>;
+				vdd33a-supply = <&mb_fixed_3v3>;
+				vddvario-supply = <&mb_fixed_3v3>;
+			};
+
+			usb@5,00000000 {
+				compatible = "nxp,usb-isp1763";
+				reg = <5 0x00000000 0x20000>;
+				bus-width = <16>;
+				interrupts = <4>;
+			};
+
+			iofpga@3,00000000 {
+				compatible = "arm,amba-bus", "simple-bus";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 3 0 0x200000>;
+
+				v2m_sysctl: sysctl@020000 {
+					compatible = "arm,sp810", "arm,primecell";
+					reg = <0x020000 0x1000>;
+					clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&mb_clk24mhz>;
+					clock-names = "refclk", "timclk", "apb_pclk";
+					#clock-cells = <1>;
+					clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
+				};
+
+				mmci@050000 {
+					compatible = "arm,pl180", "arm,primecell";
+					reg = <0x050000 0x1000>;
+					interrupts = <5>;
+					/* cd-gpios = <&v2m_mmc_gpios 0 0>;
+					wp-gpios = <&v2m_mmc_gpios 1 0>; */
+					max-frequency = <12000000>;
+					vmmc-supply = <&mb_fixed_3v3>;
+					clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
+					clock-names = "mclk", "apb_pclk";
+				};
+
+				kmi@060000 {
+					compatible = "arm,pl050", "arm,primecell";
+					reg = <0x060000 0x1000>;
+					interrupts = <8>;
+					clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
+					clock-names = "KMIREFCLK", "apb_pclk";
+				};
+
+				kmi@070000 {
+					compatible = "arm,pl050", "arm,primecell";
+					reg = <0x070000 0x1000>;
+					interrupts = <8>;
+					clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
+					clock-names = "KMIREFCLK", "apb_pclk";
+				};
+
+				wdt@0f0000 {
+					compatible = "arm,sp805", "arm,primecell";
+					reg = <0x0f0000 0x10000>;
+					interrupts = <7>;
+					clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
+					clock-names = "wdogclk", "apb_pclk";
+				};
+
+				v2m_timer01: timer@110000 {
+					compatible = "arm,sp804", "arm,primecell";
+					reg = <0x110000 0x10000>;
+					interrupts = <9>;
+					clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&mb_clk24mhz>;
+					clock-names = "timclken1", "timclken2", "apb_pclk";
+				};
+
+				v2m_timer23: timer@120000 {
+					compatible = "arm,sp804", "arm,primecell";
+					reg = <0x120000 0x10000>;
+					interrupts = <9>;
+					clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&mb_clk24mhz>;
+					clock-names = "timclken1", "timclken2", "apb_pclk";
+				};
+
+				rtc@170000 {
+					compatible = "arm,pl031", "arm,primecell";
+					reg = <0x170000 0x10000>;
+					interrupts = <0>;
+					clocks = <&soc_smc50mhz>;
+					clock-names = "apb_pclk";
+				};
+			};
+		};
diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts
new file mode 100644
index 000000000..5e9110a33
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/juno.dts
@@ -0,0 +1,238 @@
+/*
+ * ARM Ltd. Juno Platform
+ *
+ * Copyright (c) 2013-2014 ARM Ltd.
+ *
+ * This file is licensed under a dual GPLv2 or BSD license.
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+	model = "ARM Juno development board (r0)";
+	compatible = "arm,juno", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		serial0 = &soc_uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	psci {
+		compatible = "arm,psci-0.2";
+		method = "smc";
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		A57_0: cpu@0 {
+			compatible = "arm,cortex-a57","arm,armv8";
+			reg = <0x0 0x0>;
+			device_type = "cpu";
+			enable-method = "psci";
+			next-level-cache = <&A57_L2>;
+		};
+
+		A57_1: cpu@1 {
+			compatible = "arm,cortex-a57","arm,armv8";
+			reg = <0x0 0x1>;
+			device_type = "cpu";
+			enable-method = "psci";
+			next-level-cache = <&A57_L2>;
+		};
+
+		A53_0: cpu@100 {
+			compatible = "arm,cortex-a53","arm,armv8";
+			reg = <0x0 0x100>;
+			device_type = "cpu";
+			enable-method = "psci";
+			next-level-cache = <&A53_L2>;
+		};
+
+		A53_1: cpu@101 {
+			compatible = "arm,cortex-a53","arm,armv8";
+			reg = <0x0 0x101>;
+			device_type = "cpu";
+			enable-method = "psci";
+			next-level-cache = <&A53_L2>;
+		};
+
+		A53_2: cpu@102 {
+			compatible = "arm,cortex-a53","arm,armv8";
+			reg = <0x0 0x102>;
+			device_type = "cpu";
+			enable-method = "psci";
+			next-level-cache = <&A53_L2>;
+		};
+
+		A53_3: cpu@103 {
+			compatible = "arm,cortex-a53","arm,armv8";
+			reg = <0x0 0x103>;
+			device_type = "cpu";
+			enable-method = "psci";
+			next-level-cache = <&A53_L2>;
+		};
+
+		A57_L2: l2-cache0 {
+			compatible = "cache";
+		};
+
+		A53_L2: l2-cache1 {
+			compatible = "cache";
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		/* last 16MB of the first memory area is reserved for secure world use by firmware */
+		reg = <0x00000000 0x80000000 0x0 0x7f000000>,
+		      <0x00000008 0x80000000 0x1 0x80000000>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,gic-400", "arm,cortex-a15-gic";
+		reg = <0x0 0x2c010000 0 0x1000>,
+		      <0x0 0x2c02f000 0 0x2000>,
+		      <0x0 0x2c04f000 0 0x2000>,
+		      <0x0 0x2c06f000 0 0x2000>;
+		#address-cells = <0>;
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_HIGH)>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&A57_0>,
+				     <&A57_1>,
+				     <&A53_0>,
+				     <&A53_1>,
+				     <&A53_2>,
+				     <&A53_3>;
+	};
+
+	/include/ "juno-clocks.dtsi"
+
+	dma@7ff00000 {
+		compatible = "arm,pl330", "arm,primecell";
+		reg = <0x0 0x7ff00000 0 0x1000>;
+		#dma-cells = <1>;
+		#dma-channels = <8>;
+		#dma-requests = <32>;
+		interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&soc_faxiclk>;
+		clock-names = "apb_pclk";
+	};
+
+	soc_uart0: uart@7ff80000 {
+		compatible = "arm,pl011", "arm,primecell";
+		reg = <0x0 0x7ff80000 0x0 0x1000>;
+		interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&soc_uartclk>, <&soc_refclk100mhz>;
+		clock-names = "uartclk", "apb_pclk";
+	};
+
+	i2c@7ffa0000 {
+		compatible = "snps,designware-i2c";
+		reg = <0x0 0x7ffa0000 0x0 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>;
+		clock-frequency = <400000>;
+		i2c-sda-hold-time-ns = <500>;
+		clocks = <&soc_smc50mhz>;
+
+		dvi0: dvi-transmitter@70 {
+			compatible = "nxp,tda998x";
+			reg = <0x70>;
+		};
+
+		dvi1: dvi-transmitter@71 {
+			compatible = "nxp,tda998x";
+			reg = <0x71>;
+		};
+	};
+
+	ohci@7ffb0000 {
+		compatible = "generic-ohci";
+		reg = <0x0 0x7ffb0000 0x0 0x10000>;
+		interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&soc_usb48mhz>;
+	};
+
+	ehci@7ffc0000 {
+		compatible = "generic-ehci";
+		reg = <0x0 0x7ffc0000 0x0 0x10000>;
+		interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&soc_usb48mhz>;
+	};
+
+	memory-controller@7ffd0000 {
+		compatible = "arm,pl354", "arm,primecell";
+		reg = <0 0x7ffd0000 0 0x1000>;
+		interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&soc_smc50mhz>;
+		clock-names = "apb_pclk";
+	};
+
+	smb {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 15>;
+		interrupt-map = <0 0  0 &gic 0  68 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  1 &gic 0  69 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  2 &gic 0  70 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  3 &gic 0 160 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  4 &gic 0 161 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  5 &gic 0 162 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  6 &gic 0 163 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  7 &gic 0 164 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  8 &gic 0 165 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  9 &gic 0 166 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 10 &gic 0 167 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 11 &gic 0 168 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 12 &gic 0 169 IRQ_TYPE_LEVEL_HIGH>;
+
+		/include/ "juno-motherboard.dtsi"
+	};
+};
diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts
new file mode 100644
index 000000000..20addabbd
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts
@@ -0,0 +1,167 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Architecture Envelope Model (AEM) ARMv8-A
+ * ARMAEMv8AMPCT
+ *
+ * RTSM_VE_AEMv8A.lisa
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x80000000 0x00010000;
+
+/ {
+	model = "RTSM_VE_AEMv8A";
+	compatible = "arm,rtsm_ve,aemv8a", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x0>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x8000fff8>;
+			next-level-cache = <&L2_0>;
+		};
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x1>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x8000fff8>;
+			next-level-cache = <&L2_0>;
+		};
+		cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x2>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x8000fff8>;
+			next-level-cache = <&L2_0>;
+		};
+		cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,armv8";
+			reg = <0x0 0x3>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x8000fff8>;
+			next-level-cache = <&L2_0>;
+		};
+
+		L2_0: l2-cache0 {
+			compatible = "cache";
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x00000000 0x80000000 0 0x80000000>,
+		      <0x00000008 0x80000000 0 0x80000000>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0x0 0x2c001000 0 0x1000>,
+		      <0x0 0x2c002000 0 0x1000>,
+		      <0x0 0x2c004000 0 0x2000>,
+		      <0x0 0x2c006000 0 0x2000>;
+		interrupts = <1 9 0xf04>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 13 0xf08>,
+			     <1 14 0xf08>,
+			     <1 11 0xf08>,
+			     <1 10 0xf08>;
+		clock-frequency = <100000000>;
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <0 60 4>,
+			     <0 61 4>,
+			     <0 62 4>,
+			     <0 63 4>;
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+};
diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi
new file mode 100644
index 000000000..c46cbb29f
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi
@@ -0,0 +1,273 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * Motherboard component
+ *
+ * VEMotherBoard.lisa
+ */
+
+	motherboard {
+		arm,v2m-memory-map = "rs1";
+		compatible = "arm,vexpress,v2m-p1", "simple-bus";
+		#address-cells = <2>; /* SMB chipselect number and offset */
+		#size-cells = <1>;
+		#interrupt-cells = <1>;
+		ranges;
+
+		flash@0,00000000 {
+			compatible = "arm,vexpress-flash", "cfi-flash";
+			reg = <0 0x00000000 0x04000000>,
+			      <4 0x00000000 0x04000000>;
+			bank-width = <4>;
+		};
+
+		v2m_video_ram: vram@2,00000000 {
+			compatible = "arm,vexpress-vram";
+			reg = <2 0x00000000 0x00800000>;
+		};
+
+		ethernet@2,02000000 {
+			compatible = "smsc,lan91c111";
+			reg = <2 0x02000000 0x10000>;
+			interrupts = <15>;
+		};
+
+		v2m_clk24mhz: clk24mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <24000000>;
+			clock-output-names = "v2m:clk24mhz";
+		};
+
+		v2m_refclk1mhz: refclk1mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <1000000>;
+			clock-output-names = "v2m:refclk1mhz";
+		};
+
+		v2m_refclk32khz: refclk32khz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <32768>;
+			clock-output-names = "v2m:refclk32khz";
+		};
+
+		iofpga@3,00000000 {
+			compatible = "arm,amba-bus", "simple-bus";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0 3 0 0x200000>;
+
+			v2m_sysreg: sysreg@010000 {
+				compatible = "arm,vexpress-sysreg";
+				reg = <0x010000 0x1000>;
+				gpio-controller;
+				#gpio-cells = <2>;
+			};
+
+			v2m_sysctl: sysctl@020000 {
+				compatible = "arm,sp810", "arm,primecell";
+				reg = <0x020000 0x1000>;
+				clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&v2m_clk24mhz>;
+				clock-names = "refclk", "timclk", "apb_pclk";
+				#clock-cells = <1>;
+				clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
+			};
+
+			aaci@040000 {
+				compatible = "arm,pl041", "arm,primecell";
+				reg = <0x040000 0x1000>;
+				interrupts = <11>;
+				clocks = <&v2m_clk24mhz>;
+				clock-names = "apb_pclk";
+			};
+
+			mmci@050000 {
+				compatible = "arm,pl180", "arm,primecell";
+				reg = <0x050000 0x1000>;
+				interrupts = <9 10>;
+				cd-gpios = <&v2m_sysreg 0 0>;
+				wp-gpios = <&v2m_sysreg 1 0>;
+				max-frequency = <12000000>;
+				vmmc-supply = <&v2m_fixed_3v3>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "mclk", "apb_pclk";
+			};
+
+			kmi@060000 {
+				compatible = "arm,pl050", "arm,primecell";
+				reg = <0x060000 0x1000>;
+				interrupts = <12>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "KMIREFCLK", "apb_pclk";
+			};
+
+			kmi@070000 {
+				compatible = "arm,pl050", "arm,primecell";
+				reg = <0x070000 0x1000>;
+				interrupts = <13>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "KMIREFCLK", "apb_pclk";
+			};
+
+			v2m_serial0: uart@090000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x090000 0x1000>;
+				interrupts = <5>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial1: uart@0a0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0a0000 0x1000>;
+				interrupts = <6>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial2: uart@0b0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0b0000 0x1000>;
+				interrupts = <7>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial3: uart@0c0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0c0000 0x1000>;
+				interrupts = <8>;
+				clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			wdt@0f0000 {
+				compatible = "arm,sp805", "arm,primecell";
+				reg = <0x0f0000 0x1000>;
+				interrupts = <0>;
+				clocks = <&v2m_refclk32khz>, <&v2m_clk24mhz>;
+				clock-names = "wdogclk", "apb_pclk";
+			};
+
+			v2m_timer01: timer@110000 {
+				compatible = "arm,sp804", "arm,primecell";
+				reg = <0x110000 0x1000>;
+				interrupts = <2>;
+				clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&v2m_clk24mhz>;
+				clock-names = "timclken1", "timclken2", "apb_pclk";
+			};
+
+			v2m_timer23: timer@120000 {
+				compatible = "arm,sp804", "arm,primecell";
+				reg = <0x120000 0x1000>;
+				interrupts = <3>;
+				clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&v2m_clk24mhz>;
+				clock-names = "timclken1", "timclken2", "apb_pclk";
+			};
+
+			rtc@170000 {
+				compatible = "arm,pl031", "arm,primecell";
+				reg = <0x170000 0x1000>;
+				interrupts = <4>;
+				clocks = <&v2m_clk24mhz>;
+				clock-names = "apb_pclk";
+			};
+
+			clcd@1f0000 {
+				compatible = "arm,pl111", "arm,primecell";
+				reg = <0x1f0000 0x1000>;
+				interrupt-names = "combined";
+				interrupts = <14>;
+				clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>;
+				clock-names = "clcdclk", "apb_pclk";
+				arm,pl11x,framebuffer = <0x18000000 0x00180000>;
+				memory-region = <&v2m_video_ram>;
+				max-memory-bandwidth = <130000000>; /* 16bpp @ 63.5MHz */
+
+				port {
+					v2m_clcd_pads: endpoint {
+						remote-endpoint = <&v2m_clcd_panel>;
+						arm,pl11x,tft-r0g0b0-pads = <0 8 16>;
+					};
+				};
+
+				panel {
+					compatible = "panel-dpi";
+
+					port {
+						v2m_clcd_panel: endpoint {
+							remote-endpoint = <&v2m_clcd_pads>;
+						};
+					};
+
+					panel-timing {
+						clock-frequency = <63500127>;
+						hactive = <1024>;
+						hback-porch = <152>;
+						hfront-porch = <48>;
+						hsync-len = <104>;
+						vactive = <768>;
+						vback-porch = <23>;
+						vfront-porch = <3>;
+						vsync-len = <4>;
+					};
+				};
+			};
+
+			virtio_block@0130000 {
+				compatible = "virtio,mmio";
+				reg = <0x130000 0x200>;
+				interrupts = <42>;
+			};
+		};
+
+		v2m_fixed_3v3: fixedregulator@0 {
+			compatible = "regulator-fixed";
+			regulator-name = "3V3";
+			regulator-min-microvolt = <3300000>;
+			regulator-max-microvolt = <3300000>;
+			regulator-always-on;
+		};
+
+		mcc {
+			compatible = "arm,vexpress,config-bus";
+			arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+			v2m_oscclk1: osc@1 {
+				/* CLCD clock */
+				compatible = "arm,vexpress-osc";
+				arm,vexpress-sysreg,func = <1 1>;
+				freq-range = <23750000 63500000>;
+				#clock-cells = <0>;
+				clock-output-names = "v2m:oscclk1";
+			};
+
+			reset@0 {
+				compatible = "arm,vexpress-reset";
+				arm,vexpress-sysreg,func = <5 0>;
+			};
+
+			muxfpga@0 {
+				compatible = "arm,vexpress-muxfpga";
+				arm,vexpress-sysreg,func = <7 0>;
+			};
+
+			shutdown@0 {
+				compatible = "arm,vexpress-shutdown";
+				arm,vexpress-sysreg,func = <8 0>;
+			};
+
+			reboot@0 {
+				compatible = "arm,vexpress-reboot";
+				arm,vexpress-sysreg,func = <9 0>;
+			};
+
+			dvimode@0 {
+				compatible = "arm,vexpress-dvimode";
+				arm,vexpress-sysreg,func = <11 0>;
+			};
+		};
+	};
diff --git a/arch/arm64/boot/dts/cavium/Makefile b/arch/arm64/boot/dts/cavium/Makefile
new file mode 100644
index 000000000..e34f89dda
--- /dev/null
+++ b/arch/arm64/boot/dts/cavium/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_THUNDER) += thunder-88xx.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dts b/arch/arm64/boot/dts/cavium/thunder-88xx.dts
new file mode 100644
index 000000000..800ba6599
--- /dev/null
+++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dts
@@ -0,0 +1,67 @@
+/*
+ * Cavium Thunder DTS file - Thunder board description
+ *
+ * Copyright (C) 2014, Cavium Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This library is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This library is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public
+ *     License along with this library; if not, write to the Free
+ *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+ *     MA 02110-1301 USA
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/dts-v1/;
+
+/include/ "thunder-88xx.dtsi"
+
+/ {
+	model = "Cavium ThunderX CN88XX board";
+	compatible = "cavium,thunder-88xx";
+
+	aliases {
+		serial0 = &uaa0;
+		serial1 = &uaa1;
+	};
+
+	memory@00000000 {
+		device_type = "memory";
+		reg = <0x0 0x00000000 0x0 0x80000000>;
+	};
+};
diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
new file mode 100644
index 000000000..d8c0bdc51
--- /dev/null
+++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
@@ -0,0 +1,401 @@
+/*
+ * Cavium Thunder DTS file - Thunder SoC description
+ *
+ * Copyright (C) 2014, Cavium Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This library is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This library is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public
+ *     License along with this library; if not, write to the Free
+ *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+ *     MA 02110-1301 USA
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/ {
+	compatible = "cavium,thunder-88xx";
+	interrupt-parent = <&gic0>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	psci {
+		compatible = "arm,psci-0.2";
+		method = "smc";
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@000 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x000>;
+			enable-method = "psci";
+		};
+		cpu@001 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x001>;
+			enable-method = "psci";
+		};
+		cpu@002 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x002>;
+			enable-method = "psci";
+		};
+		cpu@003 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x003>;
+			enable-method = "psci";
+		};
+		cpu@004 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x004>;
+			enable-method = "psci";
+		};
+		cpu@005 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x005>;
+			enable-method = "psci";
+		};
+		cpu@006 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x006>;
+			enable-method = "psci";
+		};
+		cpu@007 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x007>;
+			enable-method = "psci";
+		};
+		cpu@008 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x008>;
+			enable-method = "psci";
+		};
+		cpu@009 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x009>;
+			enable-method = "psci";
+		};
+		cpu@00a {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x00a>;
+			enable-method = "psci";
+		};
+		cpu@00b {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x00b>;
+			enable-method = "psci";
+		};
+		cpu@00c {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x00c>;
+			enable-method = "psci";
+		};
+		cpu@00d {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x00d>;
+			enable-method = "psci";
+		};
+		cpu@00e {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x00e>;
+			enable-method = "psci";
+		};
+		cpu@00f {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x00f>;
+			enable-method = "psci";
+		};
+		cpu@100 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x100>;
+			enable-method = "psci";
+		};
+		cpu@101 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x101>;
+			enable-method = "psci";
+		};
+		cpu@102 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x102>;
+			enable-method = "psci";
+		};
+		cpu@103 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x103>;
+			enable-method = "psci";
+		};
+		cpu@104 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x104>;
+			enable-method = "psci";
+		};
+		cpu@105 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x105>;
+			enable-method = "psci";
+		};
+		cpu@106 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x106>;
+			enable-method = "psci";
+		};
+		cpu@107 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x107>;
+			enable-method = "psci";
+		};
+		cpu@108 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x108>;
+			enable-method = "psci";
+		};
+		cpu@109 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x109>;
+			enable-method = "psci";
+		};
+		cpu@10a {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x10a>;
+			enable-method = "psci";
+		};
+		cpu@10b {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x10b>;
+			enable-method = "psci";
+		};
+		cpu@10c {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x10c>;
+			enable-method = "psci";
+		};
+		cpu@10d {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x10d>;
+			enable-method = "psci";
+		};
+		cpu@10e {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x10e>;
+			enable-method = "psci";
+		};
+		cpu@10f {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x10f>;
+			enable-method = "psci";
+		};
+		cpu@200 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x200>;
+			enable-method = "psci";
+		};
+		cpu@201 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x201>;
+			enable-method = "psci";
+		};
+		cpu@202 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x202>;
+			enable-method = "psci";
+		};
+		cpu@203 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x203>;
+			enable-method = "psci";
+		};
+		cpu@204 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x204>;
+			enable-method = "psci";
+		};
+		cpu@205 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x205>;
+			enable-method = "psci";
+		};
+		cpu@206 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x206>;
+			enable-method = "psci";
+		};
+		cpu@207 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x207>;
+			enable-method = "psci";
+		};
+		cpu@208 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x208>;
+			enable-method = "psci";
+		};
+		cpu@209 {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x209>;
+			enable-method = "psci";
+		};
+		cpu@20a {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x20a>;
+			enable-method = "psci";
+		};
+		cpu@20b {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x20b>;
+			enable-method = "psci";
+		};
+		cpu@20c {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x20c>;
+			enable-method = "psci";
+		};
+		cpu@20d {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x20d>;
+			enable-method = "psci";
+		};
+		cpu@20e {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x20e>;
+			enable-method = "psci";
+		};
+		cpu@20f {
+			device_type = "cpu";
+			compatible = "cavium,thunder", "arm,armv8";
+			reg = <0x0 0x20f>;
+			enable-method = "psci";
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 13 0xff01>,
+		             <1 14 0xff01>,
+		             <1 11 0xff01>,
+		             <1 10 0xff01>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		refclk50mhz: refclk50mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "refclk50mhz";
+		};
+
+		gic0: interrupt-controller@8010,00000000 {
+			compatible = "arm,gic-v3";
+			#interrupt-cells = <3>;
+			interrupt-controller;
+			reg = <0x8010 0x00000000 0x0 0x010000>, /* GICD */
+			      <0x8010 0x80000000 0x0 0x600000>; /* GICR */
+			interrupts = <1 9 0xf04>;
+		};
+
+		uaa0: serial@87e0,24000000 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0x87e0 0x24000000 0x0 0x1000>;
+			interrupts = <1 21 4>;
+			clocks = <&refclk50mhz>;
+			clock-names = "apb_pclk";
+		};
+
+		uaa1: serial@87e0,25000000 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0x87e0 0x25000000 0x0 0x1000>;
+			interrupts = <1 22 4>;
+			clocks = <&refclk50mhz>;
+			clock-names = "apb_pclk";
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/exynos/Makefile b/arch/arm64/boot/dts/exynos/Makefile
new file mode 100644
index 000000000..20310e5b6
--- /dev/null
+++ b/arch/arm64/boot/dts/exynos/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_EXYNOS7) += exynos7-espresso.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
new file mode 100644
index 000000000..5424cc450
--- /dev/null
+++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
@@ -0,0 +1,84 @@
+/*
+ * SAMSUNG Exynos7 Espresso board device tree source
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+/dts-v1/;
+#include "exynos7.dtsi"
+
+/ {
+	model = "Samsung Exynos7 Espresso board based on EXYNOS7";
+	compatible = "samsung,exynos7-espresso", "samsung,exynos7";
+
+	aliases {
+		serial0 = &serial_2;
+		mshc0 = &mmc_0;
+		mshc2 = &mmc_2;
+	};
+
+	chosen {
+		linux,stdout-path = &serial_2;
+	};
+
+	memory@40000000 {
+		device_type = "memory";
+		reg = <0x0 0x40000000 0x0 0xC0000000>;
+	};
+};
+
+&fin_pll {
+	clock-frequency = <24000000>;
+};
+
+&serial_2 {
+	status = "okay";
+};
+
+&rtc {
+	status = "okay";
+};
+
+&watchdog {
+	status = "okay";
+};
+
+&adc {
+	status = "okay";
+};
+
+&mmc_0 {
+	status = "okay";
+	num-slots = <1>;
+	broken-cd;
+	cap-mmc-highspeed;
+	non-removable;
+	card-detect-delay = <200>;
+	clock-frequency = <800000000>;
+	samsung,dw-mshc-ciu-div = <3>;
+	samsung,dw-mshc-sdr-timing = <0 4>;
+	samsung,dw-mshc-ddr-timing = <0 2>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_qrdy &sd0_bus1 &sd0_bus4 &sd0_bus8>;
+	bus-width = <8>;
+};
+
+&mmc_2 {
+	status = "okay";
+	num-slots = <1>;
+	cap-sd-highspeed;
+	card-detect-delay = <200>;
+	clock-frequency = <400000000>;
+	samsung,dw-mshc-ciu-div = <3>;
+	samsung,dw-mshc-sdr-timing = <2 3>;
+	samsung,dw-mshc-ddr-timing = <1 2>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&sd2_clk &sd2_cmd &sd2_cd &sd2_bus1 &sd2_bus4>;
+	bus-width = <4>;
+	disable-wp;
+};
diff --git a/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi b/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi
new file mode 100644
index 000000000..2eef4a279
--- /dev/null
+++ b/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi
@@ -0,0 +1,588 @@
+/*
+ * Samsung's Exynos7 SoC pin-mux and pin-config device tree source
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * Samsung's Exynos7 SoC pin-mux and pin-config options are listed as
+ * device tree nodes in this file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+&pinctrl_alive {
+	gpa0: gpa0 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		interrupt-parent = <&gic>;
+		#interrupt-cells = <2>;
+		interrupts = <0 0 0>, <0 1 0>, <0 2 0>, <0 3 0>,
+			     <0 4 0>, <0 5 0>, <0 6 0>, <0 7 0>;
+	};
+
+	gpa1: gpa1 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		interrupt-parent = <&gic>;
+		#interrupt-cells = <2>;
+		interrupts = <0 8 0>, <0 9 0>, <0 10 0>, <0 11 0>,
+			     <0 12 0>, <0 13 0>, <0 14 0>, <0 15 0>;
+	};
+
+	gpa2: gpa2 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpa3: gpa3 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+};
+
+&pinctrl_bus0 {
+	gpb0: gpb0 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpc0: gpc0 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpc1: gpc1 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpc2: gpc2 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpc3: gpc3 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpd0: gpd0 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpd1: gpd1 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpd2: gpd2 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpd4: gpd4 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpd5: gpd5 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpd6: gpd6 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpd7: gpd7 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpd8: gpd8 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpg0: gpg0 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpg3: gpg3 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	hs_i2c10_bus: hs-i2c10-bus {
+		samsung,pins = "gpb0-1", "gpb0-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c11_bus: hs-i2c11-bus {
+		samsung,pins = "gpb0-3", "gpb0-2";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c2_bus: hs-i2c2-bus {
+		samsung,pins = "gpd0-3", "gpd0-2";
+		samsung,pin-function = <3>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	uart0_data: uart0-data {
+		samsung,pins = "gpd0-0", "gpd0-1";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	uart0_fctl: uart0-fctl {
+		samsung,pins = "gpd0-2", "gpd0-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	uart2_data: uart2-data {
+		samsung,pins = "gpd1-4", "gpd1-5";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c3_bus: hs-i2c3-bus {
+		samsung,pins = "gpd1-3", "gpd1-2";
+		samsung,pin-function = <3>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	uart1_data: uart1-data {
+		samsung,pins = "gpd1-0", "gpd1-1";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	uart1_fctl: uart1-fctl {
+		samsung,pins = "gpd1-2", "gpd1-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c0_bus: hs-i2c0-bus {
+		samsung,pins = "gpd2-1", "gpd2-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c1_bus: hs-i2c1-bus {
+		samsung,pins = "gpd2-3", "gpd2-2";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c9_bus: hs-i2c9-bus {
+		samsung,pins = "gpd2-7", "gpd2-6";
+		samsung,pin-function = <3>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	pwm0_out: pwm0-out {
+		samsung,pins = "gpd2-4";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	pwm1_out: pwm1-out {
+		samsung,pins = "gpd2-5";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	pwm2_out: pwm2-out {
+		samsung,pins = "gpd2-6";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	pwm3_out: pwm3-out {
+		samsung,pins = "gpd2-7";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c8_bus: hs-i2c8-bus {
+		samsung,pins = "gpd5-3", "gpd5-2";
+		samsung,pin-function = <3>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	uart3_data: uart3-data {
+		samsung,pins = "gpd5-0", "gpd5-1";
+		samsung,pin-function = <3>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <0>;
+	};
+
+	spi2_bus: spi2-bus {
+		samsung,pins = "gpd5-0", "gpd5-1", "gpd5-2", "gpd5-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	spi1_bus: spi1-bus {
+		samsung,pins = "gpd6-2", "gpd6-3", "gpd6-4", "gpd6-5";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	spi0_bus: spi0-bus {
+		samsung,pins = "gpd8-0", "gpd8-1", "gpd6-0", "gpd6-1";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c4_bus: hs-i2c4-bus {
+		samsung,pins = "gpg3-1", "gpg3-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+
+	hs_i2c5_bus: hs-i2c5-bus {
+		samsung,pins = "gpg3-3", "gpg3-2";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+};
+
+&pinctrl_nfc {
+	gpj0: gpj0 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	hs_i2c6_bus: hs-i2c6-bus {
+		samsung,pins = "gpj0-1", "gpj0-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+};
+
+&pinctrl_touch {
+	gpj1: gpj1 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	hs_i2c7_bus: hs-i2c7-bus {
+		samsung,pins = "gpj1-1", "gpj1-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+};
+
+&pinctrl_ff {
+	gpg4: gpg4 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	spi3_bus: spi3-bus {
+		samsung,pins = "gpg4-0", "gpg4-1", "gpg4-2", "gpg4-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+};
+
+&pinctrl_ese {
+	gpv7: gpv7 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	spi4_bus: spi4-bus {
+		samsung,pins = "gpv7-0", "gpv7-1", "gpv7-2", "gpv7-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
+};
+
+&pinctrl_fsys0 {
+	gpr4: gpr4 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	sd2_clk: sd2-clk {
+		samsung,pins = "gpr4-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd2_cmd: sd2-cmd {
+		samsung,pins = "gpr4-1";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd2_cd: sd2-cd {
+		samsung,pins = "gpr4-2";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd2_bus1: sd2-bus-width1 {
+		samsung,pins = "gpr4-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd2_bus4: sd2-bus-width4 {
+		samsung,pins = "gpr4-4", "gpr4-5", "gpr4-6";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <3>;
+	};
+};
+
+&pinctrl_fsys1 {
+	gpr0: gpr0 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpr1: gpr1 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpr2: gpr2 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	gpr3: gpr3 {
+		gpio-controller;
+		#gpio-cells = <2>;
+
+		interrupt-controller;
+		#interrupt-cells = <2>;
+	};
+
+	sd0_clk: sd0-clk {
+		samsung,pins = "gpr0-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd0_cmd: sd0-cmd {
+		samsung,pins = "gpr0-1";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd0_ds: sd0-ds {
+		samsung,pins = "gpr0-2";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <1>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd0_qrdy: sd0-qrdy {
+		samsung,pins = "gpr0-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <1>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd0_bus1: sd0-bus-width1 {
+		samsung,pins = "gpr1-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd0_bus4: sd0-bus-width4 {
+		samsung,pins = "gpr1-1", "gpr1-2", "gpr1-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd0_bus8: sd0-bus-width8 {
+		samsung,pins = "gpr1-4", "gpr1-5", "gpr1-6", "gpr1-7";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <3>;
+	};
+
+	sd1_clk: sd1-clk {
+		samsung,pins = "gpr2-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <2>;
+	};
+
+	sd1_cmd: sd1-cmd {
+		samsung,pins = "gpr2-1";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <0>;
+		samsung,pin-drv = <2>;
+	};
+
+	sd1_ds: sd1-ds {
+		samsung,pins = "gpr2-2";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <1>;
+		samsung,pin-drv = <6>;
+	};
+
+	sd1_qrdy: sd1-qrdy {
+		samsung,pins = "gpr2-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <1>;
+		samsung,pin-drv = <6>;
+	};
+
+	sd1_int: sd1-int {
+		samsung,pins = "gpr2-4";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <1>;
+		samsung,pin-drv = <6>;
+	};
+
+	sd1_bus1: sd1-bus-width1 {
+		samsung,pins = "gpr3-0";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <2>;
+	};
+
+	sd1_bus4: sd1-bus-width4 {
+		samsung,pins = "gpr3-1", "gpr3-2", "gpr3-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <2>;
+	};
+
+	sd1_bus8: sd1-bus-width8 {
+		samsung,pins = "gpr3-4", "gpr3-5", "gpr3-6", "gpr3-7";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <2>;
+	};
+};
diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi
new file mode 100644
index 000000000..d7a37c3a6
--- /dev/null
+++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi
@@ -0,0 +1,530 @@
+/*
+ * SAMSUNG EXYNOS7 SoC device tree source
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/clock/exynos7-clk.h>
+
+/ {
+	compatible = "samsung,exynos7";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		pinctrl0 = &pinctrl_alive;
+		pinctrl1 = &pinctrl_bus0;
+		pinctrl2 = &pinctrl_nfc;
+		pinctrl3 = &pinctrl_touch;
+		pinctrl4 = &pinctrl_ff;
+		pinctrl5 = &pinctrl_ese;
+		pinctrl6 = &pinctrl_fsys0;
+		pinctrl7 = &pinctrl_fsys1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57", "arm,armv8";
+			reg = <0x0>;
+			enable-method = "psci";
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57", "arm,armv8";
+			reg = <0x1>;
+			enable-method = "psci";
+		};
+
+		cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57", "arm,armv8";
+			reg = <0x2>;
+			enable-method = "psci";
+		};
+
+		cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57", "arm,armv8";
+			reg = <0x3>;
+			enable-method = "psci";
+		};
+	};
+
+	psci {
+		compatible = "arm,psci-0.2";
+		method = "smc";
+	};
+
+	soc: soc {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x18000000>;
+
+		chipid@10000000 {
+			compatible = "samsung,exynos4210-chipid";
+			reg = <0x10000000 0x100>;
+		};
+
+		fin_pll: xxti {
+			compatible = "fixed-clock";
+			clock-output-names = "fin_pll";
+			#clock-cells = <0>;
+		};
+
+		gic: interrupt-controller@11001000 {
+			compatible = "arm,gic-400";
+			#interrupt-cells = <3>;
+			#address-cells = <0>;
+			interrupt-controller;
+			reg =	<0x11001000 0x1000>,
+				<0x11002000 0x1000>,
+				<0x11004000 0x2000>,
+				<0x11006000 0x2000>;
+		};
+
+		clock_topc: clock-controller@10570000 {
+			compatible = "samsung,exynos7-clock-topc";
+			reg = <0x10570000 0x10000>;
+			#clock-cells = <1>;
+		};
+
+		clock_top0: clock-controller@105d0000 {
+			compatible = "samsung,exynos7-clock-top0";
+			reg = <0x105d0000 0xb000>;
+			#clock-cells = <1>;
+			clocks = <&fin_pll>, <&clock_topc DOUT_SCLK_BUS0_PLL>,
+				 <&clock_topc DOUT_SCLK_BUS1_PLL>,
+				 <&clock_topc DOUT_SCLK_CC_PLL>,
+				 <&clock_topc DOUT_SCLK_MFC_PLL>;
+			clock-names = "fin_pll", "dout_sclk_bus0_pll",
+				      "dout_sclk_bus1_pll", "dout_sclk_cc_pll",
+				      "dout_sclk_mfc_pll";
+		};
+
+		clock_top1: clock-controller@105e0000 {
+			compatible = "samsung,exynos7-clock-top1";
+			reg = <0x105e0000 0xb000>;
+			#clock-cells = <1>;
+			clocks = <&fin_pll>, <&clock_topc DOUT_SCLK_BUS0_PLL>,
+				 <&clock_topc DOUT_SCLK_BUS1_PLL>,
+				 <&clock_topc DOUT_SCLK_CC_PLL>,
+				 <&clock_topc DOUT_SCLK_MFC_PLL>;
+			clock-names = "fin_pll", "dout_sclk_bus0_pll",
+				      "dout_sclk_bus1_pll", "dout_sclk_cc_pll",
+				      "dout_sclk_mfc_pll";
+		};
+
+		clock_ccore: clock-controller@105b0000 {
+			compatible = "samsung,exynos7-clock-ccore";
+			reg = <0x105b0000 0xd00>;
+			#clock-cells = <1>;
+			clocks = <&fin_pll>, <&clock_topc DOUT_ACLK_CCORE_133>;
+			clock-names = "fin_pll", "dout_aclk_ccore_133";
+		};
+
+		clock_peric0: clock-controller@13610000 {
+			compatible = "samsung,exynos7-clock-peric0";
+			reg = <0x13610000 0xd00>;
+			#clock-cells = <1>;
+			clocks = <&fin_pll>, <&clock_top0 DOUT_ACLK_PERIC0>,
+				 <&clock_top0 CLK_SCLK_UART0>;
+			clock-names = "fin_pll", "dout_aclk_peric0_66",
+				      "sclk_uart0";
+		};
+
+		clock_peric1: clock-controller@14c80000 {
+			compatible = "samsung,exynos7-clock-peric1";
+			reg = <0x14c80000 0xd00>;
+			#clock-cells = <1>;
+			clocks = <&fin_pll>, <&clock_top0 DOUT_ACLK_PERIC1>,
+				 <&clock_top0 CLK_SCLK_UART1>,
+				 <&clock_top0 CLK_SCLK_UART2>,
+				 <&clock_top0 CLK_SCLK_UART3>;
+			clock-names = "fin_pll", "dout_aclk_peric1_66",
+				      "sclk_uart1", "sclk_uart2", "sclk_uart3";
+		};
+
+		clock_peris: clock-controller@10040000 {
+			compatible = "samsung,exynos7-clock-peris";
+			reg = <0x10040000 0xd00>;
+			#clock-cells = <1>;
+			clocks = <&fin_pll>, <&clock_topc DOUT_ACLK_PERIS>;
+			clock-names = "fin_pll", "dout_aclk_peris_66";
+		};
+
+		clock_fsys0: clock-controller@10e90000 {
+			compatible = "samsung,exynos7-clock-fsys0";
+			reg = <0x10e90000 0xd00>;
+			#clock-cells = <1>;
+			clocks = <&fin_pll>, <&clock_top1 DOUT_ACLK_FSYS0_200>,
+				 <&clock_top1 DOUT_SCLK_MMC2>;
+			clock-names = "fin_pll", "dout_aclk_fsys0_200",
+				      "dout_sclk_mmc2";
+		};
+
+		clock_fsys1: clock-controller@156e0000 {
+			compatible = "samsung,exynos7-clock-fsys1";
+			reg = <0x156e0000 0xd00>;
+			#clock-cells = <1>;
+			clocks = <&fin_pll>, <&clock_top1 DOUT_ACLK_FSYS1_200>,
+				 <&clock_top1 DOUT_SCLK_MMC0>,
+				 <&clock_top1 DOUT_SCLK_MMC1>;
+			clock-names = "fin_pll", "dout_aclk_fsys1_200",
+				      "dout_sclk_mmc0", "dout_sclk_mmc1";
+		};
+
+		serial_0: serial@13630000 {
+			compatible = "samsung,exynos4210-uart";
+			reg = <0x13630000 0x100>;
+			interrupts = <0 440 0>;
+			clocks = <&clock_peric0 PCLK_UART0>,
+				 <&clock_peric0 SCLK_UART0>;
+			clock-names = "uart", "clk_uart_baud0";
+			status = "disabled";
+		};
+
+		serial_1: serial@14c20000 {
+			compatible = "samsung,exynos4210-uart";
+			reg = <0x14c20000 0x100>;
+			interrupts = <0 456 0>;
+			clocks = <&clock_peric1 PCLK_UART1>,
+				 <&clock_peric1 SCLK_UART1>;
+			clock-names = "uart", "clk_uart_baud0";
+			status = "disabled";
+		};
+
+		serial_2: serial@14c30000 {
+			compatible = "samsung,exynos4210-uart";
+			reg = <0x14c30000 0x100>;
+			interrupts = <0 457 0>;
+			clocks = <&clock_peric1 PCLK_UART2>,
+				 <&clock_peric1 SCLK_UART2>;
+			clock-names = "uart", "clk_uart_baud0";
+			status = "disabled";
+		};
+
+		serial_3: serial@14c40000 {
+			compatible = "samsung,exynos4210-uart";
+			reg = <0x14c40000 0x100>;
+			interrupts = <0 458 0>;
+			clocks = <&clock_peric1 PCLK_UART3>,
+				 <&clock_peric1 SCLK_UART3>;
+			clock-names = "uart", "clk_uart_baud0";
+			status = "disabled";
+		};
+
+		pinctrl_alive: pinctrl@10580000 {
+			compatible = "samsung,exynos7-pinctrl";
+			reg = <0x10580000 0x1000>;
+
+			wakeup-interrupt-controller {
+				compatible = "samsung,exynos7-wakeup-eint";
+				interrupt-parent = <&gic>;
+				interrupts = <0 16 0>;
+			};
+		};
+
+		pinctrl_bus0: pinctrl@13470000 {
+			compatible = "samsung,exynos7-pinctrl";
+			reg = <0x13470000 0x1000>;
+			interrupts = <0 383 0>;
+		};
+
+		pinctrl_nfc: pinctrl@14cd0000 {
+			compatible = "samsung,exynos7-pinctrl";
+			reg = <0x14cd0000 0x1000>;
+			interrupts = <0 473 0>;
+		};
+
+		pinctrl_touch: pinctrl@14ce0000 {
+			compatible = "samsung,exynos7-pinctrl";
+			reg = <0x14ce0000 0x1000>;
+			interrupts = <0 474 0>;
+		};
+
+		pinctrl_ff: pinctrl@14c90000 {
+			compatible = "samsung,exynos7-pinctrl";
+			reg = <0x14c90000 0x1000>;
+			interrupts = <0 475 0>;
+		};
+
+		pinctrl_ese: pinctrl@14ca0000 {
+			compatible = "samsung,exynos7-pinctrl";
+			reg = <0x14ca0000 0x1000>;
+			interrupts = <0 476 0>;
+		};
+
+		pinctrl_fsys0: pinctrl@10e60000 {
+			compatible = "samsung,exynos7-pinctrl";
+			reg = <0x10e60000 0x1000>;
+			interrupts = <0 221 0>;
+		};
+
+		pinctrl_fsys1: pinctrl@15690000 {
+			compatible = "samsung,exynos7-pinctrl";
+			reg = <0x15690000 0x1000>;
+			interrupts = <0 203 0>;
+		};
+
+		hsi2c_0: hsi2c@13640000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x13640000 0x1000>;
+			interrupts = <0 441 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c0_bus>;
+			clocks = <&clock_peric0 PCLK_HSI2C0>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_1: hsi2c@13650000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x13650000 0x1000>;
+			interrupts = <0 442 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c1_bus>;
+			clocks = <&clock_peric0 PCLK_HSI2C1>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_2: hsi2c@14e60000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x14e60000 0x1000>;
+			interrupts = <0 459 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c2_bus>;
+			clocks = <&clock_peric1 PCLK_HSI2C2>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_3: hsi2c@14e70000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x14e70000 0x1000>;
+			interrupts = <0 460 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c3_bus>;
+			clocks = <&clock_peric1 PCLK_HSI2C3>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_4: hsi2c@13660000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x13660000 0x1000>;
+			interrupts = <0 443 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c4_bus>;
+			clocks = <&clock_peric0 PCLK_HSI2C4>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_5: hsi2c@13670000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x13670000 0x1000>;
+			interrupts = <0 444 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c5_bus>;
+			clocks = <&clock_peric0 PCLK_HSI2C5>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_6: hsi2c@14e00000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x14e00000 0x1000>;
+			interrupts = <0 461 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c6_bus>;
+			clocks = <&clock_peric1 PCLK_HSI2C6>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_7: hsi2c@13e10000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x13e10000 0x1000>;
+			interrupts = <0 462 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c7_bus>;
+			clocks = <&clock_peric1 PCLK_HSI2C7>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_8: hsi2c@14e20000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x14e20000 0x1000>;
+			interrupts = <0 463 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c8_bus>;
+			clocks = <&clock_peric1 PCLK_HSI2C8>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_9: hsi2c@13680000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x13680000 0x1000>;
+			interrupts = <0 445 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c9_bus>;
+			clocks = <&clock_peric0 PCLK_HSI2C9>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_10: hsi2c@13690000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x13690000 0x1000>;
+			interrupts = <0 446 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c10_bus>;
+			clocks = <&clock_peric0 PCLK_HSI2C10>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		hsi2c_11: hsi2c@136a0000 {
+			compatible = "samsung,exynos7-hsi2c";
+			reg = <0x136a0000 0x1000>;
+			interrupts = <0 447 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&hs_i2c11_bus>;
+			clocks = <&clock_peric0 PCLK_HSI2C11>;
+			clock-names = "hsi2c";
+			status = "disabled";
+		};
+
+		timer {
+			compatible = "arm,armv8-timer";
+			interrupts = <1 13 0xff01>,
+				     <1 14 0xff01>,
+				     <1 11 0xff01>,
+				     <1 10 0xff01>;
+		};
+
+		pmu_system_controller: system-controller@105c0000 {
+			compatible = "samsung,exynos7-pmu", "syscon";
+			reg = <0x105c0000 0x5000>;
+		};
+
+		rtc: rtc@10590000 {
+			compatible = "samsung,s3c6410-rtc";
+			reg = <0x10590000 0x100>;
+			interrupts = <0 355 0>, <0 356 0>;
+			clocks = <&clock_ccore PCLK_RTC>;
+			clock-names = "rtc";
+			status = "disabled";
+		};
+
+		watchdog: watchdog@101d0000 {
+			compatible = "samsung,exynos7-wdt";
+			reg = <0x101d0000 0x100>;
+			interrupts = <0 110 0>;
+			clocks = <&clock_peris PCLK_WDT>;
+			clock-names = "watchdog";
+			samsung,syscon-phandle = <&pmu_system_controller>;
+			status = "disabled";
+		};
+
+		mmc_0: mmc@15740000 {
+			compatible = "samsung,exynos7-dw-mshc-smu";
+			interrupts = <0 201 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x15740000 0x2000>;
+			clocks = <&clock_fsys1 ACLK_MMC0>,
+				 <&clock_top1 CLK_SCLK_MMC0>;
+			clock-names = "biu", "ciu";
+			fifo-depth = <0x40>;
+			status = "disabled";
+		};
+
+		mmc_1: mmc@15750000 {
+			compatible = "samsung,exynos7-dw-mshc";
+			interrupts = <0 202 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x15750000 0x2000>;
+			clocks = <&clock_fsys1 ACLK_MMC1>,
+				 <&clock_top1 CLK_SCLK_MMC1>;
+			clock-names = "biu", "ciu";
+			fifo-depth = <0x40>;
+			status = "disabled";
+		};
+
+		mmc_2: mmc@15560000 {
+			compatible = "samsung,exynos7-dw-mshc-smu";
+			interrupts = <0 216 0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x15560000 0x2000>;
+			clocks = <&clock_fsys0 ACLK_MMC2>,
+				 <&clock_top1 CLK_SCLK_MMC2>;
+			clock-names = "biu", "ciu";
+			fifo-depth = <0x40>;
+			status = "disabled";
+		};
+
+		adc: adc@13620000 {
+			compatible = "samsung,exynos7-adc";
+			reg = <0x13620000 0x100>;
+			interrupts = <0 448 0>;
+			clocks = <&clock_peric0 PCLK_ADCIF>;
+			clock-names = "adc";
+			#io-channel-cells = <1>;
+			io-channel-ranges;
+			status = "disabled";
+		};
+
+		pwm: pwm@136c0000 {
+			compatible = "samsung,exynos4210-pwm";
+			reg = <0x136c0000 0x100>;
+			samsung,pwm-outputs = <0>, <1>, <2>, <3>;
+			#pwm-cells = <3>;
+			clocks = <&clock_peric0 PCLK_PWM>;
+			clock-names = "timers";
+		};
+	};
+};
+
+#include "exynos7-pinctrl.dtsi"
diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile
new file mode 100644
index 000000000..4f2de3e78
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_FSL_LS2085A) += fsl-ls2085a-simu.dtb
+ 
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2085a-simu.dts b/arch/arm64/boot/dts/freescale/fsl-ls2085a-simu.dts
new file mode 100644
index 000000000..82e2a6fcc
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2085a-simu.dts
@@ -0,0 +1,65 @@
+/*
+ * Device Tree file for Freescale LS2085a software Simulator model
+ *
+ * Copyright (C) 2014, Freescale Semiconductor
+ *
+ * Bhupesh Sharma <bhupesh.sharma@freescale.com>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This library is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This library is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public
+ *     License along with this library; if not, write to the Free
+ *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+ *     MA 02110-1301 USA
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/dts-v1/;
+
+/include/ "fsl-ls2085a.dtsi"
+
+/ {
+	model = "Freescale Layerscape 2085a software Simulator model";
+	compatible = "fsl,ls2085a-simu", "fsl,ls2085a";
+
+	ethernet@2210000 {
+		compatible = "smsc,lan91c111";
+		reg = <0x0 0x2210000 0x0 0x100>;
+		interrupts = <0 58 0x1>;
+	};
+};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2085a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2085a.dtsi
new file mode 100644
index 000000000..e281ceb33
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2085a.dtsi
@@ -0,0 +1,163 @@
+/*
+ * Device Tree Include file for Freescale Layerscape-2085A family SoC.
+ *
+ * Copyright (C) 2014, Freescale Semiconductor
+ *
+ * Bhupesh Sharma <bhupesh.sharma@freescale.com>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPLv2 or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This library is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This library is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public
+ *     License along with this library; if not, write to the Free
+ *     Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+ *     MA 02110-1301 USA
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/ {
+	compatible = "fsl,ls2085a";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		/*
+		 * We expect the enable-method for cpu's to be "psci", but this
+		 * is dependent on the SoC FW, which will fill this in.
+		 *
+		 * Currently supported enable-method is psci v0.2
+		 */
+
+		/* We have 4 clusters having 2 Cortex-A57 cores each */
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x0>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x1>;
+		};
+
+		cpu@100 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x100>;
+		};
+
+		cpu@101 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x101>;
+		};
+
+		cpu@200 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x200>;
+		};
+
+		cpu@201 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x201>;
+		};
+
+		cpu@300 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x300>;
+		};
+
+		cpu@301 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x301>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x00000000 0x80000000 0 0x80000000>;
+		      /* DRAM space - 1, size : 2 GB DRAM */
+	};
+
+	gic: interrupt-controller@6000000 {
+		compatible = "arm,gic-v3";
+		reg = <0x0 0x06000000 0 0x10000>, /* GIC Dist */
+		      <0x0 0x06100000 0 0x100000>; /* GICR (RD_base + SGI_base) */
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		interrupts = <1 9 0x4>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 13 0x8>, /* Physical Secure PPI, active-low */
+			     <1 14 0x8>, /* Physical Non-Secure PPI, active-low */
+			     <1 11 0x8>, /* Virtual PPI, active-low */
+			     <1 10 0x8>; /* Hypervisor PPI, active-low */
+	};
+
+	serial0: serial@21c0500 {
+		device_type = "serial";
+		compatible = "fsl,ns16550", "ns16550a";
+		reg = <0x0 0x21c0500 0x0 0x100>;
+		clock-frequency = <0>;	/* Updated by bootloader */
+		interrupts = <0 32 0x1>; /* edge triggered */
+	};
+
+	serial1: serial@21c0600 {
+		device_type = "serial";
+		compatible = "fsl,ns16550", "ns16550a";
+		reg = <0x0 0x21c0600 0x0 0x100>;
+		clock-frequency = <0>; 	/* Updated by bootloader */
+		interrupts = <0 32 0x1>; /* edge triggered */
+	};
+
+	fsl_mc: fsl-mc@80c000000 {
+		compatible = "fsl,qoriq-mc";
+		reg = <0x00000008 0x0c000000 0 0x40>,	 /* MC portal base */
+		      <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
+	};
+};
diff --git a/arch/arm64/boot/dts/include/dt-bindings b/arch/arm64/boot/dts/include/dt-bindings
new file mode 120000
index 000000000..08c00e497
--- /dev/null
+++ b/arch/arm64/boot/dts/include/dt-bindings
@@ -0,0 +1 @@
+../../../../../include/dt-bindings
+\ No newline at end of file
diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
new file mode 100644
index 000000000..3ce24622b
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-evb.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
new file mode 100644
index 000000000..d0ab012fa
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2014 MediaTek Inc.
+ * Author: Eddie Huang <eddie.huang@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/dts-v1/;
+#include "mt8173.dtsi"
+
+/ {
+	model = "MediaTek MT8173 evaluation board";
+	compatible = "mediatek,mt8173-evb", "mediatek,mt8173";
+
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart1;
+		serial2 = &uart2;
+		serial3 = &uart3;
+	};
+
+	memory@40000000 {
+		device_type = "memory";
+		reg = <0 0x40000000 0 0x80000000>;
+	};
+
+	chosen { };
+};
+
+&uart0 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-pinfunc.h b/arch/arm64/boot/dts/mediatek/mt8173-pinfunc.h
new file mode 100644
index 000000000..d2f3809af
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8173-pinfunc.h
@@ -0,0 +1,682 @@
+/*
+ * Copyright (c) 2014 MediaTek Inc.
+ * Author: Hongzhou.Yang <hongzhou.yang@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DTS_MT8173_PINFUNC_H
+#define __DTS_MT8173_PINFUNC_H
+
+#include <dt-bindings/pinctrl/mt65xx.h>
+
+#define MT8173_PIN_0_EINT0__FUNC_GPIO0 (MTK_PIN_NO(0) | 0)
+#define MT8173_PIN_0_EINT0__FUNC_IRDA_PDN (MTK_PIN_NO(0) | 1)
+#define MT8173_PIN_0_EINT0__FUNC_I2S1_WS (MTK_PIN_NO(0) | 2)
+#define MT8173_PIN_0_EINT0__FUNC_AUD_SPDIF (MTK_PIN_NO(0) | 3)
+#define MT8173_PIN_0_EINT0__FUNC_UTXD0 (MTK_PIN_NO(0) | 4)
+#define MT8173_PIN_0_EINT0__FUNC_DBG_MON_A_20_ (MTK_PIN_NO(0) | 7)
+
+#define MT8173_PIN_1_EINT1__FUNC_GPIO1 (MTK_PIN_NO(1) | 0)
+#define MT8173_PIN_1_EINT1__FUNC_IRDA_RXD (MTK_PIN_NO(1) | 1)
+#define MT8173_PIN_1_EINT1__FUNC_I2S1_BCK (MTK_PIN_NO(1) | 2)
+#define MT8173_PIN_1_EINT1__FUNC_SDA5 (MTK_PIN_NO(1) | 3)
+#define MT8173_PIN_1_EINT1__FUNC_URXD0 (MTK_PIN_NO(1) | 4)
+#define MT8173_PIN_1_EINT1__FUNC_DBG_MON_A_21_ (MTK_PIN_NO(1) | 7)
+
+#define MT8173_PIN_2_EINT2__FUNC_GPIO2 (MTK_PIN_NO(2) | 0)
+#define MT8173_PIN_2_EINT2__FUNC_IRDA_TXD (MTK_PIN_NO(2) | 1)
+#define MT8173_PIN_2_EINT2__FUNC_I2S1_MCK (MTK_PIN_NO(2) | 2)
+#define MT8173_PIN_2_EINT2__FUNC_SCL5 (MTK_PIN_NO(2) | 3)
+#define MT8173_PIN_2_EINT2__FUNC_UTXD3 (MTK_PIN_NO(2) | 4)
+#define MT8173_PIN_2_EINT2__FUNC_DBG_MON_A_22_ (MTK_PIN_NO(2) | 7)
+
+#define MT8173_PIN_3_EINT3__FUNC_GPIO3 (MTK_PIN_NO(3) | 0)
+#define MT8173_PIN_3_EINT3__FUNC_DSI1_TE (MTK_PIN_NO(3) | 1)
+#define MT8173_PIN_3_EINT3__FUNC_I2S1_DO_1 (MTK_PIN_NO(3) | 2)
+#define MT8173_PIN_3_EINT3__FUNC_SDA3 (MTK_PIN_NO(3) | 3)
+#define MT8173_PIN_3_EINT3__FUNC_URXD3 (MTK_PIN_NO(3) | 4)
+#define MT8173_PIN_3_EINT3__FUNC_DBG_MON_A_23_ (MTK_PIN_NO(3) | 7)
+
+#define MT8173_PIN_4_EINT4__FUNC_GPIO4 (MTK_PIN_NO(4) | 0)
+#define MT8173_PIN_4_EINT4__FUNC_DISP_PWM1 (MTK_PIN_NO(4) | 1)
+#define MT8173_PIN_4_EINT4__FUNC_I2S1_DO_2 (MTK_PIN_NO(4) | 2)
+#define MT8173_PIN_4_EINT4__FUNC_SCL3 (MTK_PIN_NO(4) | 3)
+#define MT8173_PIN_4_EINT4__FUNC_UCTS3 (MTK_PIN_NO(4) | 4)
+#define MT8173_PIN_4_EINT4__FUNC_SFWP_B (MTK_PIN_NO(4) | 6)
+
+#define MT8173_PIN_5_EINT5__FUNC_GPIO5 (MTK_PIN_NO(5) | 0)
+#define MT8173_PIN_5_EINT5__FUNC_PCM1_CLK (MTK_PIN_NO(5) | 1)
+#define MT8173_PIN_5_EINT5__FUNC_I2S2_WS (MTK_PIN_NO(5) | 2)
+#define MT8173_PIN_5_EINT5__FUNC_SPI_CK_3_ (MTK_PIN_NO(5) | 3)
+#define MT8173_PIN_5_EINT5__FUNC_URTS3 (MTK_PIN_NO(5) | 4)
+#define MT8173_PIN_5_EINT5__FUNC_AP_MD32_JTAG_TMS (MTK_PIN_NO(5) | 5)
+#define MT8173_PIN_5_EINT5__FUNC_SFOUT (MTK_PIN_NO(5) | 6)
+
+#define MT8173_PIN_6_EINT6__FUNC_GPIO6 (MTK_PIN_NO(6) | 0)
+#define MT8173_PIN_6_EINT6__FUNC_PCM1_SYNC (MTK_PIN_NO(6) | 1)
+#define MT8173_PIN_6_EINT6__FUNC_I2S2_BCK (MTK_PIN_NO(6) | 2)
+#define MT8173_PIN_6_EINT6__FUNC_SPI_MI_3_ (MTK_PIN_NO(6) | 3)
+#define MT8173_PIN_6_EINT6__FUNC_AP_MD32_JTAG_TCK (MTK_PIN_NO(6) | 5)
+#define MT8173_PIN_6_EINT6__FUNC_SFCS0 (MTK_PIN_NO(6) | 6)
+
+#define MT8173_PIN_7_EINT7__FUNC_GPIO7 (MTK_PIN_NO(7) | 0)
+#define MT8173_PIN_7_EINT7__FUNC_PCM1_DI (MTK_PIN_NO(7) | 1)
+#define MT8173_PIN_7_EINT7__FUNC_I2S2_DI_1 (MTK_PIN_NO(7) | 2)
+#define MT8173_PIN_7_EINT7__FUNC_SPI_MO_3_ (MTK_PIN_NO(7) | 3)
+#define MT8173_PIN_7_EINT7__FUNC_AP_MD32_JTAG_TDI (MTK_PIN_NO(7) | 5)
+#define MT8173_PIN_7_EINT7__FUNC_SFHOLD (MTK_PIN_NO(7) | 6)
+
+#define MT8173_PIN_8_EINT8__FUNC_GPIO8 (MTK_PIN_NO(8) | 0)
+#define MT8173_PIN_8_EINT8__FUNC_PCM1_DO (MTK_PIN_NO(8) | 1)
+#define MT8173_PIN_8_EINT8__FUNC_I2S2_DI_2 (MTK_PIN_NO(8) | 2)
+#define MT8173_PIN_8_EINT8__FUNC_SPI_CS_3_ (MTK_PIN_NO(8) | 3)
+#define MT8173_PIN_8_EINT8__FUNC_AUD_SPDIF (MTK_PIN_NO(8) | 4)
+#define MT8173_PIN_8_EINT8__FUNC_AP_MD32_JTAG_TDO (MTK_PIN_NO(8) | 5)
+#define MT8173_PIN_8_EINT8__FUNC_SFIN (MTK_PIN_NO(8) | 6)
+
+#define MT8173_PIN_9_EINT9__FUNC_GPIO9 (MTK_PIN_NO(9) | 0)
+#define MT8173_PIN_9_EINT9__FUNC_USB_DRVVBUS_P0 (MTK_PIN_NO(9) | 1)
+#define MT8173_PIN_9_EINT9__FUNC_I2S2_MCK (MTK_PIN_NO(9) | 2)
+#define MT8173_PIN_9_EINT9__FUNC_USB_DRVVBUS_P1 (MTK_PIN_NO(9) | 4)
+#define MT8173_PIN_9_EINT9__FUNC_AP_MD32_JTAG_TRST (MTK_PIN_NO(9) | 5)
+#define MT8173_PIN_9_EINT9__FUNC_SFCK (MTK_PIN_NO(9) | 6)
+
+#define MT8173_PIN_10_EINT10__FUNC_GPIO10 (MTK_PIN_NO(10) | 0)
+#define MT8173_PIN_10_EINT10__FUNC_CLKM0 (MTK_PIN_NO(10) | 1)
+#define MT8173_PIN_10_EINT10__FUNC_DSI1_TE (MTK_PIN_NO(10) | 2)
+#define MT8173_PIN_10_EINT10__FUNC_DISP_PWM1 (MTK_PIN_NO(10) | 3)
+#define MT8173_PIN_10_EINT10__FUNC_PWM4 (MTK_PIN_NO(10) | 4)
+#define MT8173_PIN_10_EINT10__FUNC_IRDA_RXD (MTK_PIN_NO(10) | 5)
+
+#define MT8173_PIN_11_EINT11__FUNC_GPIO11 (MTK_PIN_NO(11) | 0)
+#define MT8173_PIN_11_EINT11__FUNC_CLKM1 (MTK_PIN_NO(11) | 1)
+#define MT8173_PIN_11_EINT11__FUNC_I2S3_WS (MTK_PIN_NO(11) | 2)
+#define MT8173_PIN_11_EINT11__FUNC_USB_DRVVBUS_P0 (MTK_PIN_NO(11) | 3)
+#define MT8173_PIN_11_EINT11__FUNC_PWM5 (MTK_PIN_NO(11) | 4)
+#define MT8173_PIN_11_EINT11__FUNC_IRDA_TXD (MTK_PIN_NO(11) | 5)
+#define MT8173_PIN_11_EINT11__FUNC_USB_DRVVBUS_P1 (MTK_PIN_NO(11) | 6)
+#define MT8173_PIN_11_EINT11__FUNC_DBG_MON_B_30_ (MTK_PIN_NO(11) | 7)
+
+#define MT8173_PIN_12_EINT12__FUNC_GPIO12 (MTK_PIN_NO(12) | 0)
+#define MT8173_PIN_12_EINT12__FUNC_CLKM2 (MTK_PIN_NO(12) | 1)
+#define MT8173_PIN_12_EINT12__FUNC_I2S3_BCK (MTK_PIN_NO(12) | 2)
+#define MT8173_PIN_12_EINT12__FUNC_SRCLKENA0 (MTK_PIN_NO(12) | 3)
+#define MT8173_PIN_12_EINT12__FUNC_I2S2_WS (MTK_PIN_NO(12) | 5)
+#define MT8173_PIN_12_EINT12__FUNC_DBG_MON_B_32_ (MTK_PIN_NO(12) | 7)
+
+#define MT8173_PIN_13_EINT13__FUNC_GPIO13 (MTK_PIN_NO(13) | 0)
+#define MT8173_PIN_13_EINT13__FUNC_CLKM3 (MTK_PIN_NO(13) | 1)
+#define MT8173_PIN_13_EINT13__FUNC_I2S3_MCK (MTK_PIN_NO(13) | 2)
+#define MT8173_PIN_13_EINT13__FUNC_SRCLKENA0 (MTK_PIN_NO(13) | 3)
+#define MT8173_PIN_13_EINT13__FUNC_I2S2_BCK (MTK_PIN_NO(13) | 5)
+#define MT8173_PIN_13_EINT13__FUNC_DBG_MON_A_32_ (MTK_PIN_NO(13) | 7)
+
+#define MT8173_PIN_14_EINT14__FUNC_GPIO14 (MTK_PIN_NO(14) | 0)
+#define MT8173_PIN_14_EINT14__FUNC_CMDAT0 (MTK_PIN_NO(14) | 1)
+#define MT8173_PIN_14_EINT14__FUNC_CMCSD0 (MTK_PIN_NO(14) | 2)
+#define MT8173_PIN_14_EINT14__FUNC_CLKM2 (MTK_PIN_NO(14) | 4)
+#define MT8173_PIN_14_EINT14__FUNC_DBG_MON_B_6_ (MTK_PIN_NO(14) | 7)
+
+#define MT8173_PIN_15_EINT15__FUNC_GPIO15 (MTK_PIN_NO(15) | 0)
+#define MT8173_PIN_15_EINT15__FUNC_CMDAT1 (MTK_PIN_NO(15) | 1)
+#define MT8173_PIN_15_EINT15__FUNC_CMCSD1 (MTK_PIN_NO(15) | 2)
+#define MT8173_PIN_15_EINT15__FUNC_CMFLASH (MTK_PIN_NO(15) | 3)
+#define MT8173_PIN_15_EINT15__FUNC_CLKM3 (MTK_PIN_NO(15) | 4)
+#define MT8173_PIN_15_EINT15__FUNC_DBG_MON_B_29_ (MTK_PIN_NO(15) | 7)
+
+#define MT8173_PIN_16_IDDIG__FUNC_GPIO16 (MTK_PIN_NO(16) | 0)
+#define MT8173_PIN_16_IDDIG__FUNC_IDDIG (MTK_PIN_NO(16) | 1)
+#define MT8173_PIN_16_IDDIG__FUNC_CMFLASH (MTK_PIN_NO(16) | 2)
+#define MT8173_PIN_16_IDDIG__FUNC_PWM5 (MTK_PIN_NO(16) | 4)
+
+#define MT8173_PIN_17_WATCHDOG__FUNC_GPIO17 (MTK_PIN_NO(17) | 0)
+#define MT8173_PIN_17_WATCHDOG__FUNC_WATCHDOG_AO (MTK_PIN_NO(17) | 1)
+
+#define MT8173_PIN_18_CEC__FUNC_GPIO18 (MTK_PIN_NO(18) | 0)
+#define MT8173_PIN_18_CEC__FUNC_CEC (MTK_PIN_NO(18) | 1)
+
+#define MT8173_PIN_19_HDMISCK__FUNC_GPIO19 (MTK_PIN_NO(19) | 0)
+#define MT8173_PIN_19_HDMISCK__FUNC_HDMISCK (MTK_PIN_NO(19) | 1)
+#define MT8173_PIN_19_HDMISCK__FUNC_HDCP_SCL (MTK_PIN_NO(19) | 2)
+
+#define MT8173_PIN_20_HDMISD__FUNC_GPIO20 (MTK_PIN_NO(20) | 0)
+#define MT8173_PIN_20_HDMISD__FUNC_HDMISD (MTK_PIN_NO(20) | 1)
+#define MT8173_PIN_20_HDMISD__FUNC_HDCP_SDA (MTK_PIN_NO(20) | 2)
+
+#define MT8173_PIN_21_HTPLG__FUNC_GPIO21 (MTK_PIN_NO(21) | 0)
+#define MT8173_PIN_21_HTPLG__FUNC_HTPLG (MTK_PIN_NO(21) | 1)
+
+#define MT8173_PIN_22_MSDC3_DAT0__FUNC_GPIO22 (MTK_PIN_NO(22) | 0)
+#define MT8173_PIN_22_MSDC3_DAT0__FUNC_MSDC3_DAT0 (MTK_PIN_NO(22) | 1)
+
+#define MT8173_PIN_23_MSDC3_DAT1__FUNC_GPIO23 (MTK_PIN_NO(23) | 0)
+#define MT8173_PIN_23_MSDC3_DAT1__FUNC_MSDC3_DAT1 (MTK_PIN_NO(23) | 1)
+
+#define MT8173_PIN_24_MSDC3_DAT2__FUNC_GPIO24 (MTK_PIN_NO(24) | 0)
+#define MT8173_PIN_24_MSDC3_DAT2__FUNC_MSDC3_DAT2 (MTK_PIN_NO(24) | 1)
+
+#define MT8173_PIN_25_MSDC3_DAT3__FUNC_GPIO25 (MTK_PIN_NO(25) | 0)
+#define MT8173_PIN_25_MSDC3_DAT3__FUNC_MSDC3_DAT3 (MTK_PIN_NO(25) | 1)
+
+#define MT8173_PIN_26_MSDC3_CLK__FUNC_GPIO26 (MTK_PIN_NO(26) | 0)
+#define MT8173_PIN_26_MSDC3_CLK__FUNC_MSDC3_CLK (MTK_PIN_NO(26) | 1)
+
+#define MT8173_PIN_27_MSDC3_CMD__FUNC_GPIO27 (MTK_PIN_NO(27) | 0)
+#define MT8173_PIN_27_MSDC3_CMD__FUNC_MSDC3_CMD (MTK_PIN_NO(27) | 1)
+
+#define MT8173_PIN_28_MSDC3_DSL__FUNC_GPIO28 (MTK_PIN_NO(28) | 0)
+#define MT8173_PIN_28_MSDC3_DSL__FUNC_MSDC3_DSL (MTK_PIN_NO(28) | 1)
+
+#define MT8173_PIN_29_UCTS2__FUNC_GPIO29 (MTK_PIN_NO(29) | 0)
+#define MT8173_PIN_29_UCTS2__FUNC_UCTS2 (MTK_PIN_NO(29) | 1)
+
+#define MT8173_PIN_30_URTS2__FUNC_GPIO30 (MTK_PIN_NO(30) | 0)
+#define MT8173_PIN_30_URTS2__FUNC_URTS2 (MTK_PIN_NO(30) | 1)
+
+#define MT8173_PIN_31_URXD2__FUNC_GPIO31 (MTK_PIN_NO(31) | 0)
+#define MT8173_PIN_31_URXD2__FUNC_URXD2 (MTK_PIN_NO(31) | 1)
+#define MT8173_PIN_31_URXD2__FUNC_UTXD2 (MTK_PIN_NO(31) | 2)
+
+#define MT8173_PIN_32_UTXD2__FUNC_GPIO32 (MTK_PIN_NO(32) | 0)
+#define MT8173_PIN_32_UTXD2__FUNC_UTXD2 (MTK_PIN_NO(32) | 1)
+#define MT8173_PIN_32_UTXD2__FUNC_URXD2 (MTK_PIN_NO(32) | 2)
+
+#define MT8173_PIN_33_DAICLK__FUNC_GPIO33 (MTK_PIN_NO(33) | 0)
+#define MT8173_PIN_33_DAICLK__FUNC_MRG_CLK (MTK_PIN_NO(33) | 1)
+#define MT8173_PIN_33_DAICLK__FUNC_PCM0_CLK (MTK_PIN_NO(33) | 2)
+
+#define MT8173_PIN_34_DAIPCMIN__FUNC_GPIO34 (MTK_PIN_NO(34) | 0)
+#define MT8173_PIN_34_DAIPCMIN__FUNC_MRG_DI (MTK_PIN_NO(34) | 1)
+#define MT8173_PIN_34_DAIPCMIN__FUNC_PCM0_DI (MTK_PIN_NO(34) | 2)
+
+#define MT8173_PIN_35_DAIPCMOUT__FUNC_GPIO35 (MTK_PIN_NO(35) | 0)
+#define MT8173_PIN_35_DAIPCMOUT__FUNC_MRG_DO (MTK_PIN_NO(35) | 1)
+#define MT8173_PIN_35_DAIPCMOUT__FUNC_PCM0_DO (MTK_PIN_NO(35) | 2)
+
+#define MT8173_PIN_36_DAISYNC__FUNC_GPIO36 (MTK_PIN_NO(36) | 0)
+#define MT8173_PIN_36_DAISYNC__FUNC_MRG_SYNC (MTK_PIN_NO(36) | 1)
+#define MT8173_PIN_36_DAISYNC__FUNC_PCM0_SYNC (MTK_PIN_NO(36) | 2)
+
+#define MT8173_PIN_37_EINT16__FUNC_GPIO37 (MTK_PIN_NO(37) | 0)
+#define MT8173_PIN_37_EINT16__FUNC_USB_DRVVBUS_P0 (MTK_PIN_NO(37) | 1)
+#define MT8173_PIN_37_EINT16__FUNC_USB_DRVVBUS_P1 (MTK_PIN_NO(37) | 2)
+#define MT8173_PIN_37_EINT16__FUNC_PWM0 (MTK_PIN_NO(37) | 3)
+#define MT8173_PIN_37_EINT16__FUNC_PWM1 (MTK_PIN_NO(37) | 4)
+#define MT8173_PIN_37_EINT16__FUNC_PWM2 (MTK_PIN_NO(37) | 5)
+#define MT8173_PIN_37_EINT16__FUNC_CLKM0 (MTK_PIN_NO(37) | 6)
+
+#define MT8173_PIN_38_CONN_RST__FUNC_GPIO38 (MTK_PIN_NO(38) | 0)
+#define MT8173_PIN_38_CONN_RST__FUNC_USB_DRVVBUS_P0 (MTK_PIN_NO(38) | 1)
+#define MT8173_PIN_38_CONN_RST__FUNC_USB_DRVVBUS_P1 (MTK_PIN_NO(38) | 2)
+#define MT8173_PIN_38_CONN_RST__FUNC_CLKM1 (MTK_PIN_NO(38) | 6)
+
+#define MT8173_PIN_39_CM2MCLK__FUNC_GPIO39 (MTK_PIN_NO(39) | 0)
+#define MT8173_PIN_39_CM2MCLK__FUNC_CM2MCLK (MTK_PIN_NO(39) | 1)
+#define MT8173_PIN_39_CM2MCLK__FUNC_CMCSD0 (MTK_PIN_NO(39) | 2)
+#define MT8173_PIN_39_CM2MCLK__FUNC_DBG_MON_A_17_ (MTK_PIN_NO(39) | 7)
+
+#define MT8173_PIN_40_CMPCLK__FUNC_GPIO40 (MTK_PIN_NO(40) | 0)
+#define MT8173_PIN_40_CMPCLK__FUNC_CMPCLK (MTK_PIN_NO(40) | 1)
+#define MT8173_PIN_40_CMPCLK__FUNC_CMCSK (MTK_PIN_NO(40) | 2)
+#define MT8173_PIN_40_CMPCLK__FUNC_CMCSD2 (MTK_PIN_NO(40) | 3)
+#define MT8173_PIN_40_CMPCLK__FUNC_DBG_MON_A_18_ (MTK_PIN_NO(40) | 7)
+
+#define MT8173_PIN_41_CMMCLK__FUNC_GPIO41 (MTK_PIN_NO(41) | 0)
+#define MT8173_PIN_41_CMMCLK__FUNC_CMMCLK (MTK_PIN_NO(41) | 1)
+#define MT8173_PIN_41_CMMCLK__FUNC_DBG_MON_A_19_ (MTK_PIN_NO(41) | 7)
+
+#define MT8173_PIN_42_DSI_TE__FUNC_GPIO42 (MTK_PIN_NO(42) | 0)
+#define MT8173_PIN_42_DSI_TE__FUNC_DSI_TE (MTK_PIN_NO(42) | 1)
+
+#define MT8173_PIN_43_SDA2__FUNC_GPIO43 (MTK_PIN_NO(43) | 0)
+#define MT8173_PIN_43_SDA2__FUNC_SDA2 (MTK_PIN_NO(43) | 1)
+
+#define MT8173_PIN_44_SCL2__FUNC_GPIO44 (MTK_PIN_NO(44) | 0)
+#define MT8173_PIN_44_SCL2__FUNC_SCL2 (MTK_PIN_NO(44) | 1)
+
+#define MT8173_PIN_45_SDA0__FUNC_GPIO45 (MTK_PIN_NO(45) | 0)
+#define MT8173_PIN_45_SDA0__FUNC_SDA0 (MTK_PIN_NO(45) | 1)
+
+#define MT8173_PIN_46_SCL0__FUNC_GPIO46 (MTK_PIN_NO(46) | 0)
+#define MT8173_PIN_46_SCL0__FUNC_SCL0 (MTK_PIN_NO(46) | 1)
+
+#define MT8173_PIN_47_RDN0_A__FUNC_GPIO47 (MTK_PIN_NO(47) | 0)
+#define MT8173_PIN_47_RDN0_A__FUNC_CMDAT2 (MTK_PIN_NO(47) | 1)
+
+#define MT8173_PIN_48_RDP0_A__FUNC_GPIO48 (MTK_PIN_NO(48) | 0)
+#define MT8173_PIN_48_RDP0_A__FUNC_CMDAT3 (MTK_PIN_NO(48) | 1)
+
+#define MT8173_PIN_49_RDN1_A__FUNC_GPIO49 (MTK_PIN_NO(49) | 0)
+#define MT8173_PIN_49_RDN1_A__FUNC_CMDAT4 (MTK_PIN_NO(49) | 1)
+
+#define MT8173_PIN_50_RDP1_A__FUNC_GPIO50 (MTK_PIN_NO(50) | 0)
+#define MT8173_PIN_50_RDP1_A__FUNC_CMDAT5 (MTK_PIN_NO(50) | 1)
+
+#define MT8173_PIN_51_RCN_A__FUNC_GPIO51 (MTK_PIN_NO(51) | 0)
+#define MT8173_PIN_51_RCN_A__FUNC_CMDAT6 (MTK_PIN_NO(51) | 1)
+
+#define MT8173_PIN_52_RCP_A__FUNC_GPIO52 (MTK_PIN_NO(52) | 0)
+#define MT8173_PIN_52_RCP_A__FUNC_CMDAT7 (MTK_PIN_NO(52) | 1)
+
+#define MT8173_PIN_53_RDN2_A__FUNC_GPIO53 (MTK_PIN_NO(53) | 0)
+#define MT8173_PIN_53_RDN2_A__FUNC_CMDAT8 (MTK_PIN_NO(53) | 1)
+#define MT8173_PIN_53_RDN2_A__FUNC_CMCSD3 (MTK_PIN_NO(53) | 2)
+
+#define MT8173_PIN_54_RDP2_A__FUNC_GPIO54 (MTK_PIN_NO(54) | 0)
+#define MT8173_PIN_54_RDP2_A__FUNC_CMDAT9 (MTK_PIN_NO(54) | 1)
+#define MT8173_PIN_54_RDP2_A__FUNC_CMCSD2 (MTK_PIN_NO(54) | 2)
+
+#define MT8173_PIN_55_RDN3_A__FUNC_GPIO55 (MTK_PIN_NO(55) | 0)
+#define MT8173_PIN_55_RDN3_A__FUNC_CMHSYNC (MTK_PIN_NO(55) | 1)
+#define MT8173_PIN_55_RDN3_A__FUNC_CMCSD1 (MTK_PIN_NO(55) | 2)
+
+#define MT8173_PIN_56_RDP3_A__FUNC_GPIO56 (MTK_PIN_NO(56) | 0)
+#define MT8173_PIN_56_RDP3_A__FUNC_CMVSYNC (MTK_PIN_NO(56) | 1)
+#define MT8173_PIN_56_RDP3_A__FUNC_CMCSD0 (MTK_PIN_NO(56) | 2)
+
+#define MT8173_PIN_57_MSDC0_DAT0__FUNC_GPIO57 (MTK_PIN_NO(57) | 0)
+#define MT8173_PIN_57_MSDC0_DAT0__FUNC_MSDC0_DAT0 (MTK_PIN_NO(57) | 1)
+#define MT8173_PIN_57_MSDC0_DAT0__FUNC_I2S1_WS (MTK_PIN_NO(57) | 2)
+#define MT8173_PIN_57_MSDC0_DAT0__FUNC_DBG_MON_B_7_ (MTK_PIN_NO(57) | 7)
+
+#define MT8173_PIN_58_MSDC0_DAT1__FUNC_GPIO58 (MTK_PIN_NO(58) | 0)
+#define MT8173_PIN_58_MSDC0_DAT1__FUNC_MSDC0_DAT1 (MTK_PIN_NO(58) | 1)
+#define MT8173_PIN_58_MSDC0_DAT1__FUNC_I2S1_BCK (MTK_PIN_NO(58) | 2)
+#define MT8173_PIN_58_MSDC0_DAT1__FUNC_DBG_MON_B_8_ (MTK_PIN_NO(58) | 7)
+
+#define MT8173_PIN_59_MSDC0_DAT2__FUNC_GPIO59 (MTK_PIN_NO(59) | 0)
+#define MT8173_PIN_59_MSDC0_DAT2__FUNC_MSDC0_DAT2 (MTK_PIN_NO(59) | 1)
+#define MT8173_PIN_59_MSDC0_DAT2__FUNC_I2S1_MCK (MTK_PIN_NO(59) | 2)
+#define MT8173_PIN_59_MSDC0_DAT2__FUNC_DBG_MON_B_9_ (MTK_PIN_NO(59) | 7)
+
+#define MT8173_PIN_60_MSDC0_DAT3__FUNC_GPIO60 (MTK_PIN_NO(60) | 0)
+#define MT8173_PIN_60_MSDC0_DAT3__FUNC_MSDC0_DAT3 (MTK_PIN_NO(60) | 1)
+#define MT8173_PIN_60_MSDC0_DAT3__FUNC_I2S1_DO_1 (MTK_PIN_NO(60) | 2)
+#define MT8173_PIN_60_MSDC0_DAT3__FUNC_DBG_MON_B_10_ (MTK_PIN_NO(60) | 7)
+
+#define MT8173_PIN_61_MSDC0_DAT4__FUNC_GPIO61 (MTK_PIN_NO(61) | 0)
+#define MT8173_PIN_61_MSDC0_DAT4__FUNC_MSDC0_DAT4 (MTK_PIN_NO(61) | 1)
+#define MT8173_PIN_61_MSDC0_DAT4__FUNC_I2S1_DO_2 (MTK_PIN_NO(61) | 2)
+#define MT8173_PIN_61_MSDC0_DAT4__FUNC_DBG_MON_B_11_ (MTK_PIN_NO(61) | 7)
+
+#define MT8173_PIN_62_MSDC0_DAT5__FUNC_GPIO62 (MTK_PIN_NO(62) | 0)
+#define MT8173_PIN_62_MSDC0_DAT5__FUNC_MSDC0_DAT5 (MTK_PIN_NO(62) | 1)
+#define MT8173_PIN_62_MSDC0_DAT5__FUNC_I2S2_WS (MTK_PIN_NO(62) | 2)
+#define MT8173_PIN_62_MSDC0_DAT5__FUNC_DBG_MON_B_12_ (MTK_PIN_NO(62) | 7)
+
+#define MT8173_PIN_63_MSDC0_DAT6__FUNC_GPIO63 (MTK_PIN_NO(63) | 0)
+#define MT8173_PIN_63_MSDC0_DAT6__FUNC_MSDC0_DAT6 (MTK_PIN_NO(63) | 1)
+#define MT8173_PIN_63_MSDC0_DAT6__FUNC_I2S2_BCK (MTK_PIN_NO(63) | 2)
+#define MT8173_PIN_63_MSDC0_DAT6__FUNC_DBG_MON_B_13_ (MTK_PIN_NO(63) | 7)
+
+#define MT8173_PIN_64_MSDC0_DAT7__FUNC_GPIO64 (MTK_PIN_NO(64) | 0)
+#define MT8173_PIN_64_MSDC0_DAT7__FUNC_MSDC0_DAT7 (MTK_PIN_NO(64) | 1)
+#define MT8173_PIN_64_MSDC0_DAT7__FUNC_I2S2_DI_1 (MTK_PIN_NO(64) | 2)
+#define MT8173_PIN_64_MSDC0_DAT7__FUNC_DBG_MON_B_14_ (MTK_PIN_NO(64) | 7)
+
+#define MT8173_PIN_65_MSDC0_CLK__FUNC_GPIO65 (MTK_PIN_NO(65) | 0)
+#define MT8173_PIN_65_MSDC0_CLK__FUNC_MSDC0_CLK (MTK_PIN_NO(65) | 1)
+#define MT8173_PIN_65_MSDC0_CLK__FUNC_DBG_MON_B_16_ (MTK_PIN_NO(65) | 7)
+
+#define MT8173_PIN_66_MSDC0_CMD__FUNC_GPIO66 (MTK_PIN_NO(66) | 0)
+#define MT8173_PIN_66_MSDC0_CMD__FUNC_MSDC0_CMD (MTK_PIN_NO(66) | 1)
+#define MT8173_PIN_66_MSDC0_CMD__FUNC_I2S2_DI_2 (MTK_PIN_NO(66) | 2)
+#define MT8173_PIN_66_MSDC0_CMD__FUNC_DBG_MON_B_15_ (MTK_PIN_NO(66) | 7)
+
+#define MT8173_PIN_67_MSDC0_DSL__FUNC_GPIO67 (MTK_PIN_NO(67) | 0)
+#define MT8173_PIN_67_MSDC0_DSL__FUNC_MSDC0_DSL (MTK_PIN_NO(67) | 1)
+#define MT8173_PIN_67_MSDC0_DSL__FUNC_DBG_MON_B_17_ (MTK_PIN_NO(67) | 7)
+
+#define MT8173_PIN_68_MSDC0_RST___FUNC_GPIO68 (MTK_PIN_NO(68) | 0)
+#define MT8173_PIN_68_MSDC0_RST___FUNC_MSDC0_RSTB (MTK_PIN_NO(68) | 1)
+#define MT8173_PIN_68_MSDC0_RST___FUNC_I2S2_MCK (MTK_PIN_NO(68) | 2)
+#define MT8173_PIN_68_MSDC0_RST___FUNC_DBG_MON_B_18_ (MTK_PIN_NO(68) | 7)
+
+#define MT8173_PIN_69_SPI_CK__FUNC_GPIO69 (MTK_PIN_NO(69) | 0)
+#define MT8173_PIN_69_SPI_CK__FUNC_SPI_CK_0_ (MTK_PIN_NO(69) | 1)
+#define MT8173_PIN_69_SPI_CK__FUNC_I2S3_DO_1 (MTK_PIN_NO(69) | 2)
+#define MT8173_PIN_69_SPI_CK__FUNC_PWM0 (MTK_PIN_NO(69) | 3)
+#define MT8173_PIN_69_SPI_CK__FUNC_PWM5 (MTK_PIN_NO(69) | 4)
+#define MT8173_PIN_69_SPI_CK__FUNC_I2S2_MCK (MTK_PIN_NO(69) | 5)
+#define MT8173_PIN_69_SPI_CK__FUNC_DBG_MON_B_19_ (MTK_PIN_NO(69) | 7)
+
+#define MT8173_PIN_70_SPI_MI__FUNC_GPIO70 (MTK_PIN_NO(70) | 0)
+#define MT8173_PIN_70_SPI_MI__FUNC_SPI_MI_0_ (MTK_PIN_NO(70) | 1)
+#define MT8173_PIN_70_SPI_MI__FUNC_I2S3_DO_2 (MTK_PIN_NO(70) | 2)
+#define MT8173_PIN_70_SPI_MI__FUNC_PWM1 (MTK_PIN_NO(70) | 3)
+#define MT8173_PIN_70_SPI_MI__FUNC_SPI_MO_0_ (MTK_PIN_NO(70) | 4)
+#define MT8173_PIN_70_SPI_MI__FUNC_I2S2_DI_1 (MTK_PIN_NO(70) | 5)
+#define MT8173_PIN_70_SPI_MI__FUNC_DSI1_TE (MTK_PIN_NO(70) | 6)
+#define MT8173_PIN_70_SPI_MI__FUNC_DBG_MON_B_20_ (MTK_PIN_NO(70) | 7)
+
+#define MT8173_PIN_71_SPI_MO__FUNC_GPIO71 (MTK_PIN_NO(71) | 0)
+#define MT8173_PIN_71_SPI_MO__FUNC_SPI_MO_0_ (MTK_PIN_NO(71) | 1)
+#define MT8173_PIN_71_SPI_MO__FUNC_I2S3_DO_3 (MTK_PIN_NO(71) | 2)
+#define MT8173_PIN_71_SPI_MO__FUNC_PWM2 (MTK_PIN_NO(71) | 3)
+#define MT8173_PIN_71_SPI_MO__FUNC_SPI_MI_0_ (MTK_PIN_NO(71) | 4)
+#define MT8173_PIN_71_SPI_MO__FUNC_I2S2_DI_2 (MTK_PIN_NO(71) | 5)
+#define MT8173_PIN_71_SPI_MO__FUNC_DBG_MON_B_21_ (MTK_PIN_NO(71) | 7)
+
+#define MT8173_PIN_72_SPI_CS__FUNC_GPIO72 (MTK_PIN_NO(72) | 0)
+#define MT8173_PIN_72_SPI_CS__FUNC_SPI_CS_0_ (MTK_PIN_NO(72) | 1)
+#define MT8173_PIN_72_SPI_CS__FUNC_I2S3_DO_4 (MTK_PIN_NO(72) | 2)
+#define MT8173_PIN_72_SPI_CS__FUNC_PWM3 (MTK_PIN_NO(72) | 3)
+#define MT8173_PIN_72_SPI_CS__FUNC_PWM6 (MTK_PIN_NO(72) | 4)
+#define MT8173_PIN_72_SPI_CS__FUNC_DISP_PWM1 (MTK_PIN_NO(72) | 5)
+#define MT8173_PIN_72_SPI_CS__FUNC_DBG_MON_B_22_ (MTK_PIN_NO(72) | 7)
+
+#define MT8173_PIN_73_MSDC1_DAT0__FUNC_GPIO73 (MTK_PIN_NO(73) | 0)
+#define MT8173_PIN_73_MSDC1_DAT0__FUNC_MSDC1_DAT0 (MTK_PIN_NO(73) | 1)
+#define MT8173_PIN_73_MSDC1_DAT0__FUNC_DBG_MON_B_24_ (MTK_PIN_NO(73) | 7)
+
+#define MT8173_PIN_74_MSDC1_DAT1__FUNC_GPIO74 (MTK_PIN_NO(74) | 0)
+#define MT8173_PIN_74_MSDC1_DAT1__FUNC_MSDC1_DAT1 (MTK_PIN_NO(74) | 1)
+#define MT8173_PIN_74_MSDC1_DAT1__FUNC_DBG_MON_B_25_ (MTK_PIN_NO(74) | 7)
+
+#define MT8173_PIN_75_MSDC1_DAT2__FUNC_GPIO75 (MTK_PIN_NO(75) | 0)
+#define MT8173_PIN_75_MSDC1_DAT2__FUNC_MSDC1_DAT2 (MTK_PIN_NO(75) | 1)
+#define MT8173_PIN_75_MSDC1_DAT2__FUNC_DBG_MON_B_26_ (MTK_PIN_NO(75) | 7)
+
+#define MT8173_PIN_76_MSDC1_DAT3__FUNC_GPIO76 (MTK_PIN_NO(76) | 0)
+#define MT8173_PIN_76_MSDC1_DAT3__FUNC_MSDC1_DAT3 (MTK_PIN_NO(76) | 1)
+#define MT8173_PIN_76_MSDC1_DAT3__FUNC_DBG_MON_B_27_ (MTK_PIN_NO(76) | 7)
+
+#define MT8173_PIN_77_MSDC1_CLK__FUNC_GPIO77 (MTK_PIN_NO(77) | 0)
+#define MT8173_PIN_77_MSDC1_CLK__FUNC_MSDC1_CLK (MTK_PIN_NO(77) | 1)
+#define MT8173_PIN_77_MSDC1_CLK__FUNC_DBG_MON_B_28_ (MTK_PIN_NO(77) | 7)
+
+#define MT8173_PIN_78_MSDC1_CMD__FUNC_GPIO78 (MTK_PIN_NO(78) | 0)
+#define MT8173_PIN_78_MSDC1_CMD__FUNC_MSDC1_CMD (MTK_PIN_NO(78) | 1)
+#define MT8173_PIN_78_MSDC1_CMD__FUNC_DBG_MON_B_23_ (MTK_PIN_NO(78) | 7)
+
+#define MT8173_PIN_79_PWRAP_SPI0_MI__FUNC_GPIO79 (MTK_PIN_NO(79) | 0)
+#define MT8173_PIN_79_PWRAP_SPI0_MI__FUNC_PWRAP_SPIMI (MTK_PIN_NO(79) | 1)
+#define MT8173_PIN_79_PWRAP_SPI0_MI__FUNC_PWRAP_SPIMO (MTK_PIN_NO(79) | 2)
+
+#define MT8173_PIN_80_PWRAP_SPI0_MO__FUNC_GPIO80 (MTK_PIN_NO(80) | 0)
+#define MT8173_PIN_80_PWRAP_SPI0_MO__FUNC_PWRAP_SPIMO (MTK_PIN_NO(80) | 1)
+#define MT8173_PIN_80_PWRAP_SPI0_MO__FUNC_PWRAP_SPIMI (MTK_PIN_NO(80) | 2)
+
+#define MT8173_PIN_81_PWRAP_SPI0_CK__FUNC_GPIO81 (MTK_PIN_NO(81) | 0)
+#define MT8173_PIN_81_PWRAP_SPI0_CK__FUNC_PWRAP_SPICK (MTK_PIN_NO(81) | 1)
+
+#define MT8173_PIN_82_PWRAP_SPI0_CSN__FUNC_GPIO82 (MTK_PIN_NO(82) | 0)
+#define MT8173_PIN_82_PWRAP_SPI0_CSN__FUNC_PWRAP_SPICS (MTK_PIN_NO(82) | 1)
+
+#define MT8173_PIN_83_AUD_CLK_MOSI__FUNC_GPIO83 (MTK_PIN_NO(83) | 0)
+#define MT8173_PIN_83_AUD_CLK_MOSI__FUNC_AUD_CLK_MOSI (MTK_PIN_NO(83) | 1)
+
+#define MT8173_PIN_84_AUD_DAT_MISO__FUNC_GPIO84 (MTK_PIN_NO(84) | 0)
+#define MT8173_PIN_84_AUD_DAT_MISO__FUNC_AUD_DAT_MISO (MTK_PIN_NO(84) | 1)
+#define MT8173_PIN_84_AUD_DAT_MISO__FUNC_AUD_DAT_MOSI (MTK_PIN_NO(84) | 2)
+
+#define MT8173_PIN_85_AUD_DAT_MOSI__FUNC_GPIO85 (MTK_PIN_NO(85) | 0)
+#define MT8173_PIN_85_AUD_DAT_MOSI__FUNC_AUD_DAT_MOSI (MTK_PIN_NO(85) | 1)
+#define MT8173_PIN_85_AUD_DAT_MOSI__FUNC_AUD_DAT_MISO (MTK_PIN_NO(85) | 2)
+
+#define MT8173_PIN_86_RTC32K_CK__FUNC_GPIO86 (MTK_PIN_NO(86) | 0)
+#define MT8173_PIN_86_RTC32K_CK__FUNC_RTC32K_CK (MTK_PIN_NO(86) | 1)
+
+#define MT8173_PIN_87_DISP_PWM0__FUNC_GPIO87 (MTK_PIN_NO(87) | 0)
+#define MT8173_PIN_87_DISP_PWM0__FUNC_DISP_PWM0 (MTK_PIN_NO(87) | 1)
+#define MT8173_PIN_87_DISP_PWM0__FUNC_DISP_PWM1 (MTK_PIN_NO(87) | 2)
+#define MT8173_PIN_87_DISP_PWM0__FUNC_DBG_MON_B_31_ (MTK_PIN_NO(87) | 7)
+
+#define MT8173_PIN_88_SRCLKENAI__FUNC_GPIO88 (MTK_PIN_NO(88) | 0)
+#define MT8173_PIN_88_SRCLKENAI__FUNC_SRCLKENAI (MTK_PIN_NO(88) | 1)
+
+#define MT8173_PIN_89_SRCLKENAI2__FUNC_GPIO89 (MTK_PIN_NO(89) | 0)
+#define MT8173_PIN_89_SRCLKENAI2__FUNC_SRCLKENAI2 (MTK_PIN_NO(89) | 1)
+
+#define MT8173_PIN_90_SRCLKENA0__FUNC_GPIO90 (MTK_PIN_NO(90) | 0)
+#define MT8173_PIN_90_SRCLKENA0__FUNC_SRCLKENA0 (MTK_PIN_NO(90) | 1)
+
+#define MT8173_PIN_91_SRCLKENA1__FUNC_GPIO91 (MTK_PIN_NO(91) | 0)
+#define MT8173_PIN_91_SRCLKENA1__FUNC_SRCLKENA1 (MTK_PIN_NO(91) | 1)
+
+#define MT8173_PIN_92_PCM_CLK__FUNC_GPIO92 (MTK_PIN_NO(92) | 0)
+#define MT8173_PIN_92_PCM_CLK__FUNC_PCM1_CLK (MTK_PIN_NO(92) | 1)
+#define MT8173_PIN_92_PCM_CLK__FUNC_I2S0_BCK (MTK_PIN_NO(92) | 2)
+#define MT8173_PIN_92_PCM_CLK__FUNC_DBG_MON_A_24_ (MTK_PIN_NO(92) | 7)
+
+#define MT8173_PIN_93_PCM_SYNC__FUNC_GPIO93 (MTK_PIN_NO(93) | 0)
+#define MT8173_PIN_93_PCM_SYNC__FUNC_PCM1_SYNC (MTK_PIN_NO(93) | 1)
+#define MT8173_PIN_93_PCM_SYNC__FUNC_I2S0_WS (MTK_PIN_NO(93) | 2)
+#define MT8173_PIN_93_PCM_SYNC__FUNC_DBG_MON_A_25_ (MTK_PIN_NO(93) | 7)
+
+#define MT8173_PIN_94_PCM_RX__FUNC_GPIO94 (MTK_PIN_NO(94) | 0)
+#define MT8173_PIN_94_PCM_RX__FUNC_PCM1_DI (MTK_PIN_NO(94) | 1)
+#define MT8173_PIN_94_PCM_RX__FUNC_I2S0_DI (MTK_PIN_NO(94) | 2)
+#define MT8173_PIN_94_PCM_RX__FUNC_DBG_MON_A_26_ (MTK_PIN_NO(94) | 7)
+
+#define MT8173_PIN_95_PCM_TX__FUNC_GPIO95 (MTK_PIN_NO(95) | 0)
+#define MT8173_PIN_95_PCM_TX__FUNC_PCM1_DO (MTK_PIN_NO(95) | 1)
+#define MT8173_PIN_95_PCM_TX__FUNC_I2S0_DO (MTK_PIN_NO(95) | 2)
+#define MT8173_PIN_95_PCM_TX__FUNC_DBG_MON_A_27_ (MTK_PIN_NO(95) | 7)
+
+#define MT8173_PIN_96_URXD1__FUNC_GPIO96 (MTK_PIN_NO(96) | 0)
+#define MT8173_PIN_96_URXD1__FUNC_URXD1 (MTK_PIN_NO(96) | 1)
+#define MT8173_PIN_96_URXD1__FUNC_UTXD1 (MTK_PIN_NO(96) | 2)
+#define MT8173_PIN_96_URXD1__FUNC_DBG_MON_A_28_ (MTK_PIN_NO(96) | 7)
+
+#define MT8173_PIN_97_UTXD1__FUNC_GPIO97 (MTK_PIN_NO(97) | 0)
+#define MT8173_PIN_97_UTXD1__FUNC_UTXD1 (MTK_PIN_NO(97) | 1)
+#define MT8173_PIN_97_UTXD1__FUNC_URXD1 (MTK_PIN_NO(97) | 2)
+#define MT8173_PIN_97_UTXD1__FUNC_DBG_MON_A_29_ (MTK_PIN_NO(97) | 7)
+
+#define MT8173_PIN_98_URTS1__FUNC_GPIO98 (MTK_PIN_NO(98) | 0)
+#define MT8173_PIN_98_URTS1__FUNC_URTS1 (MTK_PIN_NO(98) | 1)
+#define MT8173_PIN_98_URTS1__FUNC_UCTS1 (MTK_PIN_NO(98) | 2)
+#define MT8173_PIN_98_URTS1__FUNC_DBG_MON_A_30_ (MTK_PIN_NO(98) | 7)
+
+#define MT8173_PIN_99_UCTS1__FUNC_GPIO99 (MTK_PIN_NO(99) | 0)
+#define MT8173_PIN_99_UCTS1__FUNC_UCTS1 (MTK_PIN_NO(99) | 1)
+#define MT8173_PIN_99_UCTS1__FUNC_URTS1 (MTK_PIN_NO(99) | 2)
+#define MT8173_PIN_99_UCTS1__FUNC_DBG_MON_A_31_ (MTK_PIN_NO(99) | 7)
+
+#define MT8173_PIN_100_MSDC2_DAT0__FUNC_GPIO100 (MTK_PIN_NO(100) | 0)
+#define MT8173_PIN_100_MSDC2_DAT0__FUNC_MSDC2_DAT0 (MTK_PIN_NO(100) | 1)
+#define MT8173_PIN_100_MSDC2_DAT0__FUNC_USB_DRVVBUS_P0 (MTK_PIN_NO(100) | 3)
+#define MT8173_PIN_100_MSDC2_DAT0__FUNC_SDA5 (MTK_PIN_NO(100) | 4)
+#define MT8173_PIN_100_MSDC2_DAT0__FUNC_USB_DRVVBUS_P1 (MTK_PIN_NO(100) | 5)
+#define MT8173_PIN_100_MSDC2_DAT0__FUNC_DBG_MON_B_0_ (MTK_PIN_NO(100) | 7)
+
+#define MT8173_PIN_101_MSDC2_DAT1__FUNC_GPIO101 (MTK_PIN_NO(101) | 0)
+#define MT8173_PIN_101_MSDC2_DAT1__FUNC_MSDC2_DAT1 (MTK_PIN_NO(101) | 1)
+#define MT8173_PIN_101_MSDC2_DAT1__FUNC_AUD_SPDIF (MTK_PIN_NO(101) | 3)
+#define MT8173_PIN_101_MSDC2_DAT1__FUNC_SCL5 (MTK_PIN_NO(101) | 4)
+#define MT8173_PIN_101_MSDC2_DAT1__FUNC_DBG_MON_B_1_ (MTK_PIN_NO(101) | 7)
+
+#define MT8173_PIN_102_MSDC2_DAT2__FUNC_GPIO102 (MTK_PIN_NO(102) | 0)
+#define MT8173_PIN_102_MSDC2_DAT2__FUNC_MSDC2_DAT2 (MTK_PIN_NO(102) | 1)
+#define MT8173_PIN_102_MSDC2_DAT2__FUNC_UTXD0 (MTK_PIN_NO(102) | 3)
+#define MT8173_PIN_102_MSDC2_DAT2__FUNC_PWM0 (MTK_PIN_NO(102) | 5)
+#define MT8173_PIN_102_MSDC2_DAT2__FUNC_SPI_CK_1_ (MTK_PIN_NO(102) | 6)
+#define MT8173_PIN_102_MSDC2_DAT2__FUNC_DBG_MON_B_2_ (MTK_PIN_NO(102) | 7)
+
+#define MT8173_PIN_103_MSDC2_DAT3__FUNC_GPIO103 (MTK_PIN_NO(103) | 0)
+#define MT8173_PIN_103_MSDC2_DAT3__FUNC_MSDC2_DAT3 (MTK_PIN_NO(103) | 1)
+#define MT8173_PIN_103_MSDC2_DAT3__FUNC_URXD0 (MTK_PIN_NO(103) | 3)
+#define MT8173_PIN_103_MSDC2_DAT3__FUNC_PWM1 (MTK_PIN_NO(103) | 5)
+#define MT8173_PIN_103_MSDC2_DAT3__FUNC_SPI_MI_1_ (MTK_PIN_NO(103) | 6)
+#define MT8173_PIN_103_MSDC2_DAT3__FUNC_DBG_MON_B_3_ (MTK_PIN_NO(103) | 7)
+
+#define MT8173_PIN_104_MSDC2_CLK__FUNC_GPIO104 (MTK_PIN_NO(104) | 0)
+#define MT8173_PIN_104_MSDC2_CLK__FUNC_MSDC2_CLK (MTK_PIN_NO(104) | 1)
+#define MT8173_PIN_104_MSDC2_CLK__FUNC_UTXD3 (MTK_PIN_NO(104) | 3)
+#define MT8173_PIN_104_MSDC2_CLK__FUNC_SDA3 (MTK_PIN_NO(104) | 4)
+#define MT8173_PIN_104_MSDC2_CLK__FUNC_PWM2 (MTK_PIN_NO(104) | 5)
+#define MT8173_PIN_104_MSDC2_CLK__FUNC_SPI_MO_1_ (MTK_PIN_NO(104) | 6)
+#define MT8173_PIN_104_MSDC2_CLK__FUNC_DBG_MON_B_4_ (MTK_PIN_NO(104) | 7)
+
+#define MT8173_PIN_105_MSDC2_CMD__FUNC_GPIO105 (MTK_PIN_NO(105) | 0)
+#define MT8173_PIN_105_MSDC2_CMD__FUNC_MSDC2_CMD (MTK_PIN_NO(105) | 1)
+#define MT8173_PIN_105_MSDC2_CMD__FUNC_URXD3 (MTK_PIN_NO(105) | 3)
+#define MT8173_PIN_105_MSDC2_CMD__FUNC_SCL3 (MTK_PIN_NO(105) | 4)
+#define MT8173_PIN_105_MSDC2_CMD__FUNC_PWM3 (MTK_PIN_NO(105) | 5)
+#define MT8173_PIN_105_MSDC2_CMD__FUNC_SPI_CS_1_ (MTK_PIN_NO(105) | 6)
+#define MT8173_PIN_105_MSDC2_CMD__FUNC_DBG_MON_B_5_ (MTK_PIN_NO(105) | 7)
+
+#define MT8173_PIN_106_SDA3__FUNC_GPIO106 (MTK_PIN_NO(106) | 0)
+#define MT8173_PIN_106_SDA3__FUNC_SDA3 (MTK_PIN_NO(106) | 1)
+
+#define MT8173_PIN_107_SCL3__FUNC_GPIO107 (MTK_PIN_NO(107) | 0)
+#define MT8173_PIN_107_SCL3__FUNC_SCL3 (MTK_PIN_NO(107) | 1)
+
+#define MT8173_PIN_108_JTMS__FUNC_GPIO108 (MTK_PIN_NO(108) | 0)
+#define MT8173_PIN_108_JTMS__FUNC_JTMS (MTK_PIN_NO(108) | 1)
+#define MT8173_PIN_108_JTMS__FUNC_MFG_JTAG_TMS (MTK_PIN_NO(108) | 2)
+#define MT8173_PIN_108_JTMS__FUNC_AP_MD32_JTAG_TMS (MTK_PIN_NO(108) | 5)
+#define MT8173_PIN_108_JTMS__FUNC_DFD_TMS (MTK_PIN_NO(108) | 6)
+
+#define MT8173_PIN_109_JTCK__FUNC_GPIO109 (MTK_PIN_NO(109) | 0)
+#define MT8173_PIN_109_JTCK__FUNC_JTCK (MTK_PIN_NO(109) | 1)
+#define MT8173_PIN_109_JTCK__FUNC_MFG_JTAG_TCK (MTK_PIN_NO(109) | 2)
+#define MT8173_PIN_109_JTCK__FUNC_AP_MD32_JTAG_TCK (MTK_PIN_NO(109) | 5)
+#define MT8173_PIN_109_JTCK__FUNC_DFD_TCK (MTK_PIN_NO(109) | 6)
+
+#define MT8173_PIN_110_JTDI__FUNC_GPIO110 (MTK_PIN_NO(110) | 0)
+#define MT8173_PIN_110_JTDI__FUNC_JTDI (MTK_PIN_NO(110) | 1)
+#define MT8173_PIN_110_JTDI__FUNC_MFG_JTAG_TDI (MTK_PIN_NO(110) | 2)
+#define MT8173_PIN_110_JTDI__FUNC_AP_MD32_JTAG_TDI (MTK_PIN_NO(110) | 5)
+#define MT8173_PIN_110_JTDI__FUNC_DFD_TDI (MTK_PIN_NO(110) | 6)
+
+#define MT8173_PIN_111_JTDO__FUNC_GPIO111 (MTK_PIN_NO(111) | 0)
+#define MT8173_PIN_111_JTDO__FUNC_JTDO (MTK_PIN_NO(111) | 1)
+#define MT8173_PIN_111_JTDO__FUNC_MFG_JTAG_TDO (MTK_PIN_NO(111) | 2)
+#define MT8173_PIN_111_JTDO__FUNC_AP_MD32_JTAG_TDO (MTK_PIN_NO(111) | 5)
+#define MT8173_PIN_111_JTDO__FUNC_DFD_TDO (MTK_PIN_NO(111) | 6)
+
+#define MT8173_PIN_112_JTRST_B__FUNC_GPIO112 (MTK_PIN_NO(112) | 0)
+#define MT8173_PIN_112_JTRST_B__FUNC_JTRST_B (MTK_PIN_NO(112) | 1)
+#define MT8173_PIN_112_JTRST_B__FUNC_MFG_JTAG_TRSTN (MTK_PIN_NO(112) | 2)
+#define MT8173_PIN_112_JTRST_B__FUNC_AP_MD32_JTAG_TRST (MTK_PIN_NO(112) | 5)
+#define MT8173_PIN_112_JTRST_B__FUNC_DFD_NTRST (MTK_PIN_NO(112) | 6)
+
+#define MT8173_PIN_113_URXD0__FUNC_GPIO113 (MTK_PIN_NO(113) | 0)
+#define MT8173_PIN_113_URXD0__FUNC_URXD0 (MTK_PIN_NO(113) | 1)
+#define MT8173_PIN_113_URXD0__FUNC_UTXD0 (MTK_PIN_NO(113) | 2)
+#define MT8173_PIN_113_URXD0__FUNC_I2S2_WS (MTK_PIN_NO(113) | 6)
+#define MT8173_PIN_113_URXD0__FUNC_DBG_MON_A_0_ (MTK_PIN_NO(113) | 7)
+
+#define MT8173_PIN_114_UTXD0__FUNC_GPIO114 (MTK_PIN_NO(114) | 0)
+#define MT8173_PIN_114_UTXD0__FUNC_UTXD0 (MTK_PIN_NO(114) | 1)
+#define MT8173_PIN_114_UTXD0__FUNC_URXD0 (MTK_PIN_NO(114) | 2)
+#define MT8173_PIN_114_UTXD0__FUNC_I2S2_BCK (MTK_PIN_NO(114) | 6)
+#define MT8173_PIN_114_UTXD0__FUNC_DBG_MON_A_1_ (MTK_PIN_NO(114) | 7)
+
+#define MT8173_PIN_115_URTS0__FUNC_GPIO115 (MTK_PIN_NO(115) | 0)
+#define MT8173_PIN_115_URTS0__FUNC_URTS0 (MTK_PIN_NO(115) | 1)
+#define MT8173_PIN_115_URTS0__FUNC_UCTS0 (MTK_PIN_NO(115) | 2)
+#define MT8173_PIN_115_URTS0__FUNC_I2S2_MCK (MTK_PIN_NO(115) | 6)
+#define MT8173_PIN_115_URTS0__FUNC_DBG_MON_A_2_ (MTK_PIN_NO(115) | 7)
+
+#define MT8173_PIN_116_UCTS0__FUNC_GPIO116 (MTK_PIN_NO(116) | 0)
+#define MT8173_PIN_116_UCTS0__FUNC_UCTS0 (MTK_PIN_NO(116) | 1)
+#define MT8173_PIN_116_UCTS0__FUNC_URTS0 (MTK_PIN_NO(116) | 2)
+#define MT8173_PIN_116_UCTS0__FUNC_I2S2_DI_1 (MTK_PIN_NO(116) | 6)
+#define MT8173_PIN_116_UCTS0__FUNC_DBG_MON_A_3_ (MTK_PIN_NO(116) | 7)
+
+#define MT8173_PIN_117_URXD3__FUNC_GPIO117 (MTK_PIN_NO(117) | 0)
+#define MT8173_PIN_117_URXD3__FUNC_URXD3 (MTK_PIN_NO(117) | 1)
+#define MT8173_PIN_117_URXD3__FUNC_UTXD3 (MTK_PIN_NO(117) | 2)
+#define MT8173_PIN_117_URXD3__FUNC_DBG_MON_A_9_ (MTK_PIN_NO(117) | 7)
+
+#define MT8173_PIN_118_UTXD3__FUNC_GPIO118 (MTK_PIN_NO(118) | 0)
+#define MT8173_PIN_118_UTXD3__FUNC_UTXD3 (MTK_PIN_NO(118) | 1)
+#define MT8173_PIN_118_UTXD3__FUNC_URXD3 (MTK_PIN_NO(118) | 2)
+#define MT8173_PIN_118_UTXD3__FUNC_DBG_MON_A_10_ (MTK_PIN_NO(118) | 7)
+
+#define MT8173_PIN_119_KPROW0__FUNC_GPIO119 (MTK_PIN_NO(119) | 0)
+#define MT8173_PIN_119_KPROW0__FUNC_KROW0 (MTK_PIN_NO(119) | 1)
+#define MT8173_PIN_119_KPROW0__FUNC_DBG_MON_A_11_ (MTK_PIN_NO(119) | 7)
+
+#define MT8173_PIN_120_KPROW1__FUNC_GPIO120 (MTK_PIN_NO(120) | 0)
+#define MT8173_PIN_120_KPROW1__FUNC_KROW1 (MTK_PIN_NO(120) | 1)
+#define MT8173_PIN_120_KPROW1__FUNC_PWM6 (MTK_PIN_NO(120) | 3)
+#define MT8173_PIN_120_KPROW1__FUNC_DBG_MON_A_12_ (MTK_PIN_NO(120) | 7)
+
+#define MT8173_PIN_121_KPROW2__FUNC_GPIO121 (MTK_PIN_NO(121) | 0)
+#define MT8173_PIN_121_KPROW2__FUNC_KROW2 (MTK_PIN_NO(121) | 1)
+#define MT8173_PIN_121_KPROW2__FUNC_IRDA_PDN (MTK_PIN_NO(121) | 2)
+#define MT8173_PIN_121_KPROW2__FUNC_USB_DRVVBUS_P0 (MTK_PIN_NO(121) | 3)
+#define MT8173_PIN_121_KPROW2__FUNC_PWM4 (MTK_PIN_NO(121) | 4)
+#define MT8173_PIN_121_KPROW2__FUNC_USB_DRVVBUS_P1 (MTK_PIN_NO(121) | 5)
+#define MT8173_PIN_121_KPROW2__FUNC_DBG_MON_A_13_ (MTK_PIN_NO(121) | 7)
+
+#define MT8173_PIN_122_KPCOL0__FUNC_GPIO122 (MTK_PIN_NO(122) | 0)
+#define MT8173_PIN_122_KPCOL0__FUNC_KCOL0 (MTK_PIN_NO(122) | 1)
+#define MT8173_PIN_122_KPCOL0__FUNC_DBG_MON_A_14_ (MTK_PIN_NO(122) | 7)
+
+#define MT8173_PIN_123_KPCOL1__FUNC_GPIO123 (MTK_PIN_NO(123) | 0)
+#define MT8173_PIN_123_KPCOL1__FUNC_KCOL1 (MTK_PIN_NO(123) | 1)
+#define MT8173_PIN_123_KPCOL1__FUNC_IRDA_RXD (MTK_PIN_NO(123) | 2)
+#define MT8173_PIN_123_KPCOL1__FUNC_PWM5 (MTK_PIN_NO(123) | 3)
+#define MT8173_PIN_123_KPCOL1__FUNC_DBG_MON_A_15_ (MTK_PIN_NO(123) | 7)
+
+#define MT8173_PIN_124_KPCOL2__FUNC_GPIO124 (MTK_PIN_NO(124) | 0)
+#define MT8173_PIN_124_KPCOL2__FUNC_KCOL2 (MTK_PIN_NO(124) | 1)
+#define MT8173_PIN_124_KPCOL2__FUNC_IRDA_TXD (MTK_PIN_NO(124) | 2)
+#define MT8173_PIN_124_KPCOL2__FUNC_USB_DRVVBUS_P0 (MTK_PIN_NO(124) | 3)
+#define MT8173_PIN_124_KPCOL2__FUNC_PWM3 (MTK_PIN_NO(124) | 4)
+#define MT8173_PIN_124_KPCOL2__FUNC_USB_DRVVBUS_P1 (MTK_PIN_NO(124) | 5)
+#define MT8173_PIN_124_KPCOL2__FUNC_DBG_MON_A_16_ (MTK_PIN_NO(124) | 7)
+
+#define MT8173_PIN_125_SDA1__FUNC_GPIO125 (MTK_PIN_NO(125) | 0)
+#define MT8173_PIN_125_SDA1__FUNC_SDA1 (MTK_PIN_NO(125) | 1)
+
+#define MT8173_PIN_126_SCL1__FUNC_GPIO126 (MTK_PIN_NO(126) | 0)
+#define MT8173_PIN_126_SCL1__FUNC_SCL1 (MTK_PIN_NO(126) | 1)
+
+#define MT8173_PIN_127_LCM_RST__FUNC_GPIO127 (MTK_PIN_NO(127) | 0)
+#define MT8173_PIN_127_LCM_RST__FUNC_LCM_RST (MTK_PIN_NO(127) | 1)
+
+#define MT8173_PIN_128_I2S0_LRCK__FUNC_GPIO128 (MTK_PIN_NO(128) | 0)
+#define MT8173_PIN_128_I2S0_LRCK__FUNC_I2S0_WS (MTK_PIN_NO(128) | 1)
+#define MT8173_PIN_128_I2S0_LRCK__FUNC_I2S1_WS (MTK_PIN_NO(128) | 2)
+#define MT8173_PIN_128_I2S0_LRCK__FUNC_I2S2_WS (MTK_PIN_NO(128) | 3)
+#define MT8173_PIN_128_I2S0_LRCK__FUNC_SPI_CK_2_ (MTK_PIN_NO(128) | 5)
+#define MT8173_PIN_128_I2S0_LRCK__FUNC_DBG_MON_A_4_ (MTK_PIN_NO(128) | 7)
+
+#define MT8173_PIN_129_I2S0_BCK__FUNC_GPIO129 (MTK_PIN_NO(129) | 0)
+#define MT8173_PIN_129_I2S0_BCK__FUNC_I2S0_BCK (MTK_PIN_NO(129) | 1)
+#define MT8173_PIN_129_I2S0_BCK__FUNC_I2S1_BCK (MTK_PIN_NO(129) | 2)
+#define MT8173_PIN_129_I2S0_BCK__FUNC_I2S2_BCK (MTK_PIN_NO(129) | 3)
+#define MT8173_PIN_129_I2S0_BCK__FUNC_SPI_MI_2_ (MTK_PIN_NO(129) | 5)
+#define MT8173_PIN_129_I2S0_BCK__FUNC_DBG_MON_A_5_ (MTK_PIN_NO(129) | 7)
+
+#define MT8173_PIN_130_I2S0_MCK__FUNC_GPIO130 (MTK_PIN_NO(130) | 0)
+#define MT8173_PIN_130_I2S0_MCK__FUNC_I2S0_MCK (MTK_PIN_NO(130) | 1)
+#define MT8173_PIN_130_I2S0_MCK__FUNC_I2S1_MCK (MTK_PIN_NO(130) | 2)
+#define MT8173_PIN_130_I2S0_MCK__FUNC_I2S2_MCK (MTK_PIN_NO(130) | 3)
+#define MT8173_PIN_130_I2S0_MCK__FUNC_SPI_MO_2_ (MTK_PIN_NO(130) | 5)
+#define MT8173_PIN_130_I2S0_MCK__FUNC_DBG_MON_A_6_ (MTK_PIN_NO(130) | 7)
+
+#define MT8173_PIN_131_I2S0_DATA0__FUNC_GPIO131 (MTK_PIN_NO(131) | 0)
+#define MT8173_PIN_131_I2S0_DATA0__FUNC_I2S0_DO (MTK_PIN_NO(131) | 1)
+#define MT8173_PIN_131_I2S0_DATA0__FUNC_I2S1_DO_1 (MTK_PIN_NO(131) | 2)
+#define MT8173_PIN_131_I2S0_DATA0__FUNC_I2S2_DI_1 (MTK_PIN_NO(131) | 3)
+#define MT8173_PIN_131_I2S0_DATA0__FUNC_SPI_CS_2_ (MTK_PIN_NO(131) | 5)
+#define MT8173_PIN_131_I2S0_DATA0__FUNC_DBG_MON_A_7_ (MTK_PIN_NO(131) | 7)
+
+#define MT8173_PIN_132_I2S0_DATA1__FUNC_GPIO132 (MTK_PIN_NO(132) | 0)
+#define MT8173_PIN_132_I2S0_DATA1__FUNC_I2S0_DI (MTK_PIN_NO(132) | 1)
+#define MT8173_PIN_132_I2S0_DATA1__FUNC_I2S1_DO_2 (MTK_PIN_NO(132) | 2)
+#define MT8173_PIN_132_I2S0_DATA1__FUNC_I2S2_DI_2 (MTK_PIN_NO(132) | 3)
+#define MT8173_PIN_132_I2S0_DATA1__FUNC_DBG_MON_A_8_ (MTK_PIN_NO(132) | 7)
+
+#define MT8173_PIN_133_SDA4__FUNC_GPIO133 (MTK_PIN_NO(133) | 0)
+#define MT8173_PIN_133_SDA4__FUNC_SDA4 (MTK_PIN_NO(133) | 1)
+
+#define MT8173_PIN_134_SCL4__FUNC_GPIO134 (MTK_PIN_NO(134) | 0)
+#define MT8173_PIN_134_SCL4__FUNC_SCL4 (MTK_PIN_NO(134) | 1)
+
+#endif /* __DTS_MT8173_PINFUNC_H */
diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
new file mode 100644
index 000000000..924fdb667
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2014 MediaTek Inc.
+ * Author: Eddie Huang <eddie.huang@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include "mt8173-pinfunc.h"
+
+/ {
+	compatible = "mediatek,mt8173";
+	interrupt-parent = <&sysirq>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu-map {
+			cluster0 {
+				core0 {
+					cpu = <&cpu0>;
+				};
+				core1 {
+					cpu = <&cpu1>;
+				};
+			};
+
+			cluster1 {
+				core0 {
+					cpu = <&cpu2>;
+				};
+				core1 {
+					cpu = <&cpu3>;
+				};
+			};
+		};
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x000>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0x001>;
+			enable-method = "psci";
+		};
+
+		cpu2: cpu@100 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x100>;
+			enable-method = "psci";
+		};
+
+		cpu3: cpu@101 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x101>;
+			enable-method = "psci";
+		};
+	};
+
+	psci {
+		compatible = "arm,psci";
+		method = "smc";
+		cpu_suspend   = <0x84000001>;
+		cpu_off	      = <0x84000002>;
+		cpu_on	      = <0x84000003>;
+	};
+
+	uart_clk: dummy26m {
+		compatible = "fixed-clock";
+		clock-frequency = <26000000>;
+		#clock-cells = <0>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupt-parent = <&gic>;
+		interrupts = <GIC_PPI 13
+			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14
+			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11
+			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10
+			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	soc {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		compatible = "simple-bus";
+		ranges;
+
+		syscfg_pctl_a: syscfg_pctl_a@10005000 {
+			compatible = "mediatek,mt8173-pctl-a-syscfg", "syscon";
+			reg = <0 0x10005000 0 0x1000>;
+		};
+
+		pio: pinctrl@0x10005000 {
+			compatible = "mediatek,mt8173-pinctrl";
+			reg = <0 0x1000B000 0 0x1000>;
+			mediatek,pctl-regmap = <&syscfg_pctl_a>;
+			pins-are-numbered;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupts = <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+						<GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+						<GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		sysirq: intpol-controller@10200620 {
+			compatible = "mediatek,mt8173-sysirq",
+					"mediatek,mt6577-sysirq";
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			interrupt-parent = <&gic>;
+			reg = <0 0x10200620 0 0x20>;
+		};
+
+		gic: interrupt-controller@10220000 {
+			compatible = "arm,gic-400";
+			#interrupt-cells = <3>;
+			interrupt-parent = <&gic>;
+			interrupt-controller;
+			reg = <0 0x10221000 0 0x1000>,
+			      <0 0x10222000 0 0x2000>,
+			      <0 0x10224000 0 0x2000>,
+			      <0 0x10226000 0 0x2000>;
+			interrupts = <GIC_PPI 9
+				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+		};
+
+		uart0: serial@11002000 {
+			compatible = "mediatek,mt8173-uart",
+					"mediatek,mt6577-uart";
+			reg = <0 0x11002000 0 0x400>;
+			interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		uart1: serial@11003000 {
+			compatible = "mediatek,mt8173-uart",
+					"mediatek,mt6577-uart";
+			reg = <0 0x11003000 0 0x400>;
+			interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		uart2: serial@11004000 {
+			compatible = "mediatek,mt8173-uart",
+					"mediatek,mt6577-uart";
+			reg = <0 0x11004000 0 0x400>;
+			interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		uart3: serial@11005000 {
+			compatible = "mediatek,mt8173-uart",
+					"mediatek,mt6577-uart";
+			reg = <0 0x11005000 0 0x400>;
+			interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+	};
+
+};
+
diff --git a/arch/arm64/boot/dts/qcom/Makefile b/arch/arm64/boot/dts/qcom/Makefile
new file mode 100644
index 000000000..8e94af64e
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_QCOM)	+= apq8016-sbc.dtb msm8916-mtp.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts
new file mode 100644
index 000000000..825f489a2
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/dts-v1/;
+
+#include "apq8016-sbc.dtsi"
+
+/ {
+	model = "Qualcomm Technologies, Inc. APQ 8016 SBC";
+	compatible = "qcom,apq8016-sbc", "qcom,apq8016", "qcom,sbc";
+};
diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
new file mode 100644
index 000000000..703a4f16e
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "msm8916.dtsi"
+
+/ {
+	aliases {
+		serial0 = &blsp1_uart2;
+	};
+
+	chosen {
+		stdout-path = "serial0";
+	};
+
+	soc {
+		serial@78b0000 {
+			status = "okay";
+			pinctrl-names = "default", "sleep";
+			pinctrl-0 = <&blsp1_uart2_default>;
+			pinctrl-1 = <&blsp1_uart2_sleep>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-mtp.dts b/arch/arm64/boot/dts/qcom/msm8916-mtp.dts
new file mode 100644
index 000000000..fced77f0f
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8916-mtp.dts
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/dts-v1/;
+
+#include "msm8916-mtp.dtsi"
+
+/ {
+	model = "Qualcomm Technologies, Inc. MSM 8916 MTP";
+	compatible = "qcom,msm8916-mtp", "qcom,msm8916-mtp-smb1360",
+			"qcom,msm8916", "qcom,mtp";
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi
new file mode 100644
index 000000000..bea871b0d
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "msm8916.dtsi"
+
+/ {
+	aliases {
+		serial0 = &blsp1_uart2;
+	};
+
+	chosen {
+		stdout-path = "serial0";
+	};
+
+	soc {
+		serial@78b0000 {
+			status = "okay";
+			pinctrl-names = "default", "sleep";
+			pinctrl-0 = <&blsp1_uart2_default>;
+			pinctrl-1 = <&blsp1_uart2_sleep>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
new file mode 100644
index 000000000..f212b8303
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2013-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/clock/qcom,gcc-msm8916.h>
+#include <dt-bindings/reset/qcom,gcc-msm8916.h>
+
+/ {
+	model = "Qualcomm Technologies, Inc. MSM8916";
+	compatible = "qcom,msm8916";
+
+	interrupt-parent = <&intc>;
+
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases { };
+
+	chosen { };
+
+	memory {
+		device_type = "memory";
+		/* We expect the bootloader to fill in the reg */
+		reg = <0 0 0 0>;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		CPU0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0>;
+		};
+
+		CPU1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x1>;
+		};
+
+		CPU2: cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x2>;
+		};
+
+		CPU3: cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x3>;
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 3 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 4 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 1 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	soc: soc {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0xffffffff>;
+		compatible = "simple-bus";
+
+		pinctrl@1000000 {
+			compatible = "qcom,msm8916-pinctrl";
+			reg = <0x1000000 0x300000>;
+			interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+
+			blsp1_uart2_default: blsp1_uart2_default {
+				pinmux {
+					function = "blsp_uart2";
+					pins = "gpio4", "gpio5";
+				};
+				pinconf {
+					pins = "gpio4", "gpio5";
+					drive-strength = <16>;
+					bias-disable;
+				};
+			};
+
+			blsp1_uart2_sleep: blsp1_uart2_sleep {
+				pinmux {
+					function = "blsp_uart2";
+					pins = "gpio4", "gpio5";
+				};
+				pinconf {
+					pins = "gpio4", "gpio5";
+					drive-strength = <2>;
+					bias-pull-down;
+				};
+			};
+		};
+
+		gcc: qcom,gcc@1800000 {
+			compatible = "qcom,gcc-msm8916";
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+			reg = <0x1800000 0x80000>;
+		};
+
+		blsp1_uart2: serial@78b0000 {
+			compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
+			reg = <0x78b0000 0x200>;
+			interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>;
+			clock-names = "core", "iface";
+			status = "disabled";
+		};
+
+		intc: interrupt-controller@b000000 {
+			compatible = "qcom,msm-qgic2";
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			reg = <0x0b000000 0x1000>, <0x0b002000 0x1000>;
+		};
+
+		timer@b020000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges;
+			compatible = "arm,armv7-timer-mem";
+			reg = <0xb020000 0x1000>;
+			clock-frequency = <19200000>;
+
+			frame@b021000 {
+				frame-number = <0>;
+				interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+				reg = <0xb021000 0x1000>,
+				      <0xb022000 0x1000>;
+			};
+
+			frame@b023000 {
+				frame-number = <1>;
+				interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
+				reg = <0xb023000 0x1000>;
+				status = "disabled";
+			};
+
+			frame@b024000 {
+				frame-number = <2>;
+				interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+				reg = <0xb024000 0x1000>;
+				status = "disabled";
+			};
+
+			frame@b025000 {
+				frame-number = <3>;
+				interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+				reg = <0xb025000 0x1000>;
+				status = "disabled";
+			};
+
+			frame@b026000 {
+				frame-number = <4>;
+				interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+				reg = <0xb026000 0x1000>;
+				status = "disabled";
+			};
+
+			frame@b027000 {
+				frame-number = <5>;
+				interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+				reg = <0xb027000 0x1000>;
+				status = "disabled";
+			};
+
+			frame@b028000 {
+				frame-number = <6>;
+				interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+				reg = <0xb028000 0x1000>;
+				status = "disabled";
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/skeleton.dtsi b/arch/arm64/boot/dts/skeleton.dtsi
new file mode 100644
index 000000000..38ead821b
--- /dev/null
+++ b/arch/arm64/boot/dts/skeleton.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Skeleton device tree; the bare minimum needed to boot; just include and
+ * add a compatible value.  The bootloader will typically populate the memory
+ * node.
+ */
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	chosen { };
+	aliases { };
+	memory { device_type = "memory"; reg = <0 0 0>; };
+};
diff --git a/arch/arm64/boot/dts/sprd/Makefile b/arch/arm64/boot/dts/sprd/Makefile
new file mode 100644
index 000000000..b658c5e09
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_SPRD) += sc9836-openphone.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/sprd/sc9836-openphone.dts b/arch/arm64/boot/dts/sprd/sc9836-openphone.dts
new file mode 100644
index 000000000..e5657c35c
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd/sc9836-openphone.dts
@@ -0,0 +1,49 @@
+/*
+ * Spreadtrum SC9836 openphone board DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is licensed under a dual GPLv2 or X11 license.
+ */
+
+/dts-v1/;
+
+#include "sc9836.dtsi"
+
+/ {
+	model = "Spreadtrum SC9836 Openphone Board";
+
+	compatible = "sprd,sc9836-openphone", "sprd,sc9836";
+
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart1;
+		serial2 = &uart2;
+		serial3 = &uart3;
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x20000000>;
+	};
+
+	chosen {
+		stdout-path = "serial1:115200n8";
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&uart3 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/sprd/sc9836.dtsi b/arch/arm64/boot/dts/sprd/sc9836.dtsi
new file mode 100644
index 000000000..ee34e1a36
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd/sc9836.dtsi
@@ -0,0 +1,129 @@
+/*
+ * Spreadtrum SC9836 SoC DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is licensed under a dual GPLv2 or X11 license.
+ */
+
+#include "sharkl64.dtsi"
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+	compatible = "sprd,sc9836";
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0 0x1>;
+			enable-method = "psci";
+		};
+
+		cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0 0x2>;
+			enable-method = "psci";
+		};
+
+		cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0 0x3>;
+			enable-method = "psci";
+		};
+	};
+
+	etf@10003000 {
+		compatible = "arm,coresight-tmc", "arm,primecell";
+		reg = <0 0x10003000 0 0x1000>;
+		clocks = <&clk26mhz>;
+		clock-names = "apb_pclk";
+		port {
+			etf_in: endpoint {
+				slave-mode;
+				remote-endpoint = <&funnel_out_port0>;
+			};
+		};
+	};
+
+	funnel@10001000 {
+		compatible = "arm,coresight-funnel", "arm,primecell";
+		reg = <0 0x10001000 0 0x1000>;
+		clocks = <&clk26mhz>;
+		clock-names = "apb_pclk";
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			/* funnel output port */
+			port@0 {
+				reg = <0>;
+				funnel_out_port0: endpoint {
+					remote-endpoint = <&etf_in>;
+				};
+			};
+
+			/* funnel input port 0~3 is reserved for ETMs */
+			port@1 {
+				reg = <4>;
+				funnel_in_port4: endpoint {
+					slave-mode;
+					remote-endpoint = <&stm_out>;
+				};
+			};
+		};
+	};
+
+	stm@10006000 {
+		compatible = "arm,coresight-stm", "arm,primecell";
+		reg = <0 0x10006000 0 0x1000>,
+		      <0 0x01000000 0 0x180000>;
+		reg-names = "stm-base", "stm-stimulus-base";
+		clocks = <&clk26mhz>;
+		clock-names = "apb_pclk";
+		port {
+			stm_out: endpoint {
+				remote-endpoint = <&funnel_in_port4>;
+			};
+		};
+	};
+
+	gic: interrupt-controller@12001000 {
+		compatible = "arm,gic-400";
+		reg = <0 0x12001000 0 0x1000>,
+		      <0 0x12002000 0 0x2000>,
+		      <0 0x12004000 0 0x2000>,
+		      <0 0x12006000 0 0x2000>;
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+	};
+
+	psci {
+		compatible	= "arm,psci";
+		method		= "smc";
+		cpu_on		= <0xc4000003>;
+		cpu_off		= <0x84000002>;
+		cpu_suspend	= <0xc4000001>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+};
diff --git a/arch/arm64/boot/dts/sprd/sharkl64.dtsi b/arch/arm64/boot/dts/sprd/sharkl64.dtsi
new file mode 100644
index 000000000..69f64e7fc
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd/sharkl64.dtsi
@@ -0,0 +1,65 @@
+/*
+ * Spreadtrum Sharkl64 platform DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is licensed under a dual GPLv2 or X11 license.
+ */
+
+/ {
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		ap-apb {
+			compatible = "simple-bus";
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
+
+			uart0: serial@70000000 {
+				compatible = "sprd,sc9836-uart";
+				reg = <0 0x70000000 0 0x100>;
+				interrupts = <0 2 0xf04>;
+				clocks = <&clk26mhz>;
+				status = "disabled";
+			};
+
+			uart1: serial@70100000 {
+				compatible = "sprd,sc9836-uart";
+				reg = <0 0x70100000 0 0x100>;
+				interrupts = <0 3 0xf04>;
+				clocks = <&clk26mhz>;
+				status = "disabled";
+			};
+
+			uart2: serial@70200000 {
+				compatible = "sprd,sc9836-uart";
+				reg = <0 0x70200000 0 0x100>;
+				interrupts = <0 4 0xf04>;
+				clocks = <&clk26mhz>;
+				status = "disabled";
+			};
+
+			uart3: serial@70300000 {
+				compatible = "sprd,sc9836-uart";
+				reg = <0 0x70300000 0 0x100>;
+				interrupts = <0 5 0xf04>;
+				clocks = <&clk26mhz>;
+				status = "disabled";
+			};
+		};
+	};
+
+	clk26mhz: clk26mhz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <26000000>;
+	};
+};
diff --git a/arch/arm64/boot/dts/xilinx/Makefile b/arch/arm64/boot/dts/xilinx/Makefile
new file mode 100644
index 000000000..ae16427f6
--- /dev/null
+++ b/arch/arm64/boot/dts/xilinx/Makefile
@@ -0,0 +1,5 @@
+dtb-$(CONFIG_ARCH_ZYNQMP) += zynqmp-ep108.dtb
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts
new file mode 100644
index 000000000..0a3f40ecd
--- /dev/null
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts
@@ -0,0 +1,47 @@
+/*
+ * dts file for Xilinx ZynqMP ep108 development board
+ *
+ * (C) Copyright 2014 - 2015, Xilinx, Inc.
+ *
+ * Michal Simek <michal.simek@xilinx.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/dts-v1/;
+
+/include/ "zynqmp.dtsi"
+
+/ {
+	model = "ZynqMP EP108";
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x40000000>;
+	};
+};
+
+&gem0 {
+	status = "okay";
+	phy-handle = <&phy0>;
+	phy-mode = "rgmii-id";
+	phy0: phy@0{
+		reg = <0>;
+		max-speed = <100>;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
new file mode 100644
index 000000000..11e0b0004
--- /dev/null
+++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
@@ -0,0 +1,305 @@
+/*
+ * dts file for Xilinx ZynqMP
+ *
+ * (C) Copyright 2014 - 2015, Xilinx, Inc.
+ *
+ * Michal Simek <michal.simek@xilinx.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/ {
+	compatible = "xlnx,zynqmp";
+	#address-cells = <2>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			compatible = "arm,cortex-a53", "arm,armv8";
+			device_type = "cpu";
+			enable-method = "psci";
+			reg = <0x0>;
+		};
+
+		cpu@1 {
+			compatible = "arm,cortex-a53", "arm,armv8";
+			device_type = "cpu";
+			enable-method = "psci";
+			reg = <0x1>;
+		};
+
+		cpu@2 {
+			compatible = "arm,cortex-a53", "arm,armv8";
+			device_type = "cpu";
+			enable-method = "psci";
+			reg = <0x2>;
+		};
+
+		cpu@3 {
+			compatible = "arm,cortex-a53", "arm,armv8";
+			device_type = "cpu";
+			enable-method = "psci";
+			reg = <0x3>;
+		};
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <0 143 4>,
+			     <0 144 4>,
+			     <0 145 4>,
+			     <0 146 4>;
+	};
+
+	psci {
+		compatible = "arm,psci-0.2";
+		method = "smc";
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupt-parent = <&gic>;
+		interrupts = <1 13 0xf01>,
+			     <1 14 0xf01>,
+			     <1 11 0xf01>,
+			     <1 10 0xf01>;
+	};
+
+	amba_apu {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+
+		gic: interrupt-controller@f9010000 {
+			compatible = "arm,gic-400", "arm,cortex-a15-gic";
+			#interrupt-cells = <3>;
+			reg = <0x0 0xf9010000 0x10000>,
+			      <0x0 0xf902f000 0x2000>,
+			      <0x0 0xf9040000 0x20000>,
+			      <0x0 0xf906f000 0x2000>;
+			interrupt-controller;
+			interrupt-parent = <&gic>;
+			interrupts = <1 9 0xf04>;
+		};
+	};
+
+	amba {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+
+		misc_clk: misc_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <25000000>;
+		};
+
+		ttc0: timer@ff110000 {
+			compatible = "cdns,ttc";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+			reg = <0x0 0xff110000 0x1000>;
+			clocks = <&misc_clk>;
+			timer-width = <32>;
+		};
+
+		ttc1: timer@ff120000 {
+			compatible = "cdns,ttc";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 39 4>, <0 40 4>, <0 41 4>;
+			reg = <0x0 0xff120000 0x1000>;
+			clocks = <&misc_clk>;
+			timer-width = <32>;
+		};
+
+		ttc2: timer@ff130000 {
+			compatible = "cdns,ttc";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 42 4>, <0 43 4>, <0 44 4>;
+			reg = <0x0 0xff130000 0x1000>;
+			clocks = <&misc_clk>;
+			timer-width = <32>;
+		};
+
+		ttc3: timer@ff140000 {
+			compatible = "cdns,ttc";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 45 4>, <0 46 4>, <0 47 4>;
+			reg = <0x0 0xff140000 0x1000>;
+			clocks = <&misc_clk>;
+			timer-width = <32>;
+		};
+
+		uart0: serial@ff000000 {
+			compatible = "cdns,uart-r1p8";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 21 4>;
+			reg = <0x0 0xff000000 0x1000>;
+			clock-names = "uart_clk", "pclk";
+			clocks = <&misc_clk &misc_clk>;
+		};
+
+		uart1: serial@ff010000 {
+			compatible = "cdns,uart-r1p8";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 22 4>;
+			reg = <0x0 0xff010000 0x1000>;
+			clock-names = "uart_clk", "pclk";
+			clocks = <&misc_clk &misc_clk>;
+		};
+
+		gpio: gpio@ff0a0000 {
+			compatible = "xlnx,zynq-gpio-1.0";
+			status = "disabled";
+			#gpio-cells = <0x2>;
+			clocks = <&misc_clk>;
+			interrupt-parent = <&gic>;
+			interrupts = <0 16 4>;
+			reg = <0x0 0xff0a0000 0x1000>;
+		};
+
+		gem0: ethernet@ff0b0000 {
+			compatible = "cdns,gem";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 57 4>, <0 57 4>;
+			reg = <0x0 0xff0b0000 0x1000>;
+			clock-names = "pclk", "hclk", "tx_clk";
+			clocks = <&misc_clk>, <&misc_clk>, <&misc_clk>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		gem1: ethernet@ff0c0000 {
+			compatible = "cdns,gem";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 59 4>, <0 59 4>;
+			reg = <0x0 0xff0c0000 0x1000>;
+			clock-names = "pclk", "hclk", "tx_clk";
+			clocks = <&misc_clk>, <&misc_clk>, <&misc_clk>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		gem2: ethernet@ff0d0000 {
+			compatible = "cdns,gem";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 61 4>, <0 61 4>;
+			reg = <0x0 0xff0d0000 0x1000>;
+			clock-names = "pclk", "hclk", "tx_clk";
+			clocks = <&misc_clk>, <&misc_clk>, <&misc_clk>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		gem3: ethernet@ff0e0000 {
+			compatible = "cdns,gem";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 63 4>, <0 63 4>;
+			reg = <0x0 0xff0e0000 0x1000>;
+			clock-names = "pclk", "hclk", "tx_clk";
+			clocks = <&misc_clk>, <&misc_clk>, <&misc_clk>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		spi0: spi@ff040000 {
+			compatible = "cdns,spi-r1p6";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 19 4>;
+			reg = <0x0 0xff040000 0x1000>;
+			clock-names = "ref_clk", "pclk";
+			clocks = <&misc_clk &misc_clk>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		spi1: spi@ff050000 {
+			compatible = "cdns,spi-r1p6";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 20 4>;
+			reg = <0x0 0xff050000 0x1000>;
+			clock-names = "ref_clk", "pclk";
+			clocks = <&misc_clk &misc_clk>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		i2c_clk: i2c_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0x0>;
+			clock-frequency = <111111111>;
+		};
+
+		i2c0: i2c@ff020000 {
+			compatible = "cdns,i2c-r1p10";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 17 4>;
+			reg = <0x0 0xff020000 0x1000>;
+			clocks = <&i2c_clk>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		i2c1: i2c@ff030000 {
+			compatible = "cdns,i2c-r1p10";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 18 4>;
+			reg = <0x0 0xff030000 0x1000>;
+			clocks = <&i2c_clk>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		sdhci0: sdhci@ff160000 {
+			compatible = "arasan,sdhci-8.9a";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 48 4>;
+			reg = <0x0 0xff160000 0x1000>;
+			clock-names = "clk_xin", "clk_ahb";
+			clocks = <&misc_clk>, <&misc_clk>;
+		};
+
+		sdhci1: sdhci@ff170000 {
+			compatible = "arasan,sdhci-8.9a";
+			status = "disabled";
+			interrupt-parent = <&gic>;
+			interrupts = <0 49 4>;
+			reg = <0x0 0xff170000 0x1000>;
+			clock-names = "clk_xin", "clk_ahb";
+			clocks = <&misc_clk>, <&misc_clk>;
+		};
+
+		watchdog0: watchdog@fd4d0000 {
+			compatible = "cdns,wdt-r1p2";
+			status = "disabled";
+			clocks= <&misc_clk>;
+			interrupt-parent = <&gic>;
+			interrupts = <0 52 1>;
+			reg = <0x0 0xfd4d0000 0x1000>;
+			timeout-sec = <10>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh
new file mode 100644
index 000000000..12ed78aa6
--- /dev/null
+++ b/arch/arm64/boot/install.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+#
+# arch/arm64/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+# Adapted from code in arch/i386/boot/install.sh by Russell King
+#
+# "make install" script for the AArch64 Linux port
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+
+if [ "$(basename $2)" = "Image.gz" ]; then
+# Compressed install
+  echo "Installing compressed kernel"
+  base=vmlinuz
+else
+# Normal install
+  echo "Installing normal kernel"
+  base=vmlinux
+fi
+
+if [ -f $4/$base-$1 ]; then
+  mv $4/$base-$1 $4/$base-$1.old
+fi
+cat $2 > $4/$base-$1
+
+# Install system map file
+if [ -f $4/System.map-$1 ]; then
+  mv $4/System.map-$1 $4/System.map-$1.old
+fi
+cp $3 $4/System.map-$1
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
new file mode 100644
index 000000000..2ed7449d9
--- /dev/null
+++ b/arch/arm64/configs/defconfig
@@ -0,0 +1,191 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_CGROUP_HUGETLB=y
+# CONFIG_UTS_NS is not set
+# CONFIG_IPC_NS is not set
+# CONFIG_NET_NS is not set
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_ARCH_EXYNOS7=y
+CONFIG_ARCH_FSL_LS2085A=y
+CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_SEATTLE=y
+CONFIG_ARCH_TEGRA=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_QCOM=y
+CONFIG_ARCH_SPRD=y
+CONFIG_ARCH_THUNDER=y
+CONFIG_ARCH_VEXPRESS=y
+CONFIG_ARCH_XGENE=y
+CONFIG_ARCH_ZYNQMP=y
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_PCI_XGENE=y
+CONFIG_SMP=y
+CONFIG_PREEMPT=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CMA=y
+CONFIG_CMDLINE="console=ttyAMA0"
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_COMPAT=y
+CONFIG_CPU_IDLE=y
+CONFIG_ARM_CPUIDLE=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_INET_LRO is not set
+# CONFIG_IPV6 is not set
+CONFIG_BPF_JIT=y
+# CONFIG_WIRELESS is not set
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+# CONFIG_TEGRA_AHB is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_XGENE=y
+CONFIG_PATA_PLATFORM=y
+CONFIG_PATA_OF_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_NET_XGENE=y
+CONFIG_SKY2=y
+CONFIG_SMC91X=y
+CONFIG_SMSC911X=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_AMBAKMI=y
+CONFIG_LEGACY_PTY_COUNT=16
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_MT6577=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_MSM=y
+CONFIG_SERIAL_MSM_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_XILINX_PS_UART=y
+CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_SPI=y
+CONFIG_SPI_PL022=y
+CONFIG_PINCTRL_MSM8916=y
+CONFIG_GPIO_PL061=y
+CONFIG_GPIO_XGENE=y
+CONFIG_POWER_RESET_XGENE=y
+CONFIG_POWER_RESET_SYSCON=y
+# CONFIG_HWMON is not set
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_FB=y
+CONFIG_FB_ARMCLCD=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
+CONFIG_USB_ULPI=y
+CONFIG_MMC=y
+CONFIG_MMC_ARMMMCI=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SPI=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_XGENE=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_COMMON_CLK_QCOM=y
+CONFIG_MSM_GCC_8916=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_PHY_XGENE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
+CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_EFIVAR_FS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_9P_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOCKUP_DETECTOR=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
+CONFIG_SECURITY=y
+CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_ARM64_CRYPTO=y
+CONFIG_CRYPTO_SHA1_ARM64_CE=y
+CONFIG_CRYPTO_SHA2_ARM64_CE=y
+CONFIG_CRYPTO_GHASH_ARM64_CE=y
+CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
+CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
+CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
new file mode 100644
index 000000000..2cf32e988
--- /dev/null
+++ b/arch/arm64/crypto/Kconfig
@@ -0,0 +1,56 @@
+
+menuconfig ARM64_CRYPTO
+	bool "ARM64 Accelerated Cryptographic Algorithms"
+	depends on ARM64
+	help
+	  Say Y here to choose from a selection of cryptographic algorithms
+	  implemented using ARM64 specific CPU features or instructions.
+
+if ARM64_CRYPTO
+
+config CRYPTO_SHA1_ARM64_CE
+	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_HASH
+
+config CRYPTO_SHA2_ARM64_CE
+	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_HASH
+
+config CRYPTO_GHASH_ARM64_CE
+	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_HASH
+
+config CRYPTO_AES_ARM64_CE
+	tristate "AES core cipher using ARMv8 Crypto Extensions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+
+config CRYPTO_AES_ARM64_CE_CCM
+	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AEAD
+
+config CRYPTO_AES_ARM64_CE_BLK
+	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_ABLK_HELPER
+
+config CRYPTO_AES_ARM64_NEON_BLK
+	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES
+	select CRYPTO_ABLK_HELPER
+
+config CRYPTO_CRC32_ARM64
+	tristate "CRC32 and CRC32C using optional ARMv8 instructions"
+	depends on ARM64
+	select CRYPTO_HASH
+endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
new file mode 100644
index 000000000..abb79b3cf
--- /dev/null
+++ b/arch/arm64/crypto/Makefile
@@ -0,0 +1,42 @@
+#
+# linux/arch/arm64/crypto/Makefile
+#
+# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
+sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
+sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
+
+obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
+CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
+aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
+aes-ce-blk-y := aes-glue-ce.o aes-ce.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
+aes-neon-blk-y := aes-glue-neon.o aes-neon.o
+
+AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
+AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
+
+CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
+
+obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
+
+CFLAGS_crc32-arm64.o	:= -mcpu=generic+crc
+
+$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
+	$(call if_changed_rule,cc_o_c)
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
new file mode 100644
index 000000000..a2a7fbcac
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -0,0 +1,222 @@
+/*
+ * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.arch	armv8-a+crypto
+
+	/*
+	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+	 *			     u32 *macp, u8 const rk[], u32 rounds);
+	 */
+ENTRY(ce_aes_ccm_auth_data)
+	ldr	w8, [x3]			/* leftover from prev round? */
+	ld1	{v0.2d}, [x0]			/* load mac */
+	cbz	w8, 1f
+	sub	w8, w8, #16
+	eor	v1.16b, v1.16b, v1.16b
+0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
+	subs	w2, w2, #1
+	add	w8, w8, #1
+	ins	v1.b[0], w7
+	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
+	beq	8f				/* out of input? */
+	cbnz	w8, 0b
+	eor	v0.16b, v0.16b, v1.16b
+1:	ld1	{v3.2d}, [x4]			/* load first round key */
+	prfm	pldl1strm, [x1]
+	cmp	w5, #12				/* which key size? */
+	add	x6, x4, #16
+	sub	w7, w5, #2			/* modified # of rounds */
+	bmi	2f
+	bne	5f
+	mov	v5.16b, v3.16b
+	b	4f
+2:	mov	v4.16b, v3.16b
+	ld1	{v5.2d}, [x6], #16		/* load 2nd round key */
+3:	aese	v0.16b, v4.16b
+	aesmc	v0.16b, v0.16b
+4:	ld1	{v3.2d}, [x6], #16		/* load next round key */
+	aese	v0.16b, v5.16b
+	aesmc	v0.16b, v0.16b
+5:	ld1	{v4.2d}, [x6], #16		/* load next round key */
+	subs	w7, w7, #3
+	aese	v0.16b, v3.16b
+	aesmc	v0.16b, v0.16b
+	ld1	{v5.2d}, [x6], #16		/* load next round key */
+	bpl	3b
+	aese	v0.16b, v4.16b
+	subs	w2, w2, #16			/* last data? */
+	eor	v0.16b, v0.16b, v5.16b		/* final round */
+	bmi	6f
+	ld1	{v1.16b}, [x1], #16		/* load next input block */
+	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
+	bne	1b
+6:	st1	{v0.2d}, [x0]			/* store mac */
+	beq	10f
+	adds	w2, w2, #16
+	beq	10f
+	mov	w8, w2
+7:	ldrb	w7, [x1], #1
+	umov	w6, v0.b[0]
+	eor	w6, w6, w7
+	strb	w6, [x0], #1
+	subs	w2, w2, #1
+	beq	10f
+	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
+	b	7b
+8:	mov	w7, w8
+	add	w8, w8, #16
+9:	ext	v1.16b, v1.16b, v1.16b, #1
+	adds	w7, w7, #1
+	bne	9b
+	eor	v0.16b, v0.16b, v1.16b
+	st1	{v0.2d}, [x0]
+10:	str	w8, [x3]
+	ret
+ENDPROC(ce_aes_ccm_auth_data)
+
+	/*
+	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
+	 * 			 u32 rounds);
+	 */
+ENTRY(ce_aes_ccm_final)
+	ld1	{v3.2d}, [x2], #16		/* load first round key */
+	ld1	{v0.2d}, [x0]			/* load mac */
+	cmp	w3, #12				/* which key size? */
+	sub	w3, w3, #2			/* modified # of rounds */
+	ld1	{v1.2d}, [x1]			/* load 1st ctriv */
+	bmi	0f
+	bne	3f
+	mov	v5.16b, v3.16b
+	b	2f
+0:	mov	v4.16b, v3.16b
+1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
+	aese	v0.16b, v4.16b
+	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v4.16b
+	aesmc	v1.16b, v1.16b
+2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
+	aese	v0.16b, v5.16b
+	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v5.16b
+	aesmc	v1.16b, v1.16b
+3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
+	subs	w3, w3, #3
+	aese	v0.16b, v3.16b
+	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v3.16b
+	aesmc	v1.16b, v1.16b
+	bpl	1b
+	aese	v0.16b, v4.16b
+	aese	v1.16b, v4.16b
+	/* final round key cancels out */
+	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
+	st1	{v0.2d}, [x0]			/* store result */
+	ret
+ENDPROC(ce_aes_ccm_final)
+
+	.macro	aes_ccm_do_crypt,enc
+	ldr	x8, [x6, #8]			/* load lower ctr */
+	ld1	{v0.2d}, [x5]			/* load mac */
+	rev	x8, x8				/* keep swabbed ctr in reg */
+0:	/* outer loop */
+	ld1	{v1.1d}, [x6]			/* load upper ctr */
+	prfm	pldl1strm, [x1]
+	add	x8, x8, #1
+	rev	x9, x8
+	cmp	w4, #12				/* which key size? */
+	sub	w7, w4, #2			/* get modified # of rounds */
+	ins	v1.d[1], x9			/* no carry in lower ctr */
+	ld1	{v3.2d}, [x3]			/* load first round key */
+	add	x10, x3, #16
+	bmi	1f
+	bne	4f
+	mov	v5.16b, v3.16b
+	b	3f
+1:	mov	v4.16b, v3.16b
+	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
+2:	/* inner loop: 3 rounds, 2x interleaved */
+	aese	v0.16b, v4.16b
+	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v4.16b
+	aesmc	v1.16b, v1.16b
+3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
+	aese	v0.16b, v5.16b
+	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v5.16b
+	aesmc	v1.16b, v1.16b
+4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
+	subs	w7, w7, #3
+	aese	v0.16b, v3.16b
+	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v3.16b
+	aesmc	v1.16b, v1.16b
+	ld1	{v5.2d}, [x10], #16		/* load next round key */
+	bpl	2b
+	aese	v0.16b, v4.16b
+	aese	v1.16b, v4.16b
+	subs	w2, w2, #16
+	bmi	6f				/* partial block? */
+	ld1	{v2.16b}, [x1], #16		/* load next input block */
+	.if	\enc == 1
+	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
+	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
+	.else
+	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
+	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
+	.endif
+	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
+	st1	{v1.16b}, [x0], #16		/* write output block */
+	bne	0b
+	rev	x8, x8
+	st1	{v0.2d}, [x5]			/* store mac */
+	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
+5:	ret
+
+6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
+	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
+	st1	{v0.2d}, [x5]			/* store mac */
+	add	w2, w2, #16			/* process partial tail block */
+7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
+	umov	w6, v1.b[0]			/* get top crypted ctr byte */
+	umov	w7, v0.b[0]			/* get top mac byte */
+	.if	\enc == 1
+	eor	w7, w7, w9
+	eor	w9, w9, w6
+	.else
+	eor	w9, w9, w6
+	eor	w7, w7, w9
+	.endif
+	strb	w9, [x0], #1			/* store out byte */
+	strb	w7, [x5], #1			/* store mac byte */
+	subs	w2, w2, #1
+	beq	5b
+	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
+	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
+	b	7b
+	.endm
+
+	/*
+	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+	 * 			   u8 const rk[], u32 rounds, u8 mac[],
+	 * 			   u8 ctr[]);
+	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+	 * 			   u8 const rk[], u32 rounds, u8 mac[],
+	 * 			   u8 ctr[]);
+	 */
+ENTRY(ce_aes_ccm_encrypt)
+	aes_ccm_do_crypt	1
+ENDPROC(ce_aes_ccm_encrypt)
+
+ENTRY(ce_aes_ccm_decrypt)
+	aes_ccm_do_crypt	0
+ENDPROC(ce_aes_ccm_decrypt)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
new file mode 100644
index 000000000..6c348df5b
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -0,0 +1,299 @@
+/*
+ * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include "aes-ce-setkey.h"
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * # of rounds specified by AES:
+	 * 128 bit key		10 rounds
+	 * 192 bit key		12 rounds
+	 * 256 bit key		14 rounds
+	 * => n byte key	=> 6 + (n/4) rounds
+	 */
+	return 6 + ctx->key_length / 4;
+}
+
+asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+				     u32 *macp, u32 const rk[], u32 rounds);
+
+asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+				   u32 const rk[], u32 rounds, u8 mac[],
+				   u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+				   u32 const rk[], u32 rounds, u8 mac[],
+				   u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
+				 u32 rounds);
+
+static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+		      unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	int ret;
+
+	ret = ce_aes_expandkey(ctx, in_key, key_len);
+	if (!ret)
+		return 0;
+
+	tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+
+static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	if ((authsize & 1) || authsize < 4)
+		return -EINVAL;
+	return 0;
+}
+
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	__be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
+	u32 l = req->iv[0] + 1;
+
+	/* verify that CCM dimension 'L' is set correctly in the IV */
+	if (l < 2 || l > 8)
+		return -EINVAL;
+
+	/* verify that msglen can in fact be represented in L bytes */
+	if (l < 4 && msglen >> (8 * l))
+		return -EOVERFLOW;
+
+	/*
+	 * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
+	 * uses a u32 type to represent msglen so the top 4 bytes are always 0.
+	 */
+	n[0] = 0;
+	n[1] = cpu_to_be32(msglen);
+
+	memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
+
+	/*
+	 * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
+	 * - bits 0..2	: max # of bytes required to represent msglen, minus 1
+	 *                (already set by caller)
+	 * - bits 3..5	: size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+	 * - bit 6	: indicates presence of authenticate-only data
+	 */
+	maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
+	if (req->assoclen)
+		maciv[0] |= 0x40;
+
+	memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
+	return 0;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+	struct __packed { __be16 l; __be32 h; u16 len; } ltag;
+	struct scatter_walk walk;
+	u32 len = req->assoclen;
+	u32 macp = 0;
+
+	/* prepend the AAD with a length tag */
+	if (len < 0xff00) {
+		ltag.l = cpu_to_be16(len);
+		ltag.len = 2;
+	} else  {
+		ltag.l = cpu_to_be16(0xfffe);
+		put_unaligned_be32(len, &ltag.h);
+		ltag.len = 6;
+	}
+
+	ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
+			     num_rounds(ctx));
+	scatterwalk_start(&walk, req->assoc);
+
+	do {
+		u32 n = scatterwalk_clamp(&walk, len);
+		u8 *p;
+
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, len);
+		}
+		p = scatterwalk_map(&walk);
+		ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
+				     num_rounds(ctx));
+		len -= n;
+
+		scatterwalk_unmap(p);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, len);
+	} while (len);
+}
+
+static int ccm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+	struct blkcipher_desc desc = { .info = req->iv };
+	struct blkcipher_walk walk;
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	u8 buf[AES_BLOCK_SIZE];
+	u32 len = req->cryptlen;
+	int err;
+
+	err = ccm_init_mac(req, mac, len);
+	if (err)
+		return err;
+
+	kernel_neon_begin_partial(6);
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, mac);
+
+	/* preserve the original iv for the final round */
+	memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+	blkcipher_walk_init(&walk, req->dst, req->src, len);
+	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+					     AES_BLOCK_SIZE);
+
+	while (walk.nbytes) {
+		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+		if (walk.nbytes == len)
+			tail = 0;
+
+		ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   walk.nbytes - tail, ctx->key_enc,
+				   num_rounds(ctx), mac, walk.iv);
+
+		len -= walk.nbytes - tail;
+		err = blkcipher_walk_done(&desc, &walk, tail);
+	}
+	if (!err)
+		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+	kernel_neon_end();
+
+	if (err)
+		return err;
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int ccm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int authsize = crypto_aead_authsize(aead);
+	struct blkcipher_desc desc = { .info = req->iv };
+	struct blkcipher_walk walk;
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	u8 buf[AES_BLOCK_SIZE];
+	u32 len = req->cryptlen - authsize;
+	int err;
+
+	err = ccm_init_mac(req, mac, len);
+	if (err)
+		return err;
+
+	kernel_neon_begin_partial(6);
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, mac);
+
+	/* preserve the original iv for the final round */
+	memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+	blkcipher_walk_init(&walk, req->dst, req->src, len);
+	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+					     AES_BLOCK_SIZE);
+
+	while (walk.nbytes) {
+		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+		if (walk.nbytes == len)
+			tail = 0;
+
+		ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   walk.nbytes - tail, ctx->key_enc,
+				   num_rounds(ctx), mac, walk.iv);
+
+		len -= walk.nbytes - tail;
+		err = blkcipher_walk_done(&desc, &walk, tail);
+	}
+	if (!err)
+		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+	kernel_neon_end();
+
+	if (err)
+		return err;
+
+	/* compare calculated auth tag with the stored one */
+	scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (memcmp(mac, buf, authsize))
+		return -EBADMSG;
+	return 0;
+}
+
+static struct crypto_alg ccm_aes_alg = {
+	.cra_name		= "ccm(aes)",
+	.cra_driver_name	= "ccm-aes-ce",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_AEAD,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_aead_type,
+	.cra_module		= THIS_MODULE,
+	.cra_aead = {
+		.ivsize		= AES_BLOCK_SIZE,
+		.maxauthsize	= AES_BLOCK_SIZE,
+		.setkey		= ccm_setkey,
+		.setauthsize	= ccm_setauthsize,
+		.encrypt	= ccm_encrypt,
+		.decrypt	= ccm_decrypt,
+	}
+};
+
+static int __init aes_mod_init(void)
+{
+	if (!(elf_hwcap & HWCAP_AES))
+		return -ENODEV;
+	return crypto_register_alg(&ccm_aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+	crypto_unregister_alg(&ccm_aes_alg);
+}
+
+module_init(aes_mod_init);
+module_exit(aes_mod_exit);
+
+MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ccm(aes)");
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
new file mode 100644
index 000000000..ce47792a9
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -0,0 +1,265 @@
+/*
+ * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include "aes-ce-setkey.h"
+
+MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+struct aes_block {
+	u8 b[AES_BLOCK_SIZE];
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * # of rounds specified by AES:
+	 * 128 bit key		10 rounds
+	 * 192 bit key		12 rounds
+	 * 256 bit key		14 rounds
+	 * => n byte key	=> 6 + (n/4) rounds
+	 */
+	return 6 + ctx->key_length / 4;
+}
+
+static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aes_block *out = (struct aes_block *)dst;
+	struct aes_block const *in = (struct aes_block *)src;
+	void *dummy0;
+	int dummy1;
+
+	kernel_neon_begin_partial(4);
+
+	__asm__("	ld1	{v0.16b}, %[in]			;"
+		"	ld1	{v1.2d}, [%[key]], #16		;"
+		"	cmp	%w[rounds], #10			;"
+		"	bmi	0f				;"
+		"	bne	3f				;"
+		"	mov	v3.16b, v1.16b			;"
+		"	b	2f				;"
+		"0:	mov	v2.16b, v1.16b			;"
+		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"1:	aese	v0.16b, v2.16b			;"
+		"	aesmc	v0.16b, v0.16b			;"
+		"2:	ld1	{v1.2d}, [%[key]], #16		;"
+		"	aese	v0.16b, v3.16b			;"
+		"	aesmc	v0.16b, v0.16b			;"
+		"3:	ld1	{v2.2d}, [%[key]], #16		;"
+		"	subs	%w[rounds], %w[rounds], #3	;"
+		"	aese	v0.16b, v1.16b			;"
+		"	aesmc	v0.16b, v0.16b			;"
+		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	bpl	1b				;"
+		"	aese	v0.16b, v2.16b			;"
+		"	eor	v0.16b, v0.16b, v3.16b		;"
+		"	st1	{v0.16b}, %[out]		;"
+
+	:	[out]		"=Q"(*out),
+		[key]		"=r"(dummy0),
+		[rounds]	"=r"(dummy1)
+	:	[in]		"Q"(*in),
+				"1"(ctx->key_enc),
+				"2"(num_rounds(ctx) - 2)
+	:	"cc");
+
+	kernel_neon_end();
+}
+
+static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aes_block *out = (struct aes_block *)dst;
+	struct aes_block const *in = (struct aes_block *)src;
+	void *dummy0;
+	int dummy1;
+
+	kernel_neon_begin_partial(4);
+
+	__asm__("	ld1	{v0.16b}, %[in]			;"
+		"	ld1	{v1.2d}, [%[key]], #16		;"
+		"	cmp	%w[rounds], #10			;"
+		"	bmi	0f				;"
+		"	bne	3f				;"
+		"	mov	v3.16b, v1.16b			;"
+		"	b	2f				;"
+		"0:	mov	v2.16b, v1.16b			;"
+		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"1:	aesd	v0.16b, v2.16b			;"
+		"	aesimc	v0.16b, v0.16b			;"
+		"2:	ld1	{v1.2d}, [%[key]], #16		;"
+		"	aesd	v0.16b, v3.16b			;"
+		"	aesimc	v0.16b, v0.16b			;"
+		"3:	ld1	{v2.2d}, [%[key]], #16		;"
+		"	subs	%w[rounds], %w[rounds], #3	;"
+		"	aesd	v0.16b, v1.16b			;"
+		"	aesimc	v0.16b, v0.16b			;"
+		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	bpl	1b				;"
+		"	aesd	v0.16b, v2.16b			;"
+		"	eor	v0.16b, v0.16b, v3.16b		;"
+		"	st1	{v0.16b}, %[out]		;"
+
+	:	[out]		"=Q"(*out),
+		[key]		"=r"(dummy0),
+		[rounds]	"=r"(dummy1)
+	:	[in]		"Q"(*in),
+				"1"(ctx->key_dec),
+				"2"(num_rounds(ctx) - 2)
+	:	"cc");
+
+	kernel_neon_end();
+}
+
+/*
+ * aes_sub() - use the aese instruction to perform the AES sbox substitution
+ *             on each byte in 'input'
+ */
+static u32 aes_sub(u32 input)
+{
+	u32 ret;
+
+	__asm__("dup	v1.4s, %w[in]		;"
+		"movi	v0.16b, #0		;"
+		"aese	v0.16b, v1.16b		;"
+		"umov	%w[out], v0.4s[0]	;"
+
+	:	[out]	"=r"(ret)
+	:	[in]	"r"(input)
+	:		"v0","v1");
+
+	return ret;
+}
+
+int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+		     unsigned int key_len)
+{
+	/*
+	 * The AES key schedule round constants
+	 */
+	static u8 const rcon[] = {
+		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
+	};
+
+	u32 kwords = key_len / sizeof(u32);
+	struct aes_block *key_enc, *key_dec;
+	int i, j;
+
+	if (key_len != AES_KEYSIZE_128 &&
+	    key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key_enc, in_key, key_len);
+	ctx->key_length = key_len;
+
+	kernel_neon_begin_partial(2);
+	for (i = 0; i < sizeof(rcon); i++) {
+		u32 *rki = ctx->key_enc + (i * kwords);
+		u32 *rko = rki + kwords;
+
+		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+		rko[1] = rko[0] ^ rki[1];
+		rko[2] = rko[1] ^ rki[2];
+		rko[3] = rko[2] ^ rki[3];
+
+		if (key_len == AES_KEYSIZE_192) {
+			if (i >= 7)
+				break;
+			rko[4] = rko[3] ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+		} else if (key_len == AES_KEYSIZE_256) {
+			if (i >= 6)
+				break;
+			rko[4] = aes_sub(rko[3]) ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+			rko[6] = rko[5] ^ rki[6];
+			rko[7] = rko[6] ^ rki[7];
+		}
+	}
+
+	/*
+	 * Generate the decryption keys for the Equivalent Inverse Cipher.
+	 * This involves reversing the order of the round keys, and applying
+	 * the Inverse Mix Columns transformation on all but the first and
+	 * the last one.
+	 */
+	key_enc = (struct aes_block *)ctx->key_enc;
+	key_dec = (struct aes_block *)ctx->key_dec;
+	j = num_rounds(ctx);
+
+	key_dec[0] = key_enc[j];
+	for (i = 1, j--; j > 0; i++, j--)
+		__asm__("ld1	{v0.16b}, %[in]		;"
+			"aesimc	v1.16b, v0.16b		;"
+			"st1	{v1.16b}, %[out]	;"
+
+		:	[out]	"=Q"(key_dec[i])
+		:	[in]	"Q"(key_enc[j])
+		:		"v0","v1");
+	key_dec[i] = key_enc[0];
+
+	kernel_neon_end();
+	return 0;
+}
+EXPORT_SYMBOL(ce_aes_expandkey);
+
+int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		  unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = ce_aes_expandkey(ctx, in_key, key_len);
+	if (!ret)
+		return 0;
+
+	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ce_aes_setkey);
+
+static struct crypto_alg aes_alg = {
+	.cra_name		= "aes",
+	.cra_driver_name	= "aes-ce",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_cipher = {
+		.cia_min_keysize	= AES_MIN_KEY_SIZE,
+		.cia_max_keysize	= AES_MAX_KEY_SIZE,
+		.cia_setkey		= ce_aes_setkey,
+		.cia_encrypt		= aes_cipher_encrypt,
+		.cia_decrypt		= aes_cipher_decrypt
+	}
+};
+
+static int __init aes_mod_init(void)
+{
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_cpu_feature_match(AES, aes_mod_init);
+module_exit(aes_mod_exit);
diff --git a/arch/arm64/crypto/aes-ce-setkey.h b/arch/arm64/crypto/aes-ce-setkey.h
new file mode 100644
index 000000000..f08a6471d
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-setkey.h
@@ -0,0 +1,5 @@
+
+int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		  unsigned int key_len);
+int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+		     unsigned int key_len);
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
new file mode 100644
index 000000000..78f3cfe92
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce.S
@@ -0,0 +1,129 @@
+/*
+ * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
+ *                                    Crypto Extensions
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func)		ENTRY(ce_ ## func)
+#define AES_ENDPROC(func)	ENDPROC(ce_ ## func)
+
+	.arch		armv8-a+crypto
+
+	/* preload all round keys */
+	.macro		load_round_keys, rounds, rk
+	cmp		\rounds, #12
+	blo		2222f		/* 128 bits */
+	beq		1111f		/* 192 bits */
+	ld1		{v17.16b-v18.16b}, [\rk], #32
+1111:	ld1		{v19.16b-v20.16b}, [\rk], #32
+2222:	ld1		{v21.16b-v24.16b}, [\rk], #64
+	ld1		{v25.16b-v28.16b}, [\rk], #64
+	ld1		{v29.16b-v31.16b}, [\rk]
+	.endm
+
+	/* prepare for encryption with key in rk[] */
+	.macro		enc_prepare, rounds, rk, ignore
+	load_round_keys	\rounds, \rk
+	.endm
+
+	/* prepare for encryption (again) but with new key in rk[] */
+	.macro		enc_switch_key, rounds, rk, ignore
+	load_round_keys	\rounds, \rk
+	.endm
+
+	/* prepare for decryption with key in rk[] */
+	.macro		dec_prepare, rounds, rk, ignore
+	load_round_keys	\rounds, \rk
+	.endm
+
+	.macro		do_enc_Nx, de, mc, k, i0, i1, i2, i3
+	aes\de		\i0\().16b, \k\().16b
+	aes\mc		\i0\().16b, \i0\().16b
+	.ifnb		\i1
+	aes\de		\i1\().16b, \k\().16b
+	aes\mc		\i1\().16b, \i1\().16b
+	.ifnb		\i3
+	aes\de		\i2\().16b, \k\().16b
+	aes\mc		\i2\().16b, \i2\().16b
+	aes\de		\i3\().16b, \k\().16b
+	aes\mc		\i3\().16b, \i3\().16b
+	.endif
+	.endif
+	.endm
+
+	/* up to 4 interleaved encryption rounds with the same round key */
+	.macro		round_Nx, enc, k, i0, i1, i2, i3
+	.ifc		\enc, e
+	do_enc_Nx	e, mc, \k, \i0, \i1, \i2, \i3
+	.else
+	do_enc_Nx	d, imc, \k, \i0, \i1, \i2, \i3
+	.endif
+	.endm
+
+	/* up to 4 interleaved final rounds */
+	.macro		fin_round_Nx, de, k, k2, i0, i1, i2, i3
+	aes\de		\i0\().16b, \k\().16b
+	.ifnb		\i1
+	aes\de		\i1\().16b, \k\().16b
+	.ifnb		\i3
+	aes\de		\i2\().16b, \k\().16b
+	aes\de		\i3\().16b, \k\().16b
+	.endif
+	.endif
+	eor		\i0\().16b, \i0\().16b, \k2\().16b
+	.ifnb		\i1
+	eor		\i1\().16b, \i1\().16b, \k2\().16b
+	.ifnb		\i3
+	eor		\i2\().16b, \i2\().16b, \k2\().16b
+	eor		\i3\().16b, \i3\().16b, \k2\().16b
+	.endif
+	.endif
+	.endm
+
+	/* up to 4 interleaved blocks */
+	.macro		do_block_Nx, enc, rounds, i0, i1, i2, i3
+	cmp		\rounds, #12
+	blo		2222f		/* 128 bits */
+	beq		1111f		/* 192 bits */
+	round_Nx	\enc, v17, \i0, \i1, \i2, \i3
+	round_Nx	\enc, v18, \i0, \i1, \i2, \i3
+1111:	round_Nx	\enc, v19, \i0, \i1, \i2, \i3
+	round_Nx	\enc, v20, \i0, \i1, \i2, \i3
+2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+	round_Nx	\enc, \key, \i0, \i1, \i2, \i3
+	.endr
+	fin_round_Nx	\enc, v30, v31, \i0, \i1, \i2, \i3
+	.endm
+
+	.macro		encrypt_block, in, rounds, t0, t1, t2
+	do_block_Nx	e, \rounds, \in
+	.endm
+
+	.macro		encrypt_block2x, i0, i1, rounds, t0, t1, t2
+	do_block_Nx	e, \rounds, \i0, \i1
+	.endm
+
+	.macro		encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+	do_block_Nx	e, \rounds, \i0, \i1, \i2, \i3
+	.endm
+
+	.macro		decrypt_block, in, rounds, t0, t1, t2
+	do_block_Nx	d, \rounds, \in
+	.endm
+
+	.macro		decrypt_block2x, i0, i1, rounds, t0, t1, t2
+	do_block_Nx	d, \rounds, \i0, \i1
+	.endm
+
+	.macro		decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+	do_block_Nx	d, \rounds, \i0, \i1, \i2, \i3
+	.endm
+
+#include "aes-modes.S"
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
new file mode 100644
index 000000000..05d9e16c0
--- /dev/null
+++ b/arch/arm64/crypto/aes-glue.c
@@ -0,0 +1,456 @@
+/*
+ * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/hwcap.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+
+#include "aes-ce-setkey.h"
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+#define MODE			"ce"
+#define PRIO			300
+#define aes_setkey		ce_aes_setkey
+#define aes_expandkey		ce_aes_expandkey
+#define aes_ecb_encrypt		ce_aes_ecb_encrypt
+#define aes_ecb_decrypt		ce_aes_ecb_decrypt
+#define aes_cbc_encrypt		ce_aes_cbc_encrypt
+#define aes_cbc_decrypt		ce_aes_cbc_decrypt
+#define aes_ctr_encrypt		ce_aes_ctr_encrypt
+#define aes_xts_encrypt		ce_aes_xts_encrypt
+#define aes_xts_decrypt		ce_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+#else
+#define MODE			"neon"
+#define PRIO			200
+#define aes_setkey		crypto_aes_set_key
+#define aes_expandkey		crypto_aes_expand_key
+#define aes_ecb_encrypt		neon_aes_ecb_encrypt
+#define aes_ecb_decrypt		neon_aes_ecb_decrypt
+#define aes_cbc_encrypt		neon_aes_cbc_encrypt
+#define aes_cbc_decrypt		neon_aes_cbc_decrypt
+#define aes_ctr_encrypt		neon_aes_ctr_encrypt
+#define aes_xts_encrypt		neon_aes_xts_encrypt
+#define aes_xts_decrypt		neon_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
+#endif
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* defined in aes-modes.S */
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, int first);
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, int first);
+
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, u8 iv[], int first);
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, u8 iv[], int first);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, u8 ctr[], int first);
+
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+				int rounds, int blocks, u8 const rk2[], u8 iv[],
+				int first);
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+				int rounds, int blocks, u8 const rk2[], u8 iv[],
+				int first);
+
+struct crypto_aes_xts_ctx {
+	struct crypto_aes_ctx key1;
+	struct crypto_aes_ctx __aligned(8) key2;
+};
+
+static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = aes_expandkey(&ctx->key1, in_key, key_len / 2);
+	if (!ret)
+		ret = aes_expandkey(&ctx->key2, &in_key[key_len / 2],
+				    key_len / 2);
+	if (!ret)
+		return 0;
+
+	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_enc, rounds, blocks, first);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_dec, rounds, blocks, first);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+				first);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
+				first);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+	first = 1;
+	kernel_neon_begin();
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+				first);
+		first = 0;
+		nbytes -= blocks * AES_BLOCK_SIZE;
+		if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
+			break;
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	if (nbytes) {
+		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+		u8 __aligned(8) tail[AES_BLOCK_SIZE];
+
+		/*
+		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
+		 * to tell aes_ctr_encrypt() to only read half a block.
+		 */
+		blocks = (nbytes <= 8) ? -1 : 1;
+
+		aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
+				blocks, walk.iv, first);
+		memcpy(tdst, tail, nbytes);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key1.key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key1.key_enc, rounds, blocks,
+				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key1.key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key1.key_dec, rounds, blocks,
+				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static struct crypto_alg aes_algs[] = { {
+	.cra_name		= "__ecb-aes-" MODE,
+	.cra_driver_name	= "__driver-ecb-aes-" MODE,
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= aes_setkey,
+		.encrypt	= ecb_encrypt,
+		.decrypt	= ecb_decrypt,
+	},
+}, {
+	.cra_name		= "__cbc-aes-" MODE,
+	.cra_driver_name	= "__driver-cbc-aes-" MODE,
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= aes_setkey,
+		.encrypt	= cbc_encrypt,
+		.decrypt	= cbc_decrypt,
+	},
+}, {
+	.cra_name		= "__ctr-aes-" MODE,
+	.cra_driver_name	= "__driver-ctr-aes-" MODE,
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= aes_setkey,
+		.encrypt	= ctr_encrypt,
+		.decrypt	= ctr_encrypt,
+	},
+}, {
+	.cra_name		= "__xts-aes-" MODE,
+	.cra_driver_name	= "__driver-xts-aes-" MODE,
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= xts_set_key,
+		.encrypt	= xts_encrypt,
+		.decrypt	= xts_decrypt,
+	},
+}, {
+	.cra_name		= "ecb(aes)",
+	.cra_driver_name	= "ecb-aes-" MODE,
+	.cra_priority		= PRIO,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "cbc-aes-" MODE,
+	.cra_priority		= PRIO,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "ctr-aes-" MODE,
+	.cra_priority		= PRIO,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "xts(aes)",
+	.cra_driver_name	= "xts-aes-" MODE,
+	.cra_priority		= PRIO,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+} };
+
+static int __init aes_init(void)
+{
+	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit aes_exit(void)
+{
+	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+module_cpu_feature_match(AES, aes_init);
+#else
+module_init(aes_init);
+#endif
+module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
new file mode 100644
index 000000000..f6e372c52
--- /dev/null
+++ b/arch/arm64/crypto/aes-modes.S
@@ -0,0 +1,532 @@
+/*
+ * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* included by aes-ce.S and aes-neon.S */
+
+	.text
+	.align		4
+
+/*
+ * There are several ways to instantiate this code:
+ * - no interleave, all inline
+ * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
+ * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
+ * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
+ * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
+ *
+ * Macros imported by this code:
+ * - enc_prepare	- setup NEON registers for encryption
+ * - dec_prepare	- setup NEON registers for decryption
+ * - enc_switch_key	- change to new key after having prepared for encryption
+ * - encrypt_block	- encrypt a single block
+ * - decrypt block	- decrypt a single block
+ * - encrypt_block2x	- encrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - decrypt_block2x	- decrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - encrypt_block4x	- encrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ * - decrypt_block4x	- decrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ */
+
+#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
+#define FRAME_PUSH	stp x29, x30, [sp,#-16]! ; mov x29, sp
+#define FRAME_POP	ldp x29, x30, [sp],#16
+
+#if INTERLEAVE == 2
+
+aes_encrypt_block2x:
+	encrypt_block2x	v0, v1, w3, x2, x6, w7
+	ret
+ENDPROC(aes_encrypt_block2x)
+
+aes_decrypt_block2x:
+	decrypt_block2x	v0, v1, w3, x2, x6, w7
+	ret
+ENDPROC(aes_decrypt_block2x)
+
+#elif INTERLEAVE == 4
+
+aes_encrypt_block4x:
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	ret
+ENDPROC(aes_encrypt_block4x)
+
+aes_decrypt_block4x:
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	ret
+ENDPROC(aes_decrypt_block4x)
+
+#else
+#error INTERLEAVE should equal 2 or 4
+#endif
+
+	.macro		do_encrypt_block2x
+	bl		aes_encrypt_block2x
+	.endm
+
+	.macro		do_decrypt_block2x
+	bl		aes_decrypt_block2x
+	.endm
+
+	.macro		do_encrypt_block4x
+	bl		aes_encrypt_block4x
+	.endm
+
+	.macro		do_decrypt_block4x
+	bl		aes_decrypt_block4x
+	.endm
+
+#else
+#define FRAME_PUSH
+#define FRAME_POP
+
+	.macro		do_encrypt_block2x
+	encrypt_block2x	v0, v1, w3, x2, x6, w7
+	.endm
+
+	.macro		do_decrypt_block2x
+	decrypt_block2x	v0, v1, w3, x2, x6, w7
+	.endm
+
+	.macro		do_encrypt_block4x
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	.endm
+
+	.macro		do_decrypt_block4x
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	.endm
+
+#endif
+
+	/*
+	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, int first)
+	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, int first)
+	 */
+
+AES_ENTRY(aes_ecb_encrypt)
+	FRAME_PUSH
+	cbz		w5, .LecbencloopNx
+
+	enc_prepare	w3, x2, x5
+
+.LecbencloopNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE
+	bmi		.Lecbenc1x
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
+	do_encrypt_block2x
+	st1		{v0.16b-v1.16b}, [x0], #32
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	do_encrypt_block4x
+	st1		{v0.16b-v3.16b}, [x0], #64
+#endif
+	b		.LecbencloopNx
+.Lecbenc1x:
+	adds		w4, w4, #INTERLEAVE
+	beq		.Lecbencout
+#endif
+.Lecbencloop:
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	encrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	bne		.Lecbencloop
+.Lecbencout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_ecb_encrypt)
+
+
+AES_ENTRY(aes_ecb_decrypt)
+	FRAME_PUSH
+	cbz		w5, .LecbdecloopNx
+
+	dec_prepare	w3, x2, x5
+
+.LecbdecloopNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE
+	bmi		.Lecbdec1x
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
+	do_decrypt_block2x
+	st1		{v0.16b-v1.16b}, [x0], #32
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	do_decrypt_block4x
+	st1		{v0.16b-v3.16b}, [x0], #64
+#endif
+	b		.LecbdecloopNx
+.Lecbdec1x:
+	adds		w4, w4, #INTERLEAVE
+	beq		.Lecbdecout
+#endif
+.Lecbdecloop:
+	ld1		{v0.16b}, [x1], #16		/* get next ct block */
+	decrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	bne		.Lecbdecloop
+.Lecbdecout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_ecb_decrypt)
+
+
+	/*
+	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 iv[], int first)
+	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 iv[], int first)
+	 */
+
+AES_ENTRY(aes_cbc_encrypt)
+	cbz		w6, .Lcbcencloop
+
+	ld1		{v0.16b}, [x5]			/* get iv */
+	enc_prepare	w3, x2, x5
+
+.Lcbcencloop:
+	ld1		{v1.16b}, [x1], #16		/* get next pt block */
+	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with iv */
+	encrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	bne		.Lcbcencloop
+	ret
+AES_ENDPROC(aes_cbc_encrypt)
+
+
+AES_ENTRY(aes_cbc_decrypt)
+	FRAME_PUSH
+	cbz		w6, .LcbcdecloopNx
+
+	ld1		{v7.16b}, [x5]			/* get iv */
+	dec_prepare	w3, x2, x5
+
+.LcbcdecloopNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE
+	bmi		.Lcbcdec1x
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
+	mov		v2.16b, v0.16b
+	mov		v3.16b, v1.16b
+	do_decrypt_block2x
+	eor		v0.16b, v0.16b, v7.16b
+	eor		v1.16b, v1.16b, v2.16b
+	mov		v7.16b, v3.16b
+	st1		{v0.16b-v1.16b}, [x0], #32
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	mov		v4.16b, v0.16b
+	mov		v5.16b, v1.16b
+	mov		v6.16b, v2.16b
+	do_decrypt_block4x
+	sub		x1, x1, #16
+	eor		v0.16b, v0.16b, v7.16b
+	eor		v1.16b, v1.16b, v4.16b
+	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
+	eor		v2.16b, v2.16b, v5.16b
+	eor		v3.16b, v3.16b, v6.16b
+	st1		{v0.16b-v3.16b}, [x0], #64
+#endif
+	b		.LcbcdecloopNx
+.Lcbcdec1x:
+	adds		w4, w4, #INTERLEAVE
+	beq		.Lcbcdecout
+#endif
+.Lcbcdecloop:
+	ld1		{v1.16b}, [x1], #16		/* get next ct block */
+	mov		v0.16b, v1.16b			/* ...and copy to v0 */
+	decrypt_block	v0, w3, x2, x5, w6
+	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
+	mov		v7.16b, v1.16b			/* ct is next iv */
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	bne		.Lcbcdecloop
+.Lcbcdecout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_cbc_decrypt)
+
+
+	/*
+	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 ctr[], int first)
+	 */
+
+AES_ENTRY(aes_ctr_encrypt)
+	FRAME_PUSH
+	cbnz		w6, .Lctrfirst		/* 1st time around? */
+	umov		x5, v4.d[1]		/* keep swabbed ctr in reg */
+	rev		x5, x5
+#if INTERLEAVE >= 2
+	cmn		w5, w4			/* 32 bit overflow? */
+	bcs		.Lctrinc
+	add		x5, x5, #1		/* increment BE ctr */
+	b		.LctrincNx
+#else
+	b		.Lctrinc
+#endif
+.Lctrfirst:
+	enc_prepare	w3, x2, x6
+	ld1		{v4.16b}, [x5]
+	umov		x5, v4.d[1]		/* keep swabbed ctr in reg */
+	rev		x5, x5
+#if INTERLEAVE >= 2
+	cmn		w5, w4			/* 32 bit overflow? */
+	bcs		.Lctrloop
+.LctrloopNx:
+	subs		w4, w4, #INTERLEAVE
+	bmi		.Lctr1x
+#if INTERLEAVE == 2
+	mov		v0.8b, v4.8b
+	mov		v1.8b, v4.8b
+	rev		x7, x5
+	add		x5, x5, #1
+	ins		v0.d[1], x7
+	rev		x7, x5
+	add		x5, x5, #1
+	ins		v1.d[1], x7
+	ld1		{v2.16b-v3.16b}, [x1], #32	/* get 2 input blocks */
+	do_encrypt_block2x
+	eor		v0.16b, v0.16b, v2.16b
+	eor		v1.16b, v1.16b, v3.16b
+	st1		{v0.16b-v1.16b}, [x0], #32
+#else
+	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
+	dup		v7.4s, w5
+	mov		v0.16b, v4.16b
+	add		v7.4s, v7.4s, v8.4s
+	mov		v1.16b, v4.16b
+	rev32		v8.16b, v7.16b
+	mov		v2.16b, v4.16b
+	mov		v3.16b, v4.16b
+	mov		v1.s[3], v8.s[0]
+	mov		v2.s[3], v8.s[1]
+	mov		v3.s[3], v8.s[2]
+	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
+	do_encrypt_block4x
+	eor		v0.16b, v5.16b, v0.16b
+	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
+	eor		v1.16b, v6.16b, v1.16b
+	eor		v2.16b, v7.16b, v2.16b
+	eor		v3.16b, v5.16b, v3.16b
+	st1		{v0.16b-v3.16b}, [x0], #64
+	add		x5, x5, #INTERLEAVE
+#endif
+	cbz		w4, .LctroutNx
+.LctrincNx:
+	rev		x7, x5
+	ins		v4.d[1], x7
+	b		.LctrloopNx
+.LctroutNx:
+	sub		x5, x5, #1
+	rev		x7, x5
+	ins		v4.d[1], x7
+	b		.Lctrout
+.Lctr1x:
+	adds		w4, w4, #INTERLEAVE
+	beq		.Lctrout
+#endif
+.Lctrloop:
+	mov		v0.16b, v4.16b
+	encrypt_block	v0, w3, x2, x6, w7
+	subs		w4, w4, #1
+	bmi		.Lctrhalfblock		/* blocks < 0 means 1/2 block */
+	ld1		{v3.16b}, [x1], #16
+	eor		v3.16b, v0.16b, v3.16b
+	st1		{v3.16b}, [x0], #16
+	beq		.Lctrout
+.Lctrinc:
+	adds		x5, x5, #1		/* increment BE ctr */
+	rev		x7, x5
+	ins		v4.d[1], x7
+	bcc		.Lctrloop		/* no overflow? */
+	umov		x7, v4.d[0]		/* load upper word of ctr  */
+	rev		x7, x7			/* ... to handle the carry */
+	add		x7, x7, #1
+	rev		x7, x7
+	ins		v4.d[0], x7
+	b		.Lctrloop
+.Lctrhalfblock:
+	ld1		{v3.8b}, [x1]
+	eor		v3.8b, v0.8b, v3.8b
+	st1		{v3.8b}, [x0]
+.Lctrout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_ctr_encrypt)
+	.ltorg
+
+
+	/*
+	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
+	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
+	 */
+
+	.macro		next_tweak, out, in, const, tmp
+	sshr		\tmp\().2d,  \in\().2d,   #63
+	and		\tmp\().16b, \tmp\().16b, \const\().16b
+	add		\out\().2d,  \in\().2d,   \in\().2d
+	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+	eor		\out\().16b, \out\().16b, \tmp\().16b
+	.endm
+
+.Lxts_mul_x:
+	.word		1, 0, 0x87, 0
+
+AES_ENTRY(aes_xts_encrypt)
+	FRAME_PUSH
+	cbz		w7, .LxtsencloopNx
+
+	ld1		{v4.16b}, [x6]
+	enc_prepare	w3, x5, x6
+	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
+	enc_switch_key	w3, x2, x6
+	ldr		q7, .Lxts_mul_x
+	b		.LxtsencNx
+
+.LxtsencloopNx:
+	ldr		q7, .Lxts_mul_x
+	next_tweak	v4, v4, v7, v8
+.LxtsencNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE
+	bmi		.Lxtsenc1x
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
+	next_tweak	v5, v4, v7, v8
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	do_encrypt_block2x
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	st1		{v0.16b-v1.16b}, [x0], #32
+	cbz		w4, .LxtsencoutNx
+	next_tweak	v4, v5, v7, v8
+	b		.LxtsencNx
+.LxtsencoutNx:
+	mov		v4.16b, v5.16b
+	b		.Lxtsencout
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	next_tweak	v5, v4, v7, v8
+	eor		v0.16b, v0.16b, v4.16b
+	next_tweak	v6, v5, v7, v8
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	next_tweak	v7, v6, v7, v8
+	eor		v3.16b, v3.16b, v7.16b
+	do_encrypt_block4x
+	eor		v3.16b, v3.16b, v7.16b
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	st1		{v0.16b-v3.16b}, [x0], #64
+	mov		v4.16b, v7.16b
+	cbz		w4, .Lxtsencout
+	b		.LxtsencloopNx
+#endif
+.Lxtsenc1x:
+	adds		w4, w4, #INTERLEAVE
+	beq		.Lxtsencout
+#endif
+.Lxtsencloop:
+	ld1		{v1.16b}, [x1], #16
+	eor		v0.16b, v1.16b, v4.16b
+	encrypt_block	v0, w3, x2, x6, w7
+	eor		v0.16b, v0.16b, v4.16b
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	beq		.Lxtsencout
+	next_tweak	v4, v4, v7, v8
+	b		.Lxtsencloop
+.Lxtsencout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_xts_encrypt)
+
+
+AES_ENTRY(aes_xts_decrypt)
+	FRAME_PUSH
+	cbz		w7, .LxtsdecloopNx
+
+	ld1		{v4.16b}, [x6]
+	enc_prepare	w3, x5, x6
+	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
+	dec_prepare	w3, x2, x6
+	ldr		q7, .Lxts_mul_x
+	b		.LxtsdecNx
+
+.LxtsdecloopNx:
+	ldr		q7, .Lxts_mul_x
+	next_tweak	v4, v4, v7, v8
+.LxtsdecNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE
+	bmi		.Lxtsdec1x
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
+	next_tweak	v5, v4, v7, v8
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	do_decrypt_block2x
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	st1		{v0.16b-v1.16b}, [x0], #32
+	cbz		w4, .LxtsdecoutNx
+	next_tweak	v4, v5, v7, v8
+	b		.LxtsdecNx
+.LxtsdecoutNx:
+	mov		v4.16b, v5.16b
+	b		.Lxtsdecout
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	next_tweak	v5, v4, v7, v8
+	eor		v0.16b, v0.16b, v4.16b
+	next_tweak	v6, v5, v7, v8
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	next_tweak	v7, v6, v7, v8
+	eor		v3.16b, v3.16b, v7.16b
+	do_decrypt_block4x
+	eor		v3.16b, v3.16b, v7.16b
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	st1		{v0.16b-v3.16b}, [x0], #64
+	mov		v4.16b, v7.16b
+	cbz		w4, .Lxtsdecout
+	b		.LxtsdecloopNx
+#endif
+.Lxtsdec1x:
+	adds		w4, w4, #INTERLEAVE
+	beq		.Lxtsdecout
+#endif
+.Lxtsdecloop:
+	ld1		{v1.16b}, [x1], #16
+	eor		v0.16b, v1.16b, v4.16b
+	decrypt_block	v0, w3, x2, x6, w7
+	eor		v0.16b, v0.16b, v4.16b
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	beq		.Lxtsdecout
+	next_tweak	v4, v4, v7, v8
+	b		.Lxtsdecloop
+.Lxtsdecout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_xts_decrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
new file mode 100644
index 000000000..b93170e1c
--- /dev/null
+++ b/arch/arm64/crypto/aes-neon.S
@@ -0,0 +1,382 @@
+/*
+ * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func)		ENTRY(neon_ ## func)
+#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
+
+	/* multiply by polynomial 'x' in GF(2^8) */
+	.macro		mul_by_x, out, in, temp, const
+	sshr		\temp, \in, #7
+	add		\out, \in, \in
+	and		\temp, \temp, \const
+	eor		\out, \out, \temp
+	.endm
+
+	/* preload the entire Sbox */
+	.macro		prepare, sbox, shiftrows, temp
+	adr		\temp, \sbox
+	movi		v12.16b, #0x40
+	ldr		q13, \shiftrows
+	movi		v14.16b, #0x1b
+	ld1		{v16.16b-v19.16b}, [\temp], #64
+	ld1		{v20.16b-v23.16b}, [\temp], #64
+	ld1		{v24.16b-v27.16b}, [\temp], #64
+	ld1		{v28.16b-v31.16b}, [\temp]
+	.endm
+
+	/* do preload for encryption */
+	.macro		enc_prepare, ignore0, ignore1, temp
+	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
+	.endm
+
+	.macro		enc_switch_key, ignore0, ignore1, temp
+	/* do nothing */
+	.endm
+
+	/* do preload for decryption */
+	.macro		dec_prepare, ignore0, ignore1, temp
+	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
+	.endm
+
+	/* apply SubBytes transformation using the the preloaded Sbox */
+	.macro		sub_bytes, in
+	sub		v9.16b, \in\().16b, v12.16b
+	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
+	sub		v10.16b, v9.16b, v12.16b
+	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
+	sub		v11.16b, v10.16b, v12.16b
+	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
+	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
+	.endm
+
+	/* apply MixColumns transformation */
+	.macro		mix_columns, in
+	mul_by_x	v10.16b, \in\().16b, v9.16b, v14.16b
+	rev32		v8.8h, \in\().8h
+	eor		\in\().16b, v10.16b, \in\().16b
+	shl		v9.4s, v8.4s, #24
+	shl		v11.4s, \in\().4s, #24
+	sri		v9.4s, v8.4s, #8
+	sri		v11.4s, \in\().4s, #8
+	eor		v9.16b, v9.16b, v8.16b
+	eor		v10.16b, v10.16b, v9.16b
+	eor		\in\().16b, v10.16b, v11.16b
+	.endm
+
+	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
+	.macro		inv_mix_columns, in
+	mul_by_x	v11.16b, \in\().16b, v10.16b, v14.16b
+	mul_by_x	v11.16b, v11.16b, v10.16b, v14.16b
+	eor		\in\().16b, \in\().16b, v11.16b
+	rev32		v11.8h, v11.8h
+	eor		\in\().16b, \in\().16b, v11.16b
+	mix_columns	\in
+	.endm
+
+	.macro		do_block, enc, in, rounds, rk, rkp, i
+	ld1		{v15.16b}, [\rk]
+	add		\rkp, \rk, #16
+	mov		\i, \rounds
+1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
+	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
+	sub_bytes	\in
+	ld1		{v15.16b}, [\rkp], #16
+	subs		\i, \i, #1
+	beq		2222f
+	.if		\enc == 1
+	mix_columns	\in
+	.else
+	inv_mix_columns	\in
+	.endif
+	b		1111b
+2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
+	.endm
+
+	.macro		encrypt_block, in, rounds, rk, rkp, i
+	do_block	1, \in, \rounds, \rk, \rkp, \i
+	.endm
+
+	.macro		decrypt_block, in, rounds, rk, rkp, i
+	do_block	0, \in, \rounds, \rk, \rkp, \i
+	.endm
+
+	/*
+	 * Interleaved versions: functionally equivalent to the
+	 * ones above, but applied to 2 or 4 AES states in parallel.
+	 */
+
+	.macro		sub_bytes_2x, in0, in1
+	sub		v8.16b, \in0\().16b, v12.16b
+	sub		v9.16b, \in1\().16b, v12.16b
+	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+	sub		v10.16b, v8.16b, v12.16b
+	sub		v11.16b, v9.16b, v12.16b
+	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
+	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
+	sub		v8.16b, v10.16b, v12.16b
+	sub		v9.16b, v11.16b, v12.16b
+	tbx		\in0\().16b, {v24.16b-v27.16b}, v10.16b
+	tbx		\in1\().16b, {v24.16b-v27.16b}, v11.16b
+	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
+	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
+	.endm
+
+	.macro		sub_bytes_4x, in0, in1, in2, in3
+	sub		v8.16b, \in0\().16b, v12.16b
+	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+	sub		v9.16b, \in1\().16b, v12.16b
+	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+	sub		v10.16b, \in2\().16b, v12.16b
+	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
+	sub		v11.16b, \in3\().16b, v12.16b
+	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
+	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
+	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
+	sub		v8.16b, v8.16b, v12.16b
+	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
+	sub		v9.16b, v9.16b, v12.16b
+	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
+	sub		v10.16b, v10.16b, v12.16b
+	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
+	sub		v11.16b, v11.16b, v12.16b
+	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
+	sub		v8.16b, v8.16b, v12.16b
+	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
+	sub		v9.16b, v9.16b, v12.16b
+	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
+	sub		v10.16b, v10.16b, v12.16b
+	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
+	sub		v11.16b, v11.16b, v12.16b
+	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
+	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
+	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
+	.endm
+
+	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
+	sshr		\tmp0\().16b, \in0\().16b,  #7
+	add		\out0\().16b, \in0\().16b,  \in0\().16b
+	sshr		\tmp1\().16b, \in1\().16b,  #7
+	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
+	add		\out1\().16b, \in1\().16b,  \in1\().16b
+	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
+	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
+	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
+	.endm
+
+	.macro		mix_columns_2x, in0, in1
+	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
+	rev32		v10.8h, \in0\().8h
+	rev32		v11.8h, \in1\().8h
+	eor		\in0\().16b, v8.16b, \in0\().16b
+	eor		\in1\().16b, v9.16b, \in1\().16b
+	shl		v12.4s, v10.4s, #24
+	shl		v13.4s, v11.4s, #24
+	eor		v8.16b, v8.16b, v10.16b
+	sri		v12.4s, v10.4s, #8
+	shl		v10.4s, \in0\().4s, #24
+	eor		v9.16b, v9.16b, v11.16b
+	sri		v13.4s, v11.4s, #8
+	shl		v11.4s, \in1\().4s, #24
+	sri		v10.4s, \in0\().4s, #8
+	eor		\in0\().16b, v8.16b, v12.16b
+	sri		v11.4s, \in1\().4s, #8
+	eor		\in1\().16b, v9.16b, v13.16b
+	eor		\in0\().16b, v10.16b, \in0\().16b
+	eor		\in1\().16b, v11.16b, \in1\().16b
+	.endm
+
+	.macro		inv_mix_cols_2x, in0, in1
+	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
+	mul_by_x_2x	v8, v9, v8, v9, v10, v11, v14
+	eor		\in0\().16b, \in0\().16b, v8.16b
+	eor		\in1\().16b, \in1\().16b, v9.16b
+	rev32		v8.8h, v8.8h
+	rev32		v9.8h, v9.8h
+	eor		\in0\().16b, \in0\().16b, v8.16b
+	eor		\in1\().16b, \in1\().16b, v9.16b
+	mix_columns_2x	\in0, \in1
+	.endm
+
+	.macro		inv_mix_cols_4x, in0, in1, in2, in3
+	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
+	mul_by_x_2x	v10, v11, \in2, \in3, v12, v13, v14
+	mul_by_x_2x	v8, v9, v8, v9, v12, v13, v14
+	mul_by_x_2x	v10, v11, v10, v11, v12, v13, v14
+	eor		\in0\().16b, \in0\().16b, v8.16b
+	eor		\in1\().16b, \in1\().16b, v9.16b
+	eor		\in2\().16b, \in2\().16b, v10.16b
+	eor		\in3\().16b, \in3\().16b, v11.16b
+	rev32		v8.8h, v8.8h
+	rev32		v9.8h, v9.8h
+	rev32		v10.8h, v10.8h
+	rev32		v11.8h, v11.8h
+	eor		\in0\().16b, \in0\().16b, v8.16b
+	eor		\in1\().16b, \in1\().16b, v9.16b
+	eor		\in2\().16b, \in2\().16b, v10.16b
+	eor		\in3\().16b, \in3\().16b, v11.16b
+	mix_columns_2x	\in0, \in1
+	mix_columns_2x	\in2, \in3
+	.endm
+
+	.macro		do_block_2x, enc, in0, in1 rounds, rk, rkp, i
+	ld1		{v15.16b}, [\rk]
+	add		\rkp, \rk, #16
+	mov		\i, \rounds
+1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
+	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
+	sub_bytes_2x	\in0, \in1
+	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
+	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
+	ld1		{v15.16b}, [\rkp], #16
+	subs		\i, \i, #1
+	beq		2222f
+	.if		\enc == 1
+	mix_columns_2x	\in0, \in1
+	ldr		q13, .LForward_ShiftRows
+	.else
+	inv_mix_cols_2x	\in0, \in1
+	ldr		q13, .LReverse_ShiftRows
+	.endif
+	movi		v12.16b, #0x40
+	b		1111b
+2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
+	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
+	.endm
+
+	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
+	ld1		{v15.16b}, [\rk]
+	add		\rkp, \rk, #16
+	mov		\i, \rounds
+1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
+	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
+	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
+	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
+	sub_bytes_4x	\in0, \in1, \in2, \in3
+	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
+	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
+	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
+	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
+	ld1		{v15.16b}, [\rkp], #16
+	subs		\i, \i, #1
+	beq		2222f
+	.if		\enc == 1
+	mix_columns_2x	\in0, \in1
+	mix_columns_2x	\in2, \in3
+	ldr		q13, .LForward_ShiftRows
+	.else
+	inv_mix_cols_4x	\in0, \in1, \in2, \in3
+	ldr		q13, .LReverse_ShiftRows
+	.endif
+	movi		v12.16b, #0x40
+	b		1111b
+2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
+	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
+	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
+	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
+	.endm
+
+	.macro		encrypt_block2x, in0, in1, rounds, rk, rkp, i
+	do_block_2x	1, \in0, \in1, \rounds, \rk, \rkp, \i
+	.endm
+
+	.macro		decrypt_block2x, in0, in1, rounds, rk, rkp, i
+	do_block_2x	0, \in0, \in1, \rounds, \rk, \rkp, \i
+	.endm
+
+	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+	.endm
+
+	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+	.endm
+
+#include "aes-modes.S"
+
+	.text
+	.align		4
+.LForward_ShiftRows:
+	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
+	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+
+.LReverse_ShiftRows:
+	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
+	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+
+.LForward_Sbox:
+	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+
+.LReverse_Sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
new file mode 100644
index 000000000..6a37c3c6b
--- /dev/null
+++ b/arch/arm64/crypto/crc32-arm64.c
@@ -0,0 +1,290 @@
+/*
+ * crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions
+ *
+ * Module based on crypto/crc32c_generic.c
+ *
+ * CRC32 loop taken from Ed Nevill's Hadoop CRC patch
+ * http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E
+ *
+ * Using inline assembly instead of intrinsics in order to be backwards
+ * compatible with older compilers.
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/unaligned/access_ok.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+MODULE_AUTHOR("Yazen Ghannam <yazen.ghannam@linaro.org>");
+MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions");
+MODULE_LICENSE("GPL v2");
+
+#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+
+static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
+{
+	s64 length = len;
+
+	while ((length -= sizeof(u64)) >= 0) {
+		CRC32X(crc, get_unaligned_le64(p));
+		p += sizeof(u64);
+	}
+
+	/* The following is more efficient than the straight loop */
+	if (length & sizeof(u32)) {
+		CRC32W(crc, get_unaligned_le32(p));
+		p += sizeof(u32);
+	}
+	if (length & sizeof(u16)) {
+		CRC32H(crc, get_unaligned_le16(p));
+		p += sizeof(u16);
+	}
+	if (length & sizeof(u8))
+		CRC32B(crc, *p);
+
+	return crc;
+}
+
+static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
+{
+	s64 length = len;
+
+	while ((length -= sizeof(u64)) >= 0) {
+		CRC32CX(crc, get_unaligned_le64(p));
+		p += sizeof(u64);
+	}
+
+	/* The following is more efficient than the straight loop */
+	if (length & sizeof(u32)) {
+		CRC32CW(crc, get_unaligned_le32(p));
+		p += sizeof(u32);
+	}
+	if (length & sizeof(u16)) {
+		CRC32CH(crc, get_unaligned_le16(p));
+		p += sizeof(u16);
+	}
+	if (length & sizeof(u8))
+		CRC32CB(crc, *p);
+
+	return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+struct chksum_ctx {
+	u32 key;
+};
+
+struct chksum_desc_ctx {
+	u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = mctx->key;
+
+	return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (keylen != sizeof(mctx->key)) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	mctx->key = get_unaligned_le32(key);
+	return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(ctx->crc, out);
+	return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(~ctx->crc, out);
+	return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(~crc32c_arm64_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int crc32_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = 0;
+	return 0;
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
+}
+
+static struct shash_alg crc32_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksum_update,
+	.final			=	chksum_final,
+	.finup			=	chksum_finup,
+	.digest			=	chksum_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32",
+		.cra_driver_name	=	"crc32-arm64-hw",
+		.cra_priority		=	300,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	crc32_cra_init,
+	}
+};
+
+static struct shash_alg crc32c_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksumc_update,
+	.final			=	chksumc_final,
+	.finup			=	chksumc_finup,
+	.digest			=	chksumc_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32c",
+		.cra_driver_name	=	"crc32c-arm64-hw",
+		.cra_priority		=	300,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	crc32c_cra_init,
+	}
+};
+
+static int __init crc32_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_shash(&crc32_alg);
+
+	if (err)
+		return err;
+
+	err = crypto_register_shash(&crc32c_alg);
+
+	if (err) {
+		crypto_unregister_shash(&crc32_alg);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+	crypto_unregister_shash(&crc32_alg);
+	crypto_unregister_shash(&crc32c_alg);
+}
+
+module_cpu_feature_match(CRC32, crc32_mod_init);
+module_exit(crc32_mod_exit);
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
new file mode 100644
index 000000000..dc4570158
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -0,0 +1,79 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	SHASH	.req	v0
+	SHASH2	.req	v1
+	T1	.req	v2
+	T2	.req	v3
+	MASK	.req	v4
+	XL	.req	v5
+	XM	.req	v6
+	XH	.req	v7
+	IN1	.req	v7
+
+	.text
+	.arch		armv8-a+crypto
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update)
+	ld1		{SHASH.16b}, [x3]
+	ld1		{XL.16b}, [x1]
+	movi		MASK.16b, #0xe1
+	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
+	shl		MASK.2d, MASK.2d, #57
+	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
+
+	/* do the head block first, if supplied */
+	cbz		x4, 0f
+	ld1		{T1.2d}, [x4]
+	b		1f
+
+0:	ld1		{T1.2d}, [x2], #16
+	sub		w0, w0, #1
+
+1:	/* multiply XL by SHASH in GF(2^128) */
+CPU_LE(	rev64		T1.16b, T1.16b	)
+
+	ext		T2.16b, XL.16b, XL.16b, #8
+	ext		IN1.16b, T1.16b, T1.16b, #8
+	eor		T1.16b, T1.16b, T2.16b
+	eor		XL.16b, XL.16b, IN1.16b
+
+	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
+	eor		T1.16b, T1.16b, XL.16b
+	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
+	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
+
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		T2.16b, XL.16b, XH.16b
+	eor		XM.16b, XM.16b, T1.16b
+	eor		XM.16b, XM.16b, T2.16b
+	pmull		T2.1q, XL.1d, MASK.1d
+
+	mov		XH.d[0], XM.d[1]
+	mov		XM.d[1], XL.d[0]
+
+	eor		XL.16b, XM.16b, T2.16b
+	ext		T2.16b, XL.16b, XL.16b, #8
+	pmull		XL.1q, XL.1d, MASK.1d
+	eor		T2.16b, T2.16b, XH.16b
+	eor		XL.16b, XL.16b, T2.16b
+
+	cbnz		w0, 0b
+
+	st1		{XL.16b}, [x1]
+	ret
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000..833ec1e3f
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -0,0 +1,156 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+struct ghash_key {
+	u64 a;
+	u64 b;
+};
+
+struct ghash_desc_ctx {
+	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+	u8 buf[GHASH_BLOCK_SIZE];
+	u32 count;
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+				   struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	*ctx = (struct ghash_desc_ctx){};
+	return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+			unsigned int len)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+	ctx->count += len;
+
+	if ((partial + len) >= GHASH_BLOCK_SIZE) {
+		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+		int blocks;
+
+		if (partial) {
+			int p = GHASH_BLOCK_SIZE - partial;
+
+			memcpy(ctx->buf + partial, src, p);
+			src += p;
+			len -= p;
+		}
+
+		blocks = len / GHASH_BLOCK_SIZE;
+		len %= GHASH_BLOCK_SIZE;
+
+		kernel_neon_begin_partial(8);
+		pmull_ghash_update(blocks, ctx->digest, src, key,
+				   partial ? ctx->buf : NULL);
+		kernel_neon_end();
+		src += blocks * GHASH_BLOCK_SIZE;
+		partial = 0;
+	}
+	if (len)
+		memcpy(ctx->buf + partial, src, len);
+	return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+	if (partial) {
+		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+
+		kernel_neon_begin_partial(8);
+		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+		kernel_neon_end();
+	}
+	put_unaligned_be64(ctx->digest[1], dst);
+	put_unaligned_be64(ctx->digest[0], dst + 8);
+
+	*ctx = (struct ghash_desc_ctx){};
+	return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	struct ghash_key *key = crypto_shash_ctx(tfm);
+	u64 a, b;
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	/* perform multiplication by 'x' in GF(2^128) */
+	b = get_unaligned_be64(inkey);
+	a = get_unaligned_be64(inkey + 8);
+
+	key->a = (a << 1) | (b >> 63);
+	key->b = (b << 1) | (a >> 63);
+
+	if (b >> 63)
+		key->b ^= 0xc200000000000000UL;
+
+	return 0;
+}
+
+static struct shash_alg ghash_alg = {
+	.digestsize	= GHASH_DIGEST_SIZE,
+	.init		= ghash_init,
+	.update		= ghash_update,
+	.final		= ghash_final,
+	.setkey		= ghash_setkey,
+	.descsize	= sizeof(struct ghash_desc_ctx),
+	.base		= {
+		.cra_name		= "ghash",
+		.cra_driver_name	= "ghash-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= GHASH_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct ghash_key),
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+	return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+	crypto_unregister_shash(&ghash_alg);
+}
+
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
new file mode 100644
index 000000000..033aae6d7
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -0,0 +1,150 @@
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.arch		armv8-a+crypto
+
+	k0		.req	v0
+	k1		.req	v1
+	k2		.req	v2
+	k3		.req	v3
+
+	t0		.req	v4
+	t1		.req	v5
+
+	dga		.req	q6
+	dgav		.req	v6
+	dgb		.req	s7
+	dgbv		.req	v7
+
+	dg0q		.req	q12
+	dg0s		.req	s12
+	dg0v		.req	v12
+	dg1s		.req	s13
+	dg1v		.req	v13
+	dg2s		.req	s14
+
+	.macro		add_only, op, ev, rc, s0, dg1
+	.ifc		\ev, ev
+	add		t1.4s, v\s0\().4s, \rc\().4s
+	sha1h		dg2s, dg0s
+	.ifnb		\dg1
+	sha1\op		dg0q, \dg1, t0.4s
+	.else
+	sha1\op		dg0q, dg1s, t0.4s
+	.endif
+	.else
+	.ifnb		\s0
+	add		t0.4s, v\s0\().4s, \rc\().4s
+	.endif
+	sha1h		dg1s, dg0s
+	sha1\op		dg0q, dg2s, t1.4s
+	.endif
+	.endm
+
+	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
+	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
+	add_only	\op, \ev, \rc, \s1, \dg1
+	sha1su1		v\s0\().4s, v\s3\().4s
+	.endm
+
+	/*
+	 * The SHA1 round constants
+	 */
+	.align		4
+.Lsha1_rcon:
+	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+
+	/*
+	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+	 *			  int blocks)
+	 */
+ENTRY(sha1_ce_transform)
+	/* load round constants */
+	adr		x6, .Lsha1_rcon
+	ld1r		{k0.4s}, [x6], #4
+	ld1r		{k1.4s}, [x6], #4
+	ld1r		{k2.4s}, [x6], #4
+	ld1r		{k3.4s}, [x6]
+
+	/* load state */
+	ldr		dga, [x0]
+	ldr		dgb, [x0, #16]
+
+	/* load sha1_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
+
+	/* load input */
+0:	ld1		{v8.4s-v11.4s}, [x1], #64
+	sub		w2, w2, #1
+
+CPU_LE(	rev32		v8.16b, v8.16b		)
+CPU_LE(	rev32		v9.16b, v9.16b		)
+CPU_LE(	rev32		v10.16b, v10.16b	)
+CPU_LE(	rev32		v11.16b, v11.16b	)
+
+1:	add		t0.4s, v8.4s, k0.4s
+	mov		dg0v.16b, dgav.16b
+
+	add_update	c, ev, k0,  8,  9, 10, 11, dgb
+	add_update	c, od, k0,  9, 10, 11,  8
+	add_update	c, ev, k0, 10, 11,  8,  9
+	add_update	c, od, k0, 11,  8,  9, 10
+	add_update	c, ev, k1,  8,  9, 10, 11
+
+	add_update	p, od, k1,  9, 10, 11,  8
+	add_update	p, ev, k1, 10, 11,  8,  9
+	add_update	p, od, k1, 11,  8,  9, 10
+	add_update	p, ev, k1,  8,  9, 10, 11
+	add_update	p, od, k2,  9, 10, 11,  8
+
+	add_update	m, ev, k2, 10, 11,  8,  9
+	add_update	m, od, k2, 11,  8,  9, 10
+	add_update	m, ev, k2,  8,  9, 10, 11
+	add_update	m, od, k2,  9, 10, 11,  8
+	add_update	m, ev, k3, 10, 11,  8,  9
+
+	add_update	p, od, k3, 11,  8,  9, 10
+	add_only	p, ev, k3,  9
+	add_only	p, od, k3, 10
+	add_only	p, ev, k3, 11
+	add_only	p, od
+
+	/* update state */
+	add		dgbv.2s, dgbv.2s, dg1v.2s
+	add		dgav.4s, dgav.4s, dg0v.4s
+
+	cbnz		w2, 0b
+
+	/*
+	 * Final block: add padding and total bit count.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
+	 */
+	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
+	movi		v9.2d, #0
+	mov		x8, #0x80000000
+	movi		v10.2d, #0
+	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
+	fmov		d8, x8
+	mov		x4, #0
+	mov		v11.d[0], xzr
+	mov		v11.d[1], x7
+	b		1b
+
+	/* store new state */
+3:	str		dga, [x0]
+	str		dgb, [x0, #16]
+	ret
+ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000..aefda9868
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -0,0 +1,114 @@
+/*
+ * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha1_base.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
+MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+struct sha1_ce_state {
+	struct sha1_state	sst;
+	u32			finalize;
+};
+
+asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+				  int blocks);
+
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len)
+{
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, u8 *out)
+{
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
+
+	ASM_EXPORT(sha1_ce_offsetof_count,
+		   offsetof(struct sha1_ce_state, sst.count));
+	ASM_EXPORT(sha1_ce_offsetof_finalize,
+		   offsetof(struct sha1_ce_state, finalize));
+
+	/*
+	 * Allow the asm code to perform the finalization if there is no
+	 * partial data and the input is a round multiple of the block size.
+	 */
+	sctx->finalize = finalize;
+
+	kernel_neon_begin_partial(16);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	if (!finalize)
+		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
+	return sha1_base_finish(desc, out);
+}
+
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
+	return sha1_base_finish(desc, out);
+}
+
+static struct shash_alg alg = {
+	.init			= sha1_base_init,
+	.update			= sha1_ce_update,
+	.final			= sha1_ce_final,
+	.finup			= sha1_ce_finup,
+	.descsize		= sizeof(struct sha1_ce_state),
+	.digestsize		= SHA1_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha1",
+		.cra_driver_name	= "sha1-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+};
+
+static int __init sha1_ce_mod_init(void)
+{
+	return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_ce_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
+module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
new file mode 100644
index 000000000..5df9d9d47
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -0,0 +1,153 @@
+/*
+ * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.arch		armv8-a+crypto
+
+	dga		.req	q20
+	dgav		.req	v20
+	dgb		.req	q21
+	dgbv		.req	v21
+
+	t0		.req	v22
+	t1		.req	v23
+
+	dg0q		.req	q24
+	dg0v		.req	v24
+	dg1q		.req	q25
+	dg1v		.req	v25
+	dg2q		.req	q26
+	dg2v		.req	v26
+
+	.macro		add_only, ev, rc, s0
+	mov		dg2v.16b, dg0v.16b
+	.ifeq		\ev
+	add		t1.4s, v\s0\().4s, \rc\().4s
+	sha256h		dg0q, dg1q, t0.4s
+	sha256h2	dg1q, dg2q, t0.4s
+	.else
+	.ifnb		\s0
+	add		t0.4s, v\s0\().4s, \rc\().4s
+	.endif
+	sha256h		dg0q, dg1q, t1.4s
+	sha256h2	dg1q, dg2q, t1.4s
+	.endif
+	.endm
+
+	.macro		add_update, ev, rc, s0, s1, s2, s3
+	sha256su0	v\s0\().4s, v\s1\().4s
+	add_only	\ev, \rc, \s1
+	sha256su1	v\s0\().4s, v\s2\().4s, v\s3\().4s
+	.endm
+
+	/*
+	 * The SHA-256 round constants
+	 */
+	.align		4
+.Lsha2_rcon:
+	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+	/*
+	 * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+	 *			  int blocks)
+	 */
+ENTRY(sha2_ce_transform)
+	/* load round constants */
+	adr		x8, .Lsha2_rcon
+	ld1		{ v0.4s- v3.4s}, [x8], #64
+	ld1		{ v4.4s- v7.4s}, [x8], #64
+	ld1		{ v8.4s-v11.4s}, [x8], #64
+	ld1		{v12.4s-v15.4s}, [x8]
+
+	/* load state */
+	ldp		dga, dgb, [x0]
+
+	/* load sha256_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
+
+	/* load input */
+0:	ld1		{v16.4s-v19.4s}, [x1], #64
+	sub		w2, w2, #1
+
+CPU_LE(	rev32		v16.16b, v16.16b	)
+CPU_LE(	rev32		v17.16b, v17.16b	)
+CPU_LE(	rev32		v18.16b, v18.16b	)
+CPU_LE(	rev32		v19.16b, v19.16b	)
+
+1:	add		t0.4s, v16.4s, v0.4s
+	mov		dg0v.16b, dgav.16b
+	mov		dg1v.16b, dgbv.16b
+
+	add_update	0,  v1, 16, 17, 18, 19
+	add_update	1,  v2, 17, 18, 19, 16
+	add_update	0,  v3, 18, 19, 16, 17
+	add_update	1,  v4, 19, 16, 17, 18
+
+	add_update	0,  v5, 16, 17, 18, 19
+	add_update	1,  v6, 17, 18, 19, 16
+	add_update	0,  v7, 18, 19, 16, 17
+	add_update	1,  v8, 19, 16, 17, 18
+
+	add_update	0,  v9, 16, 17, 18, 19
+	add_update	1, v10, 17, 18, 19, 16
+	add_update	0, v11, 18, 19, 16, 17
+	add_update	1, v12, 19, 16, 17, 18
+
+	add_only	0, v13, 17
+	add_only	1, v14, 18
+	add_only	0, v15, 19
+	add_only	1
+
+	/* update state */
+	add		dgav.4s, dgav.4s, dg0v.4s
+	add		dgbv.4s, dgbv.4s, dg1v.4s
+
+	/* handled all input blocks? */
+	cbnz		w2, 0b
+
+	/*
+	 * Final block: add padding and total bit count.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
+	 */
+	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha256_ce_offsetof_count]
+	movi		v17.2d, #0
+	mov		x8, #0x80000000
+	movi		v18.2d, #0
+	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
+	fmov		d16, x8
+	mov		x4, #0
+	mov		v19.d[0], xzr
+	mov		v19.d[1], x7
+	b		1b
+
+	/* store new state */
+3:	stp		dga, dgb, [x0]
+	ret
+ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000..7cd587564
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -0,0 +1,130 @@
+/*
+ * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+struct sha256_ce_state {
+	struct sha256_state	sst;
+	u32			finalize;
+};
+
+asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+				  int blocks);
+
+static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int len)
+{
+	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(28);
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out)
+{
+	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
+
+	ASM_EXPORT(sha256_ce_offsetof_count,
+		   offsetof(struct sha256_ce_state, sst.count));
+	ASM_EXPORT(sha256_ce_offsetof_finalize,
+		   offsetof(struct sha256_ce_state, finalize));
+
+	/*
+	 * Allow the asm code to perform the finalization if there is no
+	 * partial data and the input is a round multiple of the block size.
+	 */
+	sctx->finalize = finalize;
+
+	kernel_neon_begin_partial(28);
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha2_ce_transform);
+	if (!finalize)
+		sha256_base_do_finalize(desc,
+					(sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();
+	return sha256_base_finish(desc, out);
+}
+
+static int sha256_ce_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(28);
+	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();
+	return sha256_base_finish(desc, out);
+}
+
+static struct shash_alg algs[] = { {
+	.init			= sha224_base_init,
+	.update			= sha256_ce_update,
+	.final			= sha256_ce_final,
+	.finup			= sha256_ce_finup,
+	.descsize		= sizeof(struct sha256_ce_state),
+	.digestsize		= SHA224_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha224",
+		.cra_driver_name	= "sha224-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA256_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+}, {
+	.init			= sha256_base_init,
+	.update			= sha256_ce_update,
+	.final			= sha256_ce_final,
+	.finup			= sha256_ce_finup,
+	.descsize		= sizeof(struct sha256_ce_state),
+	.digestsize		= SHA256_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha256",
+		.cra_driver_name	= "sha256-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA256_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+} };
+
+static int __init sha2_ce_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha2_ce_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
+module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
new file mode 100644
index 000000000..55103e50c
--- /dev/null
+++ b/arch/arm64/include/asm/Kbuild
@@ -0,0 +1,58 @@
+
+
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += checksum.h
+generic-y += clkdev.h
+generic-y += cputime.h
+generic-y += current.h
+generic-y += delay.h
+generic-y += div64.h
+generic-y += dma.h
+generic-y += dma-contiguous.h
+generic-y += early_ioremap.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += ftrace.h
+generic-y += hw_irq.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += kvm_para.h
+generic-y += local.h
+generic-y += local64.h
+generic-y += mcs_spinlock.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += msi.h
+generic-y += mutex.h
+generic-y += pci.h
+generic-y += pci-bridge.h
+generic-y += poll.h
+generic-y += preempt.h
+generic-y += resource.h
+generic-y += rwsem.h
+generic-y += scatterlist.h
+generic-y += sections.h
+generic-y += segment.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += simd.h
+generic-y += sizes.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += swab.h
+generic-y += switch_to.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += topology.h
+generic-y += trace_clock.h
+generic-y += types.h
+generic-y += unaligned.h
+generic-y += user.h
+generic-y += vga.h
+generic-y += xor.h
diff --git a/arch/arm64/include/asm/acenv.h b/arch/arm64/include/asm/acenv.h
new file mode 100644
index 000000000..b49166fde
--- /dev/null
+++ b/arch/arm64/include/asm/acenv.h
@@ -0,0 +1,18 @@
+/*
+ * ARM64 specific ACPICA environments and implementation
+ *
+ * Copyright (C) 2014, Linaro Ltd.
+ *   Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *   Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ACENV_H
+#define _ASM_ACENV_H
+
+/* It is required unconditionally by ACPI core, update it when needed. */
+
+#endif /* _ASM_ACENV_H */
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
new file mode 100644
index 000000000..59c05d8ea
--- /dev/null
+++ b/arch/arm64/include/asm/acpi.h
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (C) 2013-2014, Linaro Ltd.
+ *	Author: Al Stone <al.stone@linaro.org>
+ *	Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *	Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation;
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+#include <linux/mm.h>
+#include <linux/irqchip/arm-gic-acpi.h>
+
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+
+/* Basic configuration for ACPI */
+#ifdef	CONFIG_ACPI
+/* ACPI table mapping after acpi_gbl_permanent_mmap is set */
+static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
+					    acpi_size size)
+{
+	if (!page_is_ram(phys >> PAGE_SHIFT))
+		return ioremap(phys, size);
+
+	return ioremap_cache(phys, size);
+}
+#define acpi_os_ioremap acpi_os_ioremap
+
+typedef u64 phys_cpuid_t;
+#define PHYS_CPUID_INVALID INVALID_HWID
+
+#define acpi_strict 1	/* No out-of-spec workarounds on ARM64 */
+extern int acpi_disabled;
+extern int acpi_noirq;
+extern int acpi_pci_disabled;
+
+/* 1 to indicate PSCI 0.2+ is implemented */
+static inline bool acpi_psci_present(void)
+{
+	return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_COMPLIANT;
+}
+
+/* 1 to indicate HVC must be used instead of SMC as the PSCI conduit */
+static inline bool acpi_psci_use_hvc(void)
+{
+	return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC;
+}
+
+static inline void disable_acpi(void)
+{
+	acpi_disabled = 1;
+	acpi_pci_disabled = 1;
+	acpi_noirq = 1;
+}
+
+static inline void enable_acpi(void)
+{
+	acpi_disabled = 0;
+	acpi_pci_disabled = 0;
+	acpi_noirq = 0;
+}
+
+/*
+ * The ACPI processor driver for ACPI core code needs this macro
+ * to find out this cpu was already mapped (mapping from CPU hardware
+ * ID to CPU logical ID) or not.
+ */
+#define cpu_physical_id(cpu) cpu_logical_map(cpu)
+
+/*
+ * It's used from ACPI core in kdump to boot UP system with SMP kernel,
+ * with this check the ACPI core will not override the CPU index
+ * obtained from GICC with 0 and not print some error message as well.
+ * Since MADT must provide at least one GICC structure for GIC
+ * initialization, CPU will be always available in MADT on ARM64.
+ */
+static inline bool acpi_has_cpu_in_madt(void)
+{
+	return true;
+}
+
+static inline void arch_fix_phys_package_id(int num, u32 slot) { }
+void __init acpi_init_cpus(void);
+
+#else
+static inline bool acpi_psci_present(void) { return false; }
+static inline bool acpi_psci_use_hvc(void) { return false; }
+static inline void acpi_init_cpus(void) { }
+#endif /* CONFIG_ACPI */
+
+#endif /*_ASM_ACPI_H*/
diff --git a/arch/arm64/include/asm/alternative-asm.h b/arch/arm64/include/asm/alternative-asm.h
new file mode 100644
index 000000000..919a67855
--- /dev/null
+++ b/arch/arm64/include/asm/alternative-asm.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_ALTERNATIVE_ASM_H
+#define __ASM_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+	.word \orig_offset - .
+	.word \alt_offset - .
+	.hword \feature
+	.byte \orig_len
+	.byte \alt_len
+.endm
+
+.macro alternative_insn insn1 insn2 cap
+661:	\insn1
+662:	.pushsection .altinstructions, "a"
+	altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+	.popsection
+	.pushsection .altinstr_replacement, "ax"
+663:	\insn2
+664:	.popsection
+	.if ((664b-663b) != (662b-661b))
+		.error "Alternatives instruction length mismatch"
+	.endif
+.endm
+
+#endif  /*  __ASSEMBLY__  */
+
+#endif /* __ASM_ALTERNATIVE_ASM_H */
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
new file mode 100644
index 000000000..d261f01e2
--- /dev/null
+++ b/arch/arm64/include/asm/alternative.h
@@ -0,0 +1,44 @@
+#ifndef __ASM_ALTERNATIVE_H
+#define __ASM_ALTERNATIVE_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+
+struct alt_instr {
+	s32 orig_offset;	/* offset to original instruction */
+	s32 alt_offset;		/* offset to replacement instruction */
+	u16 cpufeature;		/* cpufeature bit set for replacement */
+	u8  orig_len;		/* size of original instruction(s) */
+	u8  alt_len;		/* size of new instruction(s), <= orig_len */
+};
+
+void apply_alternatives_all(void);
+void apply_alternatives(void *start, size_t length);
+void free_alternatives_memory(void);
+
+#define ALTINSTR_ENTRY(feature)						      \
+	" .word 661b - .\n"				/* label           */ \
+	" .word 663f - .\n"				/* new instruction */ \
+	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
+	" .byte 662b-661b\n"				/* source len      */ \
+	" .byte 664f-663f\n"				/* replacement len */
+
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+	"661:\n\t"							\
+	oldinstr "\n"							\
+	"662:\n"							\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature)						\
+	".popsection\n"							\
+	".pushsection .altinstr_replacement, \"a\"\n"			\
+	"663:\n\t"							\
+	newinstr "\n"							\
+	"664:\n\t"							\
+	".popsection\n\t"						\
+	".if ((664b-663b) != (662b-661b))\n\t"				\
+	"	.error \"Alternatives instruction length mismatch\"\n\t"\
+	".endif\n"
+
+#endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
new file mode 100644
index 000000000..fbe0ca31a
--- /dev/null
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -0,0 +1,132 @@
+/*
+ * arch/arm64/include/asm/arch_timer.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_ARCH_TIMER_H
+#define __ASM_ARCH_TIMER_H
+
+#include <asm/barrier.h>
+
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include <clocksource/arm_arch_timer.h>
+
+/*
+ * These register accessors are marked inline so the compiler can
+ * nicely work out which register we want, and chuck away the rest of
+ * the code.
+ */
+static __always_inline
+void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
+{
+	if (access == ARCH_TIMER_PHYS_ACCESS) {
+		switch (reg) {
+		case ARCH_TIMER_REG_CTRL:
+			asm volatile("msr cntp_ctl_el0,  %0" : : "r" (val));
+			break;
+		case ARCH_TIMER_REG_TVAL:
+			asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
+			break;
+		}
+	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
+		switch (reg) {
+		case ARCH_TIMER_REG_CTRL:
+			asm volatile("msr cntv_ctl_el0,  %0" : : "r" (val));
+			break;
+		case ARCH_TIMER_REG_TVAL:
+			asm volatile("msr cntv_tval_el0, %0" : : "r" (val));
+			break;
+		}
+	}
+
+	isb();
+}
+
+static __always_inline
+u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
+{
+	u32 val;
+
+	if (access == ARCH_TIMER_PHYS_ACCESS) {
+		switch (reg) {
+		case ARCH_TIMER_REG_CTRL:
+			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
+			break;
+		case ARCH_TIMER_REG_TVAL:
+			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
+			break;
+		}
+	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
+		switch (reg) {
+		case ARCH_TIMER_REG_CTRL:
+			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
+			break;
+		case ARCH_TIMER_REG_TVAL:
+			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
+			break;
+		}
+	}
+
+	return val;
+}
+
+static inline u32 arch_timer_get_cntfrq(void)
+{
+	u32 val;
+	asm volatile("mrs %0,   cntfrq_el0" : "=r" (val));
+	return val;
+}
+
+static inline u32 arch_timer_get_cntkctl(void)
+{
+	u32 cntkctl;
+	asm volatile("mrs	%0, cntkctl_el1" : "=r" (cntkctl));
+	return cntkctl;
+}
+
+static inline void arch_timer_set_cntkctl(u32 cntkctl)
+{
+	asm volatile("msr	cntkctl_el1, %0" : : "r" (cntkctl));
+}
+
+static inline u64 arch_counter_get_cntpct(void)
+{
+	/*
+	 * AArch64 kernel and user space mandate the use of CNTVCT.
+	 */
+	BUG();
+	return 0;
+}
+
+static inline u64 arch_counter_get_cntvct(void)
+{
+	u64 cval;
+
+	isb();
+	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
+
+	return cval;
+}
+
+static inline int arch_timer_arch_init(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/arch/arm64/include/asm/arm-cci.h b/arch/arm64/include/asm/arm-cci.h
new file mode 100644
index 000000000..f0b63712e
--- /dev/null
+++ b/arch/arm64/include/asm/arm-cci.h
@@ -0,0 +1,27 @@
+/*
+ * arch/arm64/include/asm/arm-cci.h
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_ARM_CCI_H
+#define __ASM_ARM_CCI_H
+
+static inline bool platform_has_secure_cci_access(void)
+{
+	return false;
+}
+
+#endif
diff --git a/arch/arm64/include/asm/asm-offsets.h b/arch/arm64/include/asm/asm-offsets.h
new file mode 100644
index 000000000..d370ee36a
--- /dev/null
+++ b/arch/arm64/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
new file mode 100644
index 000000000..144b64ad9
--- /dev/null
+++ b/arch/arm64/include/asm/assembler.h
@@ -0,0 +1,210 @@
+/*
+ * Based on arch/arm/include/asm/assembler.h
+ *
+ * Copyright (C) 1996-2000 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASSEMBLY__
+#error "Only include this from assembly code"
+#endif
+
+#ifndef __ASM_ASSEMBLER_H
+#define __ASM_ASSEMBLER_H
+
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+
+/*
+ * Stack pushing/popping (register pairs only). Equivalent to store decrement
+ * before, load increment after.
+ */
+	.macro	push, xreg1, xreg2
+	stp	\xreg1, \xreg2, [sp, #-16]!
+	.endm
+
+	.macro	pop, xreg1, xreg2
+	ldp	\xreg1, \xreg2, [sp], #16
+	.endm
+
+/*
+ * Enable and disable interrupts.
+ */
+	.macro	disable_irq
+	msr	daifset, #2
+	.endm
+
+	.macro	enable_irq
+	msr	daifclr, #2
+	.endm
+
+/*
+ * Save/disable and restore interrupts.
+ */
+	.macro	save_and_disable_irqs, olddaif
+	mrs	\olddaif, daif
+	disable_irq
+	.endm
+
+	.macro	restore_irqs, olddaif
+	msr	daif, \olddaif
+	.endm
+
+/*
+ * Enable and disable debug exceptions.
+ */
+	.macro	disable_dbg
+	msr	daifset, #8
+	.endm
+
+	.macro	enable_dbg
+	msr	daifclr, #8
+	.endm
+
+	.macro	disable_step_tsk, flgs, tmp
+	tbz	\flgs, #TIF_SINGLESTEP, 9990f
+	mrs	\tmp, mdscr_el1
+	bic	\tmp, \tmp, #1
+	msr	mdscr_el1, \tmp
+	isb	// Synchronise with enable_dbg
+9990:
+	.endm
+
+	.macro	enable_step_tsk, flgs, tmp
+	tbz	\flgs, #TIF_SINGLESTEP, 9990f
+	disable_dbg
+	mrs	\tmp, mdscr_el1
+	orr	\tmp, \tmp, #1
+	msr	mdscr_el1, \tmp
+9990:
+	.endm
+
+/*
+ * Enable both debug exceptions and interrupts. This is likely to be
+ * faster than two daifclr operations, since writes to this register
+ * are self-synchronising.
+ */
+	.macro	enable_dbg_and_irq
+	msr	daifclr, #(8 | 2)
+	.endm
+
+/*
+ * SMP data memory barrier
+ */
+	.macro	smp_dmb, opt
+#ifdef CONFIG_SMP
+	dmb	\opt
+#endif
+	.endm
+
+#define USER(l, x...)				\
+9999:	x;					\
+	.section __ex_table,"a";		\
+	.align	3;				\
+	.quad	9999b,l;			\
+	.previous
+
+/*
+ * Register aliases.
+ */
+lr	.req	x30		// link register
+
+/*
+ * Vector entry
+ */
+	 .macro	ventry	label
+	.align	7
+	b	\label
+	.endm
+
+/*
+ * Select code when configured for BE.
+ */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define CPU_BE(code...) code
+#else
+#define CPU_BE(code...)
+#endif
+
+/*
+ * Select code when configured for LE.
+ */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define CPU_LE(code...)
+#else
+#define CPU_LE(code...) code
+#endif
+
+/*
+ * Define a macro that constructs a 64-bit value by concatenating two
+ * 32-bit registers. Note that on big endian systems the order of the
+ * registers is swapped.
+ */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	.macro	regs_to_64, rd, lbits, hbits
+#else
+	.macro	regs_to_64, rd, hbits, lbits
+#endif
+	orr	\rd, \lbits, \hbits, lsl #32
+	.endm
+
+/*
+ * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
+ * <symbol> is within the range +/- 4 GB of the PC.
+ */
+	/*
+	 * @dst: destination register (64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: optional scratch register to be used if <dst> == sp, which
+	 *       is not allowed in an adrp instruction
+	 */
+	.macro	adr_l, dst, sym, tmp=
+	.ifb	\tmp
+	adrp	\dst, \sym
+	add	\dst, \dst, :lo12:\sym
+	.else
+	adrp	\tmp, \sym
+	add	\dst, \tmp, :lo12:\sym
+	.endif
+	.endm
+
+	/*
+	 * @dst: destination register (32 or 64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
+	 *       32-bit wide register, in which case it cannot be used to hold
+	 *       the address
+	 */
+	.macro	ldr_l, dst, sym, tmp=
+	.ifb	\tmp
+	adrp	\dst, \sym
+	ldr	\dst, [\dst, :lo12:\sym]
+	.else
+	adrp	\tmp, \sym
+	ldr	\dst, [\tmp, :lo12:\sym]
+	.endif
+	.endm
+
+	/*
+	 * @src: source register (32 or 64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: mandatory 64-bit scratch register to calculate the address
+	 *       while <src> needs to be preserved.
+	 */
+	.macro	str_l, src, sym, tmp
+	adrp	\tmp, \sym
+	str	\src, [\tmp, :lo12:\sym]
+	.endm
+
+#endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
new file mode 100644
index 000000000..7047051de
--- /dev/null
+++ b/arch/arm64/include/asm/atomic.h
@@ -0,0 +1,256 @@
+/*
+ * Based on arch/arm/include/asm/atomic.h
+ *
+ * Copyright (C) 1996 Russell King.
+ * Copyright (C) 2002 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_ATOMIC_H
+#define __ASM_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+#include <asm/cmpxchg.h>
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+#ifdef __KERNEL__
+
+/*
+ * On ARM, ordinary assignment (str instruction) doesn't clear the local
+ * strex/ldrex monitor on some implementations. The reason we can use it for
+ * atomic_set() is the clrex or dummy strex done on every exception return.
+ */
+#define atomic_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_set(v,i)	(((v)->counter) = (i))
+
+/*
+ * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
+ * store exclusive to ensure that these are atomic.  We may loop
+ * to ensure that the update happens.
+ */
+
+#define ATOMIC_OP(op, asm_op)						\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	unsigned long tmp;						\
+	int result;							\
+									\
+	asm volatile("// atomic_" #op "\n"				\
+"1:	ldxr	%w0, %2\n"						\
+"	" #asm_op "	%w0, %w0, %w3\n"				\
+"	stxr	%w1, %w0, %2\n"						\
+"	cbnz	%w1, 1b"						\
+	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
+	: "Ir" (i));							\
+}									\
+
+#define ATOMIC_OP_RETURN(op, asm_op)					\
+static inline int atomic_##op##_return(int i, atomic_t *v)		\
+{									\
+	unsigned long tmp;						\
+	int result;							\
+									\
+	asm volatile("// atomic_" #op "_return\n"			\
+"1:	ldxr	%w0, %2\n"						\
+"	" #asm_op "	%w0, %w0, %w3\n"				\
+"	stlxr	%w1, %w0, %2\n"						\
+"	cbnz	%w1, 1b"						\
+	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
+	: "Ir" (i)							\
+	: "memory");							\
+									\
+	smp_mb();							\
+	return result;							\
+}
+
+#define ATOMIC_OPS(op, asm_op)						\
+	ATOMIC_OP(op, asm_op)						\
+	ATOMIC_OP_RETURN(op, asm_op)
+
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+{
+	unsigned long tmp;
+	int oldval;
+
+	smp_mb();
+
+	asm volatile("// atomic_cmpxchg\n"
+"1:	ldxr	%w1, %2\n"
+"	cmp	%w1, %w3\n"
+"	b.ne	2f\n"
+"	stxr	%w0, %w4, %2\n"
+"	cbnz	%w0, 1b\n"
+"2:"
+	: "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter)
+	: "Ir" (old), "r" (new)
+	: "cc");
+
+	smp_mb();
+	return oldval;
+}
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+
+	c = atomic_read(v);
+	while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c)
+		c = old;
+	return c;
+}
+
+#define atomic_inc(v)		atomic_add(1, v)
+#define atomic_dec(v)		atomic_sub(1, v)
+
+#define atomic_inc_and_test(v)	(atomic_add_return(1, v) == 0)
+#define atomic_dec_and_test(v)	(atomic_sub_return(1, v) == 0)
+#define atomic_inc_return(v)    (atomic_add_return(1, v))
+#define atomic_dec_return(v)    (atomic_sub_return(1, v))
+#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
+
+#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
+
+/*
+ * 64-bit atomic operations.
+ */
+#define ATOMIC64_INIT(i) { (i) }
+
+#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic64_set(v,i)	(((v)->counter) = (i))
+
+#define ATOMIC64_OP(op, asm_op)						\
+static inline void atomic64_##op(long i, atomic64_t *v)			\
+{									\
+	long result;							\
+	unsigned long tmp;						\
+									\
+	asm volatile("// atomic64_" #op "\n"				\
+"1:	ldxr	%0, %2\n"						\
+"	" #asm_op "	%0, %0, %3\n"					\
+"	stxr	%w1, %0, %2\n"						\
+"	cbnz	%w1, 1b"						\
+	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
+	: "Ir" (i));							\
+}									\
+
+#define ATOMIC64_OP_RETURN(op, asm_op)					\
+static inline long atomic64_##op##_return(long i, atomic64_t *v)	\
+{									\
+	long result;							\
+	unsigned long tmp;						\
+									\
+	asm volatile("// atomic64_" #op "_return\n"			\
+"1:	ldxr	%0, %2\n"						\
+"	" #asm_op "	%0, %0, %3\n"					\
+"	stlxr	%w1, %0, %2\n"						\
+"	cbnz	%w1, 1b"						\
+	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
+	: "Ir" (i)							\
+	: "memory");							\
+									\
+	smp_mb();							\
+	return result;							\
+}
+
+#define ATOMIC64_OPS(op, asm_op)					\
+	ATOMIC64_OP(op, asm_op)						\
+	ATOMIC64_OP_RETURN(op, asm_op)
+
+ATOMIC64_OPS(add, add)
+ATOMIC64_OPS(sub, sub)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
+{
+	long oldval;
+	unsigned long res;
+
+	smp_mb();
+
+	asm volatile("// atomic64_cmpxchg\n"
+"1:	ldxr	%1, %2\n"
+"	cmp	%1, %3\n"
+"	b.ne	2f\n"
+"	stxr	%w0, %4, %2\n"
+"	cbnz	%w0, 1b\n"
+"2:"
+	: "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter)
+	: "Ir" (old), "r" (new)
+	: "cc");
+
+	smp_mb();
+	return oldval;
+}
+
+#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long result;
+	unsigned long tmp;
+
+	asm volatile("// atomic64_dec_if_positive\n"
+"1:	ldxr	%0, %2\n"
+"	subs	%0, %0, #1\n"
+"	b.mi	2f\n"
+"	stlxr	%w1, %0, %2\n"
+"	cbnz	%w1, 1b\n"
+"	dmb	ish\n"
+"2:"
+	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
+	:
+	: "cc", "memory");
+
+	return result;
+}
+
+static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+	long c, old;
+
+	c = atomic64_read(v);
+	while (c != u && (old = atomic64_cmpxchg((v), c, c + a)) != c)
+		c = old;
+
+	return c != u;
+}
+
+#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v)			atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v)			atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
+
+#endif
+#endif
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
new file mode 100644
index 000000000..71f19c4dc
--- /dev/null
+++ b/arch/arm64/include/asm/barrier.h
@@ -0,0 +1,125 @@
+/*
+ * Based on arch/arm/include/asm/barrier.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#define sev()		asm volatile("sev" : : : "memory")
+#define wfe()		asm volatile("wfe" : : : "memory")
+#define wfi()		asm volatile("wfi" : : : "memory")
+
+#define isb()		asm volatile("isb" : : : "memory")
+#define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
+#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")
+
+#define mb()		dsb(sy)
+#define rmb()		dsb(ld)
+#define wmb()		dsb(st)
+
+#define dma_rmb()	dmb(oshld)
+#define dma_wmb()	dmb(oshst)
+
+#ifndef CONFIG_SMP
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
+#else
+
+#define smp_mb()	dmb(ish)
+#define smp_rmb()	dmb(ishld)
+#define smp_wmb()	dmb(ishst)
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("stlrb %w1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	case 2:								\
+		asm volatile ("stlrh %w1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	case 4:								\
+		asm volatile ("stlr %w1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	case 8:								\
+		asm volatile ("stlr %1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	}								\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1;						\
+	compiletime_assert_atomic_type(*p);				\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("ldarb %w0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	case 2:								\
+		asm volatile ("ldarh %w0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	}								\
+	___p1;								\
+})
+
+#endif
+
+#define read_barrier_depends()		do { } while(0)
+#define smp_read_barrier_depends()	do { } while(0)
+
+#define set_mb(var, value)	do { var = value; smp_mb(); } while (0)
+#define nop()		asm volatile("nop");
+
+#define smp_mb__before_atomic()	smp_mb()
+#define smp_mb__after_atomic()	smp_mb()
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* __ASM_BARRIER_H */
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
new file mode 100644
index 000000000..9c19594ce
--- /dev/null
+++ b/arch/arm64/include/asm/bitops.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITOPS_H
+#define __ASM_BITOPS_H
+
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+/*
+ * Little endian assembly atomic bitops.
+ */
+extern void set_bit(int nr, volatile unsigned long *p);
+extern void clear_bit(int nr, volatile unsigned long *p);
+extern void change_bit(int nr, volatile unsigned long *p);
+extern int test_and_set_bit(int nr, volatile unsigned long *p);
+extern int test_and_clear_bit(int nr, volatile unsigned long *p);
+extern int test_and_change_bit(int nr, volatile unsigned long *p);
+
+#include <asm-generic/bitops/builtin-__ffs.h>
+#include <asm-generic/bitops/builtin-ffs.h>
+#include <asm-generic/bitops/builtin-__fls.h>
+#include <asm-generic/bitops/builtin-fls.h>
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/find.h>
+
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/le.h>
+
+/*
+ * Ext2 is defined to use little-endian byte ordering.
+ */
+#define ext2_set_bit_atomic(lock, nr, p)	test_and_set_bit_le(nr, p)
+#define ext2_clear_bit_atomic(lock, nr, p)	test_and_clear_bit_le(nr, p)
+
+#endif /* __ASM_BITOPS_H */
diff --git a/arch/arm64/include/asm/bitrev.h b/arch/arm64/include/asm/bitrev.h
new file mode 100644
index 000000000..a5a0c3660
--- /dev/null
+++ b/arch/arm64/include/asm/bitrev.h
@@ -0,0 +1,19 @@
+#ifndef __ASM_BITREV_H
+#define __ASM_BITREV_H
+static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
+{
+	__asm__ ("rbit %w0, %w1" : "=r" (x) : "r" (x));
+	return x;
+}
+
+static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x)
+{
+	return __arch_bitrev32((u32)x) >> 16;
+}
+
+static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x)
+{
+	return __arch_bitrev32((u32)x) >> 24;
+}
+
+#endif
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
new file mode 100644
index 000000000..bde449936
--- /dev/null
+++ b/arch/arm64/include/asm/cache.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CACHE_H
+#define __ASM_CACHE_H
+
+#include <asm/cachetype.h>
+
+#define L1_CACHE_SHIFT		6
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+/*
+ * Memory returned by kmalloc() may be used for DMA, so we must make
+ * sure that all such allocations are cache aligned. Otherwise,
+ * unrelated code may cause parts of the buffer to be read into the
+ * cache before the transfer is done, causing old data to be seen by
+ * the CPU.
+ */
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+
+#ifndef __ASSEMBLY__
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+static inline int cache_line_size(void)
+{
+	u32 cwg = cache_type_cwg();
+	return cwg ? 4 << cwg : L1_CACHE_BYTES;
+}
+
+#endif	/* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
new file mode 100644
index 000000000..67d309cc3
--- /dev/null
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -0,0 +1,160 @@
+/*
+ * Based on arch/arm/include/asm/cacheflush.h
+ *
+ * Copyright (C) 1999-2002 Russell King.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CACHEFLUSH_H
+#define __ASM_CACHEFLUSH_H
+
+#include <linux/mm.h>
+
+/*
+ * This flag is used to indicate that the page pointed to by a pte is clean
+ * and does not require cleaning before returning it to the user.
+ */
+#define PG_dcache_clean PG_arch_1
+
+/*
+ *	MM Cache Management
+ *	===================
+ *
+ *	The arch/arm64/mm/cache.S implements these methods.
+ *
+ *	Start addresses are inclusive and end addresses are exclusive; start
+ *	addresses should be rounded down, end addresses up.
+ *
+ *	See Documentation/cachetlb.txt for more information. Please note that
+ *	the implementation assumes non-aliasing VIPT D-cache and (aliasing)
+ *	VIPT or ASID-tagged VIVT I-cache.
+ *
+ *	flush_cache_all()
+ *
+ *		Unconditionally clean and invalidate the entire cache.
+ *
+ *	flush_cache_mm(mm)
+ *
+ *		Clean and invalidate all user space cache entries
+ *		before a change of page tables.
+ *
+ *	flush_icache_range(start, end)
+ *
+ *		Ensure coherency between the I-cache and the D-cache in the
+ *		region described by start, end.
+ *		- start  - virtual start address
+ *		- end    - virtual end address
+ *
+ *	__flush_cache_user_range(start, end)
+ *
+ *		Ensure coherency between the I-cache and the D-cache in the
+ *		region described by start, end.
+ *		- start  - virtual start address
+ *		- end    - virtual end address
+ *
+ *	__flush_dcache_area(kaddr, size)
+ *
+ *		Ensure that the data held in page is written back.
+ *		- kaddr  - page address
+ *		- size   - region size
+ */
+extern void flush_cache_all(void);
+extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void flush_icache_range(unsigned long start, unsigned long end);
+extern void __flush_dcache_area(void *addr, size_t len);
+extern long __flush_cache_user_range(unsigned long start, unsigned long end);
+
+static inline void flush_cache_mm(struct mm_struct *mm)
+{
+}
+
+static inline void flush_cache_page(struct vm_area_struct *vma,
+				    unsigned long user_addr, unsigned long pfn)
+{
+}
+
+/*
+ * Cache maintenance functions used by the DMA API. No to be used directly.
+ */
+extern void __dma_map_area(const void *, size_t, int);
+extern void __dma_unmap_area(const void *, size_t, int);
+extern void __dma_flush_range(const void *, const void *);
+
+/*
+ * Copy user data from/to a page which is mapped into a different
+ * processes address space.  Really, we want to allow our "user
+ * space" model to handle this.
+ */
+extern void copy_to_user_page(struct vm_area_struct *, struct page *,
+	unsigned long, void *, const void *, unsigned long);
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	do {							\
+		memcpy(dst, src, len);				\
+	} while (0)
+
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
+/*
+ * flush_dcache_page is used when the kernel has written to the page
+ * cache page at virtual address page->virtual.
+ *
+ * If this page isn't mapped (ie, page_mapping == NULL), or it might
+ * have userspace mappings, then we _must_ always clean + invalidate
+ * the dcache entries associated with the kernel mapping.
+ *
+ * Otherwise we can defer the operation, and clean the cache when we are
+ * about to change to user space.  This is the same method as used on SPARC64.
+ * See update_mmu_cache for the user space part.
+ */
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+extern void flush_dcache_page(struct page *);
+
+static inline void __flush_icache_all(void)
+{
+	asm("ic	ialluis");
+	dsb(ish);
+}
+
+#define flush_dcache_mmap_lock(mapping) \
+	spin_lock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_unlock(mapping) \
+	spin_unlock_irq(&(mapping)->tree_lock)
+
+/*
+ * We don't appear to need to do anything here.  In fact, if we did, we'd
+ * duplicate cache flushing elsewhere performed by flush_dcache_page().
+ */
+#define flush_icache_page(vma,page)	do { } while (0)
+
+/*
+ * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache).
+ */
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+}
+
+static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+}
+
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
new file mode 100644
index 000000000..da2fc9e3c
--- /dev/null
+++ b/arch/arm64/include/asm/cachetype.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CACHETYPE_H
+#define __ASM_CACHETYPE_H
+
+#include <asm/cputype.h>
+
+#define CTR_L1IP_SHIFT		14
+#define CTR_L1IP_MASK		3
+#define CTR_CWG_SHIFT		24
+#define CTR_CWG_MASK		15
+
+#define ICACHE_POLICY_RESERVED	0
+#define ICACHE_POLICY_AIVIVT	1
+#define ICACHE_POLICY_VIPT	2
+#define ICACHE_POLICY_PIPT	3
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bitops.h>
+
+#define CTR_L1IP(ctr)	(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
+
+#define ICACHEF_ALIASING	BIT(0)
+#define ICACHEF_AIVIVT		BIT(1)
+
+extern unsigned long __icache_flags;
+
+/*
+ * NumSets, bits[27:13] - (Number of sets in cache) - 1
+ * Associativity, bits[12:3] - (Associativity of cache) - 1
+ * LineSize, bits[2:0] - (Log2(Number of words in cache line)) - 2
+ */
+#define CCSIDR_EL1_WRITE_THROUGH	BIT(31)
+#define CCSIDR_EL1_WRITE_BACK		BIT(30)
+#define CCSIDR_EL1_READ_ALLOCATE	BIT(29)
+#define CCSIDR_EL1_WRITE_ALLOCATE	BIT(28)
+#define CCSIDR_EL1_LINESIZE_MASK	0x7
+#define CCSIDR_EL1_LINESIZE(x)		((x) & CCSIDR_EL1_LINESIZE_MASK)
+#define CCSIDR_EL1_ASSOCIATIVITY_SHIFT	3
+#define CCSIDR_EL1_ASSOCIATIVITY_MASK	0x3ff
+#define CCSIDR_EL1_ASSOCIATIVITY(x)	\
+	(((x) >> CCSIDR_EL1_ASSOCIATIVITY_SHIFT) & CCSIDR_EL1_ASSOCIATIVITY_MASK)
+#define CCSIDR_EL1_NUMSETS_SHIFT	13
+#define CCSIDR_EL1_NUMSETS_MASK		0x7fff
+#define CCSIDR_EL1_NUMSETS(x) \
+	(((x) >> CCSIDR_EL1_NUMSETS_SHIFT) & CCSIDR_EL1_NUMSETS_MASK)
+
+#define CACHE_LINESIZE(x)	(16 << CCSIDR_EL1_LINESIZE(x))
+#define CACHE_NUMSETS(x)	(CCSIDR_EL1_NUMSETS(x) + 1)
+#define CACHE_ASSOCIATIVITY(x)	(CCSIDR_EL1_ASSOCIATIVITY(x) + 1)
+
+extern u64 __attribute_const__ cache_get_ccsidr(u64 csselr);
+
+/* Helpers for Level 1 Instruction cache csselr = 1L */
+static inline int icache_get_linesize(void)
+{
+	return CACHE_LINESIZE(cache_get_ccsidr(1L));
+}
+
+static inline int icache_get_numsets(void)
+{
+	return CACHE_NUMSETS(cache_get_ccsidr(1L));
+}
+
+/*
+ * Whilst the D-side always behaves as PIPT on AArch64, aliasing is
+ * permitted in the I-cache.
+ */
+static inline int icache_is_aliasing(void)
+{
+	return test_bit(ICACHEF_ALIASING, &__icache_flags);
+}
+
+static inline int icache_is_aivivt(void)
+{
+	return test_bit(ICACHEF_AIVIVT, &__icache_flags);
+}
+
+static inline u32 cache_type_cwg(void)
+{
+	return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
+}
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* __ASM_CACHETYPE_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
new file mode 100644
index 000000000..d8c25b7b1
--- /dev/null
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -0,0 +1,279 @@
+/*
+ * Based on arch/arm/include/asm/cmpxchg.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CMPXCHG_H
+#define __ASM_CMPXCHG_H
+
+#include <linux/bug.h>
+#include <linux/mmdebug.h>
+
+#include <asm/barrier.h>
+
+static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
+{
+	unsigned long ret, tmp;
+
+	switch (size) {
+	case 1:
+		asm volatile("//	__xchg1\n"
+		"1:	ldxrb	%w0, %2\n"
+		"	stlxrb	%w1, %w3, %2\n"
+		"	cbnz	%w1, 1b\n"
+			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
+			: "r" (x)
+			: "memory");
+		break;
+	case 2:
+		asm volatile("//	__xchg2\n"
+		"1:	ldxrh	%w0, %2\n"
+		"	stlxrh	%w1, %w3, %2\n"
+		"	cbnz	%w1, 1b\n"
+			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
+			: "r" (x)
+			: "memory");
+		break;
+	case 4:
+		asm volatile("//	__xchg4\n"
+		"1:	ldxr	%w0, %2\n"
+		"	stlxr	%w1, %w3, %2\n"
+		"	cbnz	%w1, 1b\n"
+			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
+			: "r" (x)
+			: "memory");
+		break;
+	case 8:
+		asm volatile("//	__xchg8\n"
+		"1:	ldxr	%0, %2\n"
+		"	stlxr	%w1, %3, %2\n"
+		"	cbnz	%w1, 1b\n"
+			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
+			: "r" (x)
+			: "memory");
+		break;
+	default:
+		BUILD_BUG();
+	}
+
+	smp_mb();
+	return ret;
+}
+
+#define xchg(ptr,x) \
+({ \
+	__typeof__(*(ptr)) __ret; \
+	__ret = (__typeof__(*(ptr))) \
+		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+	__ret; \
+})
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	unsigned long oldval = 0, res;
+
+	switch (size) {
+	case 1:
+		do {
+			asm volatile("// __cmpxchg1\n"
+			"	ldxrb	%w1, %2\n"
+			"	mov	%w0, #0\n"
+			"	cmp	%w1, %w3\n"
+			"	b.ne	1f\n"
+			"	stxrb	%w0, %w4, %2\n"
+			"1:\n"
+				: "=&r" (res), "=&r" (oldval), "+Q" (*(u8 *)ptr)
+				: "Ir" (old), "r" (new)
+				: "cc");
+		} while (res);
+		break;
+
+	case 2:
+		do {
+			asm volatile("// __cmpxchg2\n"
+			"	ldxrh	%w1, %2\n"
+			"	mov	%w0, #0\n"
+			"	cmp	%w1, %w3\n"
+			"	b.ne	1f\n"
+			"	stxrh	%w0, %w4, %2\n"
+			"1:\n"
+				: "=&r" (res), "=&r" (oldval), "+Q" (*(u16 *)ptr)
+				: "Ir" (old), "r" (new)
+				: "cc");
+		} while (res);
+		break;
+
+	case 4:
+		do {
+			asm volatile("// __cmpxchg4\n"
+			"	ldxr	%w1, %2\n"
+			"	mov	%w0, #0\n"
+			"	cmp	%w1, %w3\n"
+			"	b.ne	1f\n"
+			"	stxr	%w0, %w4, %2\n"
+			"1:\n"
+				: "=&r" (res), "=&r" (oldval), "+Q" (*(u32 *)ptr)
+				: "Ir" (old), "r" (new)
+				: "cc");
+		} while (res);
+		break;
+
+	case 8:
+		do {
+			asm volatile("// __cmpxchg8\n"
+			"	ldxr	%1, %2\n"
+			"	mov	%w0, #0\n"
+			"	cmp	%1, %3\n"
+			"	b.ne	1f\n"
+			"	stxr	%w0, %4, %2\n"
+			"1:\n"
+				: "=&r" (res), "=&r" (oldval), "+Q" (*(u64 *)ptr)
+				: "Ir" (old), "r" (new)
+				: "cc");
+		} while (res);
+		break;
+
+	default:
+		BUILD_BUG();
+	}
+
+	return oldval;
+}
+
+#define system_has_cmpxchg_double()     1
+
+static inline int __cmpxchg_double(volatile void *ptr1, volatile void *ptr2,
+		unsigned long old1, unsigned long old2,
+		unsigned long new1, unsigned long new2, int size)
+{
+	unsigned long loop, lost;
+
+	switch (size) {
+	case 8:
+		VM_BUG_ON((unsigned long *)ptr2 - (unsigned long *)ptr1 != 1);
+		do {
+			asm volatile("// __cmpxchg_double8\n"
+			"	ldxp	%0, %1, %2\n"
+			"	eor	%0, %0, %3\n"
+			"	eor	%1, %1, %4\n"
+			"	orr	%1, %0, %1\n"
+			"	mov	%w0, #0\n"
+			"	cbnz	%1, 1f\n"
+			"	stxp	%w0, %5, %6, %2\n"
+			"1:\n"
+				: "=&r"(loop), "=&r"(lost), "+Q" (*(u64 *)ptr1)
+				: "r" (old1), "r"(old2), "r"(new1), "r"(new2));
+		} while (loop);
+		break;
+	default:
+		BUILD_BUG();
+	}
+
+	return !lost;
+}
+
+static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2,
+			unsigned long old1, unsigned long old2,
+			unsigned long new1, unsigned long new2, int size)
+{
+	int ret;
+
+	smp_mb();
+	ret = __cmpxchg_double(ptr1, ptr2, old1, old2, new1, new2, size);
+	smp_mb();
+
+	return ret;
+}
+
+static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
+					 unsigned long new, int size)
+{
+	unsigned long ret;
+
+	smp_mb();
+	ret = __cmpxchg(ptr, old, new, size);
+	smp_mb();
+
+	return ret;
+}
+
+#define cmpxchg(ptr, o, n) \
+({ \
+	__typeof__(*(ptr)) __ret; \
+	__ret = (__typeof__(*(ptr))) \
+		__cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
+			     sizeof(*(ptr))); \
+	__ret; \
+})
+
+#define cmpxchg_local(ptr, o, n) \
+({ \
+	__typeof__(*(ptr)) __ret; \
+	__ret = (__typeof__(*(ptr))) \
+		__cmpxchg((ptr), (unsigned long)(o), \
+			  (unsigned long)(n), sizeof(*(ptr))); \
+	__ret; \
+})
+
+#define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \
+({\
+	int __ret;\
+	__ret = __cmpxchg_double_mb((ptr1), (ptr2), (unsigned long)(o1), \
+			(unsigned long)(o2), (unsigned long)(n1), \
+			(unsigned long)(n2), sizeof(*(ptr1)));\
+	__ret; \
+})
+
+#define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \
+({\
+	int __ret;\
+	__ret = __cmpxchg_double((ptr1), (ptr2), (unsigned long)(o1), \
+			(unsigned long)(o2), (unsigned long)(n1), \
+			(unsigned long)(n2), sizeof(*(ptr1)));\
+	__ret; \
+})
+
+#define _protect_cmpxchg_local(pcp, o, n)			\
+({								\
+	typeof(*raw_cpu_ptr(&(pcp))) __ret;			\
+	preempt_disable();					\
+	__ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n);	\
+	preempt_enable();					\
+	__ret;							\
+})
+
+#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+
+#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)		\
+({									\
+	int __ret;							\
+	preempt_disable();						\
+	__ret = cmpxchg_double_local(	raw_cpu_ptr(&(ptr1)),		\
+					raw_cpu_ptr(&(ptr2)),		\
+					o1, o2, n1, n2);		\
+	preempt_enable();						\
+	__ret;								\
+})
+
+#define cmpxchg64(ptr,o,n)		cmpxchg((ptr),(o),(n))
+#define cmpxchg64_local(ptr,o,n)	cmpxchg_local((ptr),(o),(n))
+
+#define cmpxchg64_relaxed(ptr,o,n)	cmpxchg_local((ptr),(o),(n))
+
+#endif	/* __ASM_CMPXCHG_H */
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
new file mode 100644
index 000000000..7fbed6919
--- /dev/null
+++ b/arch/arm64/include/asm/compat.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_COMPAT_H
+#define __ASM_COMPAT_H
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+
+#define COMPAT_USER_HZ		100
+#ifdef __AARCH64EB__
+#define COMPAT_UTS_MACHINE	"armv8b\0\0"
+#else
+#define COMPAT_UTS_MACHINE	"armv8l\0\0"
+#endif
+
+typedef u32		compat_size_t;
+typedef s32		compat_ssize_t;
+typedef s32		compat_time_t;
+typedef s32		compat_clock_t;
+typedef s32		compat_pid_t;
+typedef u16		__compat_uid_t;
+typedef u16		__compat_gid_t;
+typedef u16		__compat_uid16_t;
+typedef u16		__compat_gid16_t;
+typedef u32		__compat_uid32_t;
+typedef u32		__compat_gid32_t;
+typedef u16		compat_mode_t;
+typedef u32		compat_ino_t;
+typedef u32		compat_dev_t;
+typedef s32		compat_off_t;
+typedef s64		compat_loff_t;
+typedef s32		compat_nlink_t;
+typedef u16		compat_ipc_pid_t;
+typedef s32		compat_daddr_t;
+typedef u32		compat_caddr_t;
+typedef __kernel_fsid_t	compat_fsid_t;
+typedef s32		compat_key_t;
+typedef s32		compat_timer_t;
+
+typedef s16		compat_short_t;
+typedef s32		compat_int_t;
+typedef s32		compat_long_t;
+typedef s64		compat_s64;
+typedef u16		compat_ushort_t;
+typedef u32		compat_uint_t;
+typedef u32		compat_ulong_t;
+typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
+
+struct compat_timespec {
+	compat_time_t	tv_sec;
+	s32		tv_nsec;
+};
+
+struct compat_timeval {
+	compat_time_t	tv_sec;
+	s32		tv_usec;
+};
+
+struct compat_stat {
+#ifdef __AARCH64EB__
+	short		st_dev;
+	short		__pad1;
+#else
+	compat_dev_t	st_dev;
+#endif
+	compat_ino_t	st_ino;
+	compat_mode_t	st_mode;
+	compat_ushort_t	st_nlink;
+	__compat_uid16_t	st_uid;
+	__compat_gid16_t	st_gid;
+#ifdef __AARCH64EB__
+	short		st_rdev;
+	short		__pad2;
+#else
+	compat_dev_t	st_rdev;
+#endif
+	compat_off_t	st_size;
+	compat_off_t	st_blksize;
+	compat_off_t	st_blocks;
+	compat_time_t	st_atime;
+	compat_ulong_t	st_atime_nsec;
+	compat_time_t	st_mtime;
+	compat_ulong_t	st_mtime_nsec;
+	compat_time_t	st_ctime;
+	compat_ulong_t	st_ctime_nsec;
+	compat_ulong_t	__unused4[2];
+};
+
+struct compat_flock {
+	short		l_type;
+	short		l_whence;
+	compat_off_t	l_start;
+	compat_off_t	l_len;
+	compat_pid_t	l_pid;
+};
+
+#define F_GETLK64	12	/*  using 'struct flock64' */
+#define F_SETLK64	13
+#define F_SETLKW64	14
+
+struct compat_flock64 {
+	short		l_type;
+	short		l_whence;
+	compat_loff_t	l_start;
+	compat_loff_t	l_len;
+	compat_pid_t	l_pid;
+};
+
+struct compat_statfs {
+	int		f_type;
+	int		f_bsize;
+	int		f_blocks;
+	int		f_bfree;
+	int		f_bavail;
+	int		f_files;
+	int		f_ffree;
+	compat_fsid_t	f_fsid;
+	int		f_namelen;	/* SunOS ignores this field. */
+	int		f_frsize;
+	int		f_flags;
+	int		f_spare[4];
+};
+
+#define COMPAT_RLIM_INFINITY		0xffffffff
+
+typedef u32		compat_old_sigset_t;
+
+#define _COMPAT_NSIG		64
+#define _COMPAT_NSIG_BPW	32
+
+typedef u32		compat_sigset_word;
+
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid32_t _uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;	/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;       /* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid32_t _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;	/* which child */
+			__compat_uid32_t _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			compat_uptr_t _addr; /* faulting insn/memory ref. */
+			short _addr_lsb; /* LSB of the reported address */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			compat_long_t _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+
+		/* SIGSYS */
+		struct {
+			compat_uptr_t _call_addr; /* calling user insn */
+			int _syscall;	/* triggering system call number */
+			compat_uint_t _arch;	/* AUDIT_ARCH_* of syscall */
+		} _sigsys;
+	} _sifields;
+} compat_siginfo_t;
+
+#define COMPAT_OFF_T_MAX	0x7fffffff
+#define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+	return (void __user *)(unsigned long)uptr;
+}
+
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
+#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
+
+static inline void __user *arch_compat_alloc_user_space(long len)
+{
+	return (void __user *)compat_user_stack_pointer() - len;
+}
+
+struct compat_ipc64_perm {
+	compat_key_t key;
+	__compat_uid32_t uid;
+	__compat_gid32_t gid;
+	__compat_uid32_t cuid;
+	__compat_gid32_t cgid;
+	unsigned short mode;
+	unsigned short __pad1;
+	unsigned short seq;
+	unsigned short __pad2;
+	compat_ulong_t unused1;
+	compat_ulong_t unused2;
+};
+
+struct compat_semid64_ds {
+	struct compat_ipc64_perm sem_perm;
+	compat_time_t  sem_otime;
+	compat_ulong_t __unused1;
+	compat_time_t  sem_ctime;
+	compat_ulong_t __unused2;
+	compat_ulong_t sem_nsems;
+	compat_ulong_t __unused3;
+	compat_ulong_t __unused4;
+};
+
+struct compat_msqid64_ds {
+	struct compat_ipc64_perm msg_perm;
+	compat_time_t  msg_stime;
+	compat_ulong_t __unused1;
+	compat_time_t  msg_rtime;
+	compat_ulong_t __unused2;
+	compat_time_t  msg_ctime;
+	compat_ulong_t __unused3;
+	compat_ulong_t msg_cbytes;
+	compat_ulong_t msg_qnum;
+	compat_ulong_t msg_qbytes;
+	compat_pid_t   msg_lspid;
+	compat_pid_t   msg_lrpid;
+	compat_ulong_t __unused4;
+	compat_ulong_t __unused5;
+};
+
+struct compat_shmid64_ds {
+	struct compat_ipc64_perm shm_perm;
+	compat_size_t  shm_segsz;
+	compat_time_t  shm_atime;
+	compat_ulong_t __unused1;
+	compat_time_t  shm_dtime;
+	compat_ulong_t __unused2;
+	compat_time_t  shm_ctime;
+	compat_ulong_t __unused3;
+	compat_pid_t   shm_cpid;
+	compat_pid_t   shm_lpid;
+	compat_ulong_t shm_nattch;
+	compat_ulong_t __unused4;
+	compat_ulong_t __unused5;
+};
+
+static inline int is_compat_task(void)
+{
+	return test_thread_flag(TIF_32BIT);
+}
+
+static inline int is_compat_thread(struct thread_info *thread)
+{
+	return test_ti_thread_flag(thread, TIF_32BIT);
+}
+
+#else /* !CONFIG_COMPAT */
+
+static inline int is_compat_thread(struct thread_info *thread)
+{
+	return 0;
+}
+
+#endif /* CONFIG_COMPAT */
+#endif /* __KERNEL__ */
+#endif /* __ASM_COMPAT_H */
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
new file mode 100644
index 000000000..ee35fd0f2
--- /dev/null
+++ b/arch/arm64/include/asm/compiler.h
@@ -0,0 +1,30 @@
+/*
+ * Based on arch/arm/include/asm/compiler.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_COMPILER_H
+#define __ASM_COMPILER_H
+
+/*
+ * This is used to ensure the compiler did actually allocate the register we
+ * asked it for some inline assembly sequences.  Apparently we can't trust the
+ * compiler from one version to another so a bit of paranoia won't hurt.  This
+ * string is meant to be concatenated with the inline asm string and will
+ * cause compilation to stop on mismatch.  (for details, see gcc PR 15089)
+ */
+#define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
+
+#endif	/* __ASM_COMPILER_H */
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
new file mode 100644
index 000000000..8e797b2fc
--- /dev/null
+++ b/arch/arm64/include/asm/cpu.h
@@ -0,0 +1,66 @@
+/*
+  * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CPU_H
+#define __ASM_CPU_H
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+
+/*
+ * Records attributes of an individual CPU.
+ */
+struct cpuinfo_arm64 {
+	struct cpu	cpu;
+	u32		reg_ctr;
+	u32		reg_cntfrq;
+	u32		reg_dczid;
+	u32		reg_midr;
+
+	u64		reg_id_aa64dfr0;
+	u64		reg_id_aa64dfr1;
+	u64		reg_id_aa64isar0;
+	u64		reg_id_aa64isar1;
+	u64		reg_id_aa64mmfr0;
+	u64		reg_id_aa64mmfr1;
+	u64		reg_id_aa64pfr0;
+	u64		reg_id_aa64pfr1;
+
+	u32		reg_id_dfr0;
+	u32		reg_id_isar0;
+	u32		reg_id_isar1;
+	u32		reg_id_isar2;
+	u32		reg_id_isar3;
+	u32		reg_id_isar4;
+	u32		reg_id_isar5;
+	u32		reg_id_mmfr0;
+	u32		reg_id_mmfr1;
+	u32		reg_id_mmfr2;
+	u32		reg_id_mmfr3;
+	u32		reg_id_pfr0;
+	u32		reg_id_pfr1;
+
+	u32		reg_mvfr0;
+	u32		reg_mvfr1;
+	u32		reg_mvfr2;
+};
+
+DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
+
+void cpuinfo_store_cpu(void);
+void __init cpuinfo_store_boot_cpu(void);
+
+#endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
new file mode 100644
index 000000000..5a31d6716
--- /dev/null
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CPU_OPS_H
+#define __ASM_CPU_OPS_H
+
+#include <linux/init.h>
+#include <linux/threads.h>
+
+struct device_node;
+
+/**
+ * struct cpu_operations - Callback operations for hotplugging CPUs.
+ *
+ * @name:	Name of the property as appears in a devicetree cpu node's
+ *		enable-method property.
+ * @cpu_init:	Reads any data necessary for a specific enable-method from the
+ *		devicetree, for a given cpu node and proposed logical id.
+ * @cpu_prepare: Early one-time preparation step for a cpu. If there is a
+ *		mechanism for doing so, tests whether it is possible to boot
+ *		the given CPU.
+ * @cpu_boot:	Boots a cpu into the kernel.
+ * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary
+ *		synchronisation. Called from the cpu being booted.
+ * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific
+ * 		reason, which will cause the hot unplug to be aborted. Called
+ * 		from the cpu to be killed.
+ * @cpu_die:	Makes a cpu leave the kernel. Must not fail. Called from the
+ *		cpu being killed.
+ * @cpu_kill:  Ensures a cpu has left the kernel. Called from another cpu.
+ * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from
+ *		devicetree, for a given cpu node and proposed logical id.
+ * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
+ *               to wrong parameters or error conditions. Called from the
+ *               CPU being suspended. Must be called with IRQs disabled.
+ */
+struct cpu_operations {
+	const char	*name;
+	int		(*cpu_init)(struct device_node *, unsigned int);
+	int		(*cpu_prepare)(unsigned int);
+	int		(*cpu_boot)(unsigned int);
+	void		(*cpu_postboot)(void);
+#ifdef CONFIG_HOTPLUG_CPU
+	int		(*cpu_disable)(unsigned int cpu);
+	void		(*cpu_die)(unsigned int cpu);
+	int		(*cpu_kill)(unsigned int cpu);
+#endif
+#ifdef CONFIG_CPU_IDLE
+	int		(*cpu_init_idle)(struct device_node *, unsigned int);
+	int		(*cpu_suspend)(unsigned long);
+#endif
+};
+
+extern const struct cpu_operations *cpu_ops[NR_CPUS];
+int __init cpu_read_ops(struct device_node *dn, int cpu);
+void __init cpu_read_bootcpu_ops(void);
+const struct cpu_operations *cpu_get_ops(const char *name);
+
+#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
new file mode 100644
index 000000000..82cb9f98b
--- /dev/null
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <asm/hwcap.h>
+
+/*
+ * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
+ * in the kernel and for user space to keep track of which optional features
+ * are supported by the current system. So let's map feature 'x' to HWCAP_x.
+ * Note that HWCAP_x constants are bit fields so we need to take the log.
+ */
+
+#define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
+#define cpu_feature(x)		ilog2(HWCAP_ ## x)
+
+#define ARM64_WORKAROUND_CLEAN_CACHE		0
+#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
+#define ARM64_WORKAROUND_845719			2
+
+#define ARM64_NCAPS				3
+
+#ifndef __ASSEMBLY__
+
+struct arm64_cpu_capabilities {
+	const char *desc;
+	u16 capability;
+	bool (*matches)(const struct arm64_cpu_capabilities *);
+	union {
+		struct {	/* To be used for erratum handling only */
+			u32 midr_model;
+			u32 midr_range_min, midr_range_max;
+		};
+	};
+};
+
+extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+
+static inline bool cpus_have_cap(unsigned int num)
+{
+	if (num >= ARM64_NCAPS)
+		return false;
+	return test_bit(num, cpu_hwcaps);
+}
+
+static inline void cpus_set_cap(unsigned int num)
+{
+	if (num >= ARM64_NCAPS)
+		pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
+			num, ARM64_NCAPS);
+	else
+		__set_bit(num, cpu_hwcaps);
+}
+
+void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+			    const char *info);
+void check_local_cpu_errata(void);
+void check_local_cpu_features(void);
+bool cpu_supports_mixed_endian_el0(void);
+bool system_supports_mixed_endian_el0(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
new file mode 100644
index 000000000..141b2fcab
--- /dev/null
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_CPUIDLE_H
+#define __ASM_CPUIDLE_H
+
+#include <asm/proc-fns.h>
+
+#ifdef CONFIG_CPU_IDLE
+extern int arm_cpuidle_init(unsigned int cpu);
+extern int cpu_suspend(unsigned long arg);
+#else
+static inline int arm_cpuidle_init(unsigned int cpu)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int cpu_suspend(unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+static inline int arm_cpuidle_suspend(int index)
+{
+	return cpu_suspend(index);
+}
+#endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
new file mode 100644
index 000000000..a84ec605b
--- /dev/null
+++ b/arch/arm64/include/asm/cputype.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CPUTYPE_H
+#define __ASM_CPUTYPE_H
+
+#define INVALID_HWID		ULONG_MAX
+
+#define MPIDR_UP_BITMASK	(0x1 << 30)
+#define MPIDR_MT_BITMASK	(0x1 << 24)
+#define MPIDR_HWID_BITMASK	0xff00ffffff
+
+#define MPIDR_LEVEL_BITS_SHIFT	3
+#define MPIDR_LEVEL_BITS	(1 << MPIDR_LEVEL_BITS_SHIFT)
+#define MPIDR_LEVEL_MASK	((1 << MPIDR_LEVEL_BITS) - 1)
+
+#define MPIDR_LEVEL_SHIFT(level) \
+	(((1 << level) >> 1) << MPIDR_LEVEL_BITS_SHIFT)
+
+#define MPIDR_AFFINITY_LEVEL(mpidr, level) \
+	((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
+
+#define read_cpuid(reg) ({						\
+	u64 __val;							\
+	asm("mrs	%0, " #reg : "=r" (__val));			\
+	__val;								\
+})
+
+#define MIDR_REVISION_MASK	0xf
+#define MIDR_REVISION(midr)	((midr) & MIDR_REVISION_MASK)
+#define MIDR_PARTNUM_SHIFT	4
+#define MIDR_PARTNUM_MASK	(0xfff << MIDR_PARTNUM_SHIFT)
+#define MIDR_PARTNUM(midr)	\
+	(((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT)
+#define MIDR_ARCHITECTURE_SHIFT	16
+#define MIDR_ARCHITECTURE_MASK	(0xf << MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_ARCHITECTURE(midr)	\
+	(((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_VARIANT_SHIFT	20
+#define MIDR_VARIANT_MASK	(0xf << MIDR_VARIANT_SHIFT)
+#define MIDR_VARIANT(midr)	\
+	(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
+#define MIDR_IMPLEMENTOR_SHIFT	24
+#define MIDR_IMPLEMENTOR_MASK	(0xff << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR(midr)	\
+	(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
+
+#define MIDR_CPU_PART(imp, partnum) \
+	(((imp)			<< MIDR_IMPLEMENTOR_SHIFT) | \
+	(0xf			<< MIDR_ARCHITECTURE_SHIFT) | \
+	((partnum)		<< MIDR_PARTNUM_SHIFT))
+
+#define ARM_CPU_IMP_ARM		0x41
+#define ARM_CPU_IMP_APM		0x50
+
+#define ARM_CPU_PART_AEM_V8	0xD0F
+#define ARM_CPU_PART_FOUNDATION	0xD00
+#define ARM_CPU_PART_CORTEX_A57	0xD07
+#define ARM_CPU_PART_CORTEX_A53	0xD03
+
+#define APM_CPU_PART_POTENZA	0x000
+
+#define ID_AA64MMFR0_BIGENDEL0_SHIFT	16
+#define ID_AA64MMFR0_BIGENDEL0_MASK	(0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT)
+#define ID_AA64MMFR0_BIGENDEL0(mmfr0)	\
+	(((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT)
+#define ID_AA64MMFR0_BIGEND_SHIFT	8
+#define ID_AA64MMFR0_BIGEND_MASK	(0xf << ID_AA64MMFR0_BIGEND_SHIFT)
+#define ID_AA64MMFR0_BIGEND(mmfr0)	\
+	(((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT)
+
+#define SCTLR_EL1_CP15BEN	(0x1 << 5)
+#define SCTLR_EL1_SED		(0x1 << 8)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * The CPU ID never changes at run time, so we might as well tell the
+ * compiler that it's constant.  Use this function to read the CPU ID
+ * rather than directly reading processor_id or read_cpuid() directly.
+ */
+static inline u32 __attribute_const__ read_cpuid_id(void)
+{
+	return read_cpuid(MIDR_EL1);
+}
+
+static inline u64 __attribute_const__ read_cpuid_mpidr(void)
+{
+	return read_cpuid(MPIDR_EL1);
+}
+
+static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
+{
+	return MIDR_IMPLEMENTOR(read_cpuid_id());
+}
+
+static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+{
+	return MIDR_PARTNUM(read_cpuid_id());
+}
+
+static inline u32 __attribute_const__ read_cpuid_cachetype(void)
+{
+	return read_cpuid(CTR_EL0);
+}
+
+static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
+{
+	return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) ||
+		(ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1);
+}
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
new file mode 100644
index 000000000..40ec68aa6
--- /dev/null
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_DEBUG_MONITORS_H
+#define __ASM_DEBUG_MONITORS_H
+
+#ifdef __KERNEL__
+
+/* Low-level stepping controls. */
+#define DBG_MDSCR_SS		(1 << 0)
+#define DBG_SPSR_SS		(1 << 21)
+
+/* MDSCR_EL1 enabling bits */
+#define DBG_MDSCR_KDE		(1 << 13)
+#define DBG_MDSCR_MDE		(1 << 15)
+#define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
+
+#define	DBG_ESR_EVT(x)		(((x) >> 27) & 0x7)
+
+/* AArch64 */
+#define DBG_ESR_EVT_HWBP	0x0
+#define DBG_ESR_EVT_HWSS	0x1
+#define DBG_ESR_EVT_HWWP	0x2
+#define DBG_ESR_EVT_BRK		0x6
+
+/*
+ * Break point instruction encoding
+ */
+#define BREAK_INSTR_SIZE		4
+
+/*
+ * ESR values expected for dynamic and compile time BRK instruction
+ */
+#define DBG_ESR_VAL_BRK(x)	(0xf2000000 | ((x) & 0xfffff))
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgbd are 0x400 - 0x7ff
+ * 0x100: for triggering a fault on purpose (reserved)
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ */
+#define FAULT_BRK_IMM			0x100
+#define KGDB_DYN_DBG_BRK_IMM		0x400
+#define KGDB_COMPILED_DBG_BRK_IMM	0x401
+
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON	0xd4200000
+
+/*
+ * BRK instruction for provoking a fault on purpose
+ * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
+ */
+#define AARCH64_BREAK_FAULT	(AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
+
+/*
+ * Extract byte from BRK instruction
+ */
+#define KGDB_DYN_DBG_BRK_INS_BYTE(x) \
+	((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff)
+
+/*
+ * Extract byte from BRK #imm16
+ */
+#define KGBD_DYN_DBG_BRK_IMM_BYTE(x) \
+	(((((KGDB_DYN_DBG_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff)
+
+#define KGDB_DYN_DBG_BRK_BYTE(x) \
+	(KGDB_DYN_DBG_BRK_INS_BYTE(x) | KGBD_DYN_DBG_BRK_IMM_BYTE(x))
+
+#define  KGDB_DYN_BRK_INS_BYTE0  KGDB_DYN_DBG_BRK_BYTE(0)
+#define  KGDB_DYN_BRK_INS_BYTE1  KGDB_DYN_DBG_BRK_BYTE(1)
+#define  KGDB_DYN_BRK_INS_BYTE2  KGDB_DYN_DBG_BRK_BYTE(2)
+#define  KGDB_DYN_BRK_INS_BYTE3  KGDB_DYN_DBG_BRK_BYTE(3)
+
+#define CACHE_FLUSH_IS_SAFE		1
+
+/* AArch32 */
+#define DBG_ESR_EVT_BKPT	0x4
+#define DBG_ESR_EVT_VECC	0x5
+
+#define AARCH32_BREAK_ARM	0x07f001f0
+#define AARCH32_BREAK_THUMB	0xde01
+#define AARCH32_BREAK_THUMB2_LO	0xf7f0
+#define AARCH32_BREAK_THUMB2_HI	0xa000
+
+#ifndef __ASSEMBLY__
+struct task_struct;
+
+#define DBG_ARCH_ID_RESERVED	0	/* In case of ptrace ABI updates. */
+
+#define DBG_HOOK_HANDLED	0
+#define DBG_HOOK_ERROR		1
+
+struct step_hook {
+	struct list_head node;
+	int (*fn)(struct pt_regs *regs, unsigned int esr);
+};
+
+void register_step_hook(struct step_hook *hook);
+void unregister_step_hook(struct step_hook *hook);
+
+struct break_hook {
+	struct list_head node;
+	u32 esr_val;
+	u32 esr_mask;
+	int (*fn)(struct pt_regs *regs, unsigned int esr);
+};
+
+void register_break_hook(struct break_hook *hook);
+void unregister_break_hook(struct break_hook *hook);
+
+u8 debug_monitors_arch(void);
+
+enum debug_el {
+	DBG_ACTIVE_EL0 = 0,
+	DBG_ACTIVE_EL1,
+};
+
+void enable_debug_monitors(enum debug_el el);
+void disable_debug_monitors(enum debug_el el);
+
+void user_rewind_single_step(struct task_struct *task);
+void user_fastforward_single_step(struct task_struct *task);
+
+void kernel_enable_single_step(struct pt_regs *regs);
+void kernel_disable_single_step(void);
+int kernel_active_single_step(void);
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+int reinstall_suspended_bps(struct pt_regs *regs);
+#else
+static inline int reinstall_suspended_bps(struct pt_regs *regs)
+{
+	return -ENODEV;
+}
+#endif
+
+int aarch32_break_handler(struct pt_regs *regs);
+
+#endif	/* __ASSEMBLY */
+#endif	/* __KERNEL__ */
+#endif	/* __ASM_DEBUG_MONITORS_H */
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
new file mode 100644
index 000000000..243ef256b
--- /dev/null
+++ b/arch/arm64/include/asm/device.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_DEVICE_H
+#define __ASM_DEVICE_H
+
+struct dev_archdata {
+	struct dma_map_ops *dma_ops;
+#ifdef CONFIG_IOMMU_API
+	void *iommu;			/* private IOMMU data */
+#endif
+	bool dma_coherent;
+};
+
+struct pdev_archdata {
+};
+
+#endif
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
new file mode 100644
index 000000000..9437e3dc5
--- /dev/null
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_DMA_MAPPING_H
+#define __ASM_DMA_MAPPING_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+
+#include <asm-generic/dma-coherent.h>
+
+#include <xen/xen.h>
+#include <asm/xen/hypervisor.h>
+
+#define DMA_ERROR_CODE	(~(dma_addr_t)0)
+extern struct dma_map_ops *dma_ops;
+
+static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
+{
+	if (unlikely(!dev) || !dev->archdata.dma_ops)
+		return dma_ops;
+	else
+		return dev->archdata.dma_ops;
+}
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+	if (xen_initial_domain())
+		return xen_dma_ops;
+	else
+		return __generic_dma_ops(dev);
+}
+
+static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				      struct iommu_ops *iommu, bool coherent)
+{
+	dev->archdata.dma_coherent = coherent;
+}
+#define arch_setup_dma_ops	arch_setup_dma_ops
+
+/* do not use this function in a driver */
+static inline bool is_device_dma_coherent(struct device *dev)
+{
+	if (!dev)
+		return false;
+	return dev->archdata.dma_coherent;
+}
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return (dma_addr_t)paddr;
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+{
+	return (phys_addr_t)dev_addr;
+}
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	debug_dma_mapping_error(dev, dev_addr);
+	return ops->mapping_error(dev, dev_addr);
+}
+
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	return ops->dma_supported(dev, mask);
+}
+
+static inline int dma_set_mask(struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+	*dev->dma_mask = mask;
+
+	return 0;
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+	if (!dev->dma_mask)
+		return false;
+
+	return addr + size - 1 <= *dev->dma_mask;
+}
+
+static inline void dma_mark_clean(void *addr, size_t size)
+{
+}
+
+#define dma_alloc_coherent(d, s, h, f)	dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, h, f)	dma_free_attrs(d, s, h, f, NULL)
+
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flags,
+				    struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	void *vaddr;
+
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr))
+		return vaddr;
+
+	vaddr = ops->alloc(dev, size, dma_handle, flags, attrs);
+	debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr);
+	return vaddr;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t dev_addr,
+				  struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	if (dma_release_from_coherent(dev, get_order(size), vaddr))
+		return;
+
+	debug_dma_free_coherent(dev, size, vaddr, dev_addr);
+	ops->free(dev, size, vaddr, dev_addr, attrs);
+}
+
+/*
+ * There is no dma_cache_sync() implementation, so just return NULL here.
+ */
+static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
+					  dma_addr_t *handle, gfp_t flags)
+{
+	return NULL;
+}
+
+static inline void dma_free_noncoherent(struct device *dev, size_t size,
+					void *cpu_addr, dma_addr_t handle)
+{
+}
+
+#endif	/* __KERNEL__ */
+#endif	/* __ASM_DMA_MAPPING_H */
diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h
new file mode 100644
index 000000000..69d37d87b
--- /dev/null
+++ b/arch/arm64/include/asm/dmi.h
@@ -0,0 +1,31 @@
+/*
+ * arch/arm64/include/asm/dmi.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Written by: Yi Li (yi.li@linaro.org)
+ *
+ * based on arch/ia64/include/asm/dmi.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __ASM_DMI_H
+#define __ASM_DMI_H
+
+#include <linux/io.h>
+#include <linux/slab.h>
+
+/*
+ * According to section 2.3.6 of the UEFI spec, the firmware should not
+ * request a virtual mapping for configuration tables such as SMBIOS.
+ * This means we have to map them before use.
+ */
+#define dmi_early_remap(x, l)		ioremap_cache(x, l)
+#define dmi_early_unmap(x, l)		iounmap(x)
+#define dmi_remap(x, l)			ioremap_cache(x, l)
+#define dmi_unmap(x)			iounmap(x)
+#define dmi_alloc(l)			kzalloc(l, GFP_KERNEL)
+
+#endif
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
new file mode 100644
index 000000000..ef572206f
--- /dev/null
+++ b/arch/arm64/include/asm/efi.h
@@ -0,0 +1,69 @@
+#ifndef _ASM_EFI_H
+#define _ASM_EFI_H
+
+#include <asm/io.h>
+#include <asm/neon.h>
+
+#ifdef CONFIG_EFI
+extern void efi_init(void);
+#else
+#define efi_init()
+#endif
+
+#define efi_call_virt(f, ...)						\
+({									\
+	efi_##f##_t *__f;						\
+	efi_status_t __s;						\
+									\
+	kernel_neon_begin();						\
+	efi_virtmap_load();						\
+	__f = efi.systab->runtime->f;					\
+	__s = __f(__VA_ARGS__);						\
+	efi_virtmap_unload();						\
+	kernel_neon_end();						\
+	__s;								\
+})
+
+#define __efi_call_virt(f, ...)						\
+({									\
+	efi_##f##_t *__f;						\
+									\
+	kernel_neon_begin();						\
+	efi_virtmap_load();						\
+	__f = efi.systab->runtime->f;					\
+	__f(__VA_ARGS__);						\
+	efi_virtmap_unload();						\
+	kernel_neon_end();						\
+})
+
+/* arch specific definitions used by the stub code */
+
+/*
+ * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from
+ * start of kernel and may not cross a 2MiB boundary. We set alignment to
+ * 2MiB so we know it won't cross a 2MiB boundary.
+ */
+#define EFI_FDT_ALIGN	SZ_2M   /* used by allocate_new_fdt_and_exit_boot() */
+#define MAX_FDT_OFFSET	SZ_512M
+
+#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+
+#define EFI_ALLOC_ALIGN		SZ_64K
+
+/*
+ * On ARM systems, virtually remapped UEFI runtime services are set up in two
+ * distinct stages:
+ * - The stub retrieves the final version of the memory map from UEFI, populates
+ *   the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime
+ *   service to communicate the new mapping to the firmware (Note that the new
+ *   mapping is not live at this time)
+ * - During an early initcall(), the EFI system table is permanently remapped
+ *   and the virtual remapping of the UEFI Runtime Services regions is loaded
+ *   into a private set of page tables. If this all succeeds, the Runtime
+ *   Services are enabled and the EFI_RUNTIME_SERVICES bit set.
+ */
+
+void efi_virtmap_load(void);
+void efi_virtmap_unload(void);
+
+#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
new file mode 100644
index 000000000..faad6df49
--- /dev/null
+++ b/arch/arm64/include/asm/elf.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_ELF_H
+#define __ASM_ELF_H
+
+#include <asm/hwcap.h>
+
+/*
+ * ELF register definitions..
+ */
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
+#define ELF_CORE_COPY_REGS(dest, regs)	\
+	*(struct user_pt_regs *)&(dest) = (regs)->user_regs;
+
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef struct user_fpsimd_state elf_fpregset_t;
+
+/*
+ * AArch64 static relocation types.
+ */
+
+/* Miscellaneous. */
+#define R_ARM_NONE			0
+#define R_AARCH64_NONE			256
+
+/* Data. */
+#define R_AARCH64_ABS64			257
+#define R_AARCH64_ABS32			258
+#define R_AARCH64_ABS16			259
+#define R_AARCH64_PREL64		260
+#define R_AARCH64_PREL32		261
+#define R_AARCH64_PREL16		262
+
+/* Instructions. */
+#define R_AARCH64_MOVW_UABS_G0		263
+#define R_AARCH64_MOVW_UABS_G0_NC	264
+#define R_AARCH64_MOVW_UABS_G1		265
+#define R_AARCH64_MOVW_UABS_G1_NC	266
+#define R_AARCH64_MOVW_UABS_G2		267
+#define R_AARCH64_MOVW_UABS_G2_NC	268
+#define R_AARCH64_MOVW_UABS_G3		269
+
+#define R_AARCH64_MOVW_SABS_G0		270
+#define R_AARCH64_MOVW_SABS_G1		271
+#define R_AARCH64_MOVW_SABS_G2		272
+
+#define R_AARCH64_LD_PREL_LO19		273
+#define R_AARCH64_ADR_PREL_LO21		274
+#define R_AARCH64_ADR_PREL_PG_HI21	275
+#define R_AARCH64_ADR_PREL_PG_HI21_NC	276
+#define R_AARCH64_ADD_ABS_LO12_NC	277
+#define R_AARCH64_LDST8_ABS_LO12_NC	278
+
+#define R_AARCH64_TSTBR14		279
+#define R_AARCH64_CONDBR19		280
+#define R_AARCH64_JUMP26		282
+#define R_AARCH64_CALL26		283
+#define R_AARCH64_LDST16_ABS_LO12_NC	284
+#define R_AARCH64_LDST32_ABS_LO12_NC	285
+#define R_AARCH64_LDST64_ABS_LO12_NC	286
+#define R_AARCH64_LDST128_ABS_LO12_NC	299
+
+#define R_AARCH64_MOVW_PREL_G0		287
+#define R_AARCH64_MOVW_PREL_G0_NC	288
+#define R_AARCH64_MOVW_PREL_G1		289
+#define R_AARCH64_MOVW_PREL_G1_NC	290
+#define R_AARCH64_MOVW_PREL_G2		291
+#define R_AARCH64_MOVW_PREL_G2_NC	292
+#define R_AARCH64_MOVW_PREL_G3		293
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS64
+#ifdef __AARCH64EB__
+#define ELF_DATA	ELFDATA2MSB
+#else
+#define ELF_DATA	ELFDATA2LSB
+#endif
+#define ELF_ARCH	EM_AARCH64
+
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+#define ELF_PLATFORM_SIZE	16
+#ifdef __AARCH64EB__
+#define ELF_PLATFORM		("aarch64_be")
+#else
+#define ELF_PLATFORM		("aarch64")
+#endif
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x)		((x)->e_machine == EM_AARCH64)
+
+#define elf_read_implies_exec(ex,stk)	(stk != EXSTACK_DISABLE_X)
+
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader.  We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
+
+/*
+ * When the program starts, a1 contains a pointer to a function to be
+ * registered with atexit, as per the SVR4 ABI.  A value of 0 means we have no
+ * such handler.
+ */
+#define ELF_PLAT_INIT(_r, load_addr)	(_r)->regs[0] = 0
+
+#define SET_PERSONALITY(ex)		clear_thread_flag(TIF_32BIT);
+
+#define ARCH_DLINFO							\
+do {									\
+	NEW_AUX_ENT(AT_SYSINFO_EHDR,					\
+		    (elf_addr_t)current->mm->context.vdso);		\
+} while (0)
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int uses_interp);
+
+/* 1GB of VA */
+#ifdef CONFIG_COMPAT
+#define STACK_RND_MASK			(test_thread_flag(TIF_32BIT) ? \
+						0x7ff >> (PAGE_SHIFT - 12) : \
+						0x3ffff >> (PAGE_SHIFT - 12))
+#else
+#define STACK_RND_MASK			(0x3ffff >> (PAGE_SHIFT - 12))
+#endif
+
+#ifdef CONFIG_COMPAT
+
+#ifdef __AARCH64EB__
+#define COMPAT_ELF_PLATFORM		("v8b")
+#else
+#define COMPAT_ELF_PLATFORM		("v8l")
+#endif
+
+#define COMPAT_ELF_ET_DYN_BASE		(2 * TASK_SIZE_32 / 3)
+
+/* AArch32 registers. */
+#define COMPAT_ELF_NGREG		18
+typedef unsigned int			compat_elf_greg_t;
+typedef compat_elf_greg_t		compat_elf_gregset_t[COMPAT_ELF_NGREG];
+
+/* AArch32 EABI. */
+#define EF_ARM_EABI_MASK		0xff000000
+#define compat_elf_check_arch(x)	(((x)->e_machine == EM_ARM) && \
+					 ((x)->e_flags & EF_ARM_EABI_MASK))
+
+#define compat_start_thread		compat_start_thread
+#define COMPAT_SET_PERSONALITY(ex)	set_thread_flag(TIF_32BIT);
+#define COMPAT_ARCH_DLINFO
+extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
+				      int uses_interp);
+#define compat_arch_setup_additional_pages \
+					aarch32_setup_vectors_page
+
+#endif /* CONFIG_COMPAT */
+
+#endif
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
new file mode 100644
index 000000000..70522450c
--- /dev/null
+++ b/arch/arm64/include/asm/esr.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_ESR_H
+#define __ASM_ESR_H
+
+#define ESR_ELx_EC_UNKNOWN	(0x00)
+#define ESR_ELx_EC_WFx		(0x01)
+/* Unallocated EC: 0x02 */
+#define ESR_ELx_EC_CP15_32	(0x03)
+#define ESR_ELx_EC_CP15_64	(0x04)
+#define ESR_ELx_EC_CP14_MR	(0x05)
+#define ESR_ELx_EC_CP14_LS	(0x06)
+#define ESR_ELx_EC_FP_ASIMD	(0x07)
+#define ESR_ELx_EC_CP10_ID	(0x08)
+/* Unallocated EC: 0x09 - 0x0B */
+#define ESR_ELx_EC_CP14_64	(0x0C)
+/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_ILL		(0x0E)
+/* Unallocated EC: 0x0F - 0x10 */
+#define ESR_ELx_EC_SVC32	(0x11)
+#define ESR_ELx_EC_HVC32	(0x12)
+#define ESR_ELx_EC_SMC32	(0x13)
+/* Unallocated EC: 0x14 */
+#define ESR_ELx_EC_SVC64	(0x15)
+#define ESR_ELx_EC_HVC64	(0x16)
+#define ESR_ELx_EC_SMC64	(0x17)
+#define ESR_ELx_EC_SYS64	(0x18)
+/* Unallocated EC: 0x19 - 0x1E */
+#define ESR_ELx_EC_IMP_DEF	(0x1f)
+#define ESR_ELx_EC_IABT_LOW	(0x20)
+#define ESR_ELx_EC_IABT_CUR	(0x21)
+#define ESR_ELx_EC_PC_ALIGN	(0x22)
+/* Unallocated EC: 0x23 */
+#define ESR_ELx_EC_DABT_LOW	(0x24)
+#define ESR_ELx_EC_DABT_CUR	(0x25)
+#define ESR_ELx_EC_SP_ALIGN	(0x26)
+/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_FP_EXC32	(0x28)
+/* Unallocated EC: 0x29 - 0x2B */
+#define ESR_ELx_EC_FP_EXC64	(0x2C)
+/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_SERROR	(0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW	(0x30)
+#define ESR_ELx_EC_BREAKPT_CUR	(0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW	(0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR	(0x33)
+#define ESR_ELx_EC_WATCHPT_LOW	(0x34)
+#define ESR_ELx_EC_WATCHPT_CUR	(0x35)
+/* Unallocated EC: 0x36 - 0x37 */
+#define ESR_ELx_EC_BKPT32	(0x38)
+/* Unallocated EC: 0x39 */
+#define ESR_ELx_EC_VECTOR32	(0x3A)
+/* Unallocted EC: 0x3B */
+#define ESR_ELx_EC_BRK64	(0x3C)
+/* Unallocated EC: 0x3D - 0x3F */
+#define ESR_ELx_EC_MAX		(0x3F)
+
+#define ESR_ELx_EC_SHIFT	(26)
+#define ESR_ELx_EC_MASK		(UL(0x3F) << ESR_ELx_EC_SHIFT)
+
+#define ESR_ELx_IL		(UL(1) << 25)
+#define ESR_ELx_ISS_MASK	(ESR_ELx_IL - 1)
+#define ESR_ELx_ISV		(UL(1) << 24)
+#define ESR_ELx_SAS_SHIFT	(22)
+#define ESR_ELx_SAS		(UL(3) << ESR_ELx_SAS_SHIFT)
+#define ESR_ELx_SSE		(UL(1) << 21)
+#define ESR_ELx_SRT_SHIFT	(16)
+#define ESR_ELx_SRT_MASK	(UL(0x1F) << ESR_ELx_SRT_SHIFT)
+#define ESR_ELx_SF 		(UL(1) << 15)
+#define ESR_ELx_AR 		(UL(1) << 14)
+#define ESR_ELx_EA 		(UL(1) << 9)
+#define ESR_ELx_CM 		(UL(1) << 8)
+#define ESR_ELx_S1PTW 		(UL(1) << 7)
+#define ESR_ELx_WNR		(UL(1) << 6)
+#define ESR_ELx_FSC		(0x3F)
+#define ESR_ELx_FSC_TYPE	(0x3C)
+#define ESR_ELx_FSC_EXTABT	(0x10)
+#define ESR_ELx_FSC_ACCESS	(0x08)
+#define ESR_ELx_FSC_FAULT	(0x04)
+#define ESR_ELx_FSC_PERM	(0x0C)
+#define ESR_ELx_CV		(UL(1) << 24)
+#define ESR_ELx_COND_SHIFT	(20)
+#define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
+#define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
+#define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+
+const char *esr_get_class_string(u32 esr);
+#endif /* __ASSEMBLY */
+
+#endif /* __ASM_ESR_H */
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
new file mode 100644
index 000000000..0303705fc
--- /dev/null
+++ b/arch/arm64/include/asm/exception.h
@@ -0,0 +1,24 @@
+/*
+ * Based on arch/arm/include/asm/exception.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_EXCEPTION_H
+#define __ASM_EXCEPTION_H
+
+#define __exception	__attribute__((section(".exception.text")))
+#define __exception_irq_entry	__exception
+
+#endif	/* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h
new file mode 100644
index 000000000..db0563c23
--- /dev/null
+++ b/arch/arm64/include/asm/exec.h
@@ -0,0 +1,23 @@
+/*
+ * Based on arch/arm/include/asm/exec.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_EXEC_H
+#define __ASM_EXEC_H
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+#endif	/* __ASM_EXEC_H */
diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h
new file mode 100644
index 000000000..adb88a64b
--- /dev/null
+++ b/arch/arm64/include/asm/fb.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_FB_H_
+#define __ASM_FB_H_
+
+#include <linux/fb.h>
+#include <linux/fs.h>
+#include <asm/page.h>
+
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+				unsigned long off)
+{
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+}
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* __ASM_FB_H_ */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
new file mode 100644
index 000000000..95e6b6dcb
--- /dev/null
+++ b/arch/arm64/include/asm/fixmap.h
@@ -0,0 +1,73 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright (C) 2013 Mark Salter <msalter@redhat.com>
+ *
+ * Adapted from arch/x86_64 version.
+ *
+ */
+
+#ifndef _ASM_ARM64_FIXMAP_H
+#define _ASM_ARM64_FIXMAP_H
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * page-sized. Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ */
+enum fixed_addresses {
+	FIX_HOLE,
+	FIX_EARLYCON_MEM_BASE,
+	FIX_TEXT_POKE0,
+	__end_of_permanent_fixed_addresses,
+
+	/*
+	 * Temporary boot-time mappings, used by early_ioremap(),
+	 * before ioremap() is functional.
+	 */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define NR_FIX_BTMAPS		4
+#else
+#define NR_FIX_BTMAPS		64
+#endif
+#define FIX_BTMAPS_SLOTS	7
+#define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+	__end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
+
+#define FIXMAP_PAGE_IO     __pgprot(PROT_DEVICE_nGnRE)
+
+void __init early_fixmap_init(void);
+
+#define __early_set_fixmap __set_fixmap
+
+#define __late_set_fixmap __set_fixmap
+#define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR)
+
+extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
+
+#include <asm-generic/fixmap.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_ARM64_FIXMAP_H */
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
new file mode 100644
index 000000000..50f559f57
--- /dev/null
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_FP_H
+#define __ASM_FP_H
+
+#include <asm/ptrace.h>
+
+#ifndef __ASSEMBLY__
+
+/*
+ * FP/SIMD storage area has:
+ *  - FPSR and FPCR
+ *  - 32 128-bit data registers
+ *
+ * Note that user_fpsimd forms a prefix of this structure, which is
+ * relied upon in the ptrace FP/SIMD accessors.
+ */
+struct fpsimd_state {
+	union {
+		struct user_fpsimd_state user_fpsimd;
+		struct {
+			__uint128_t vregs[32];
+			u32 fpsr;
+			u32 fpcr;
+		};
+	};
+	/* the id of the last cpu to have restored this state */
+	unsigned int cpu;
+};
+
+/*
+ * Struct for stacking the bottom 'n' FP/SIMD registers.
+ */
+struct fpsimd_partial_state {
+	u32		fpsr;
+	u32		fpcr;
+	u32		num_regs;
+	__uint128_t	vregs[32];
+};
+
+
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+/* Masks for extracting the FPSR and FPCR from the FPSCR */
+#define VFP_FPSCR_STAT_MASK	0xf800009f
+#define VFP_FPSCR_CTRL_MASK	0x07f79f00
+/*
+ * The VFP state has 32x64-bit registers and a single 32-bit
+ * control/status register.
+ */
+#define VFP_STATE_SIZE		((32 * 8) + 4)
+#endif
+
+struct task_struct;
+
+extern void fpsimd_save_state(struct fpsimd_state *state);
+extern void fpsimd_load_state(struct fpsimd_state *state);
+
+extern void fpsimd_thread_switch(struct task_struct *next);
+extern void fpsimd_flush_thread(void);
+
+extern void fpsimd_preserve_current_state(void);
+extern void fpsimd_restore_current_state(void);
+extern void fpsimd_update_current_state(struct fpsimd_state *state);
+
+extern void fpsimd_flush_task_state(struct task_struct *target);
+
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+				      u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
new file mode 100644
index 000000000..a2daf1293
--- /dev/null
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -0,0 +1,133 @@
+/*
+ * FP/SIMD state saving and restoring macros
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+.macro fpsimd_save state, tmpnr
+	stp	q0, q1, [\state, #16 * 0]
+	stp	q2, q3, [\state, #16 * 2]
+	stp	q4, q5, [\state, #16 * 4]
+	stp	q6, q7, [\state, #16 * 6]
+	stp	q8, q9, [\state, #16 * 8]
+	stp	q10, q11, [\state, #16 * 10]
+	stp	q12, q13, [\state, #16 * 12]
+	stp	q14, q15, [\state, #16 * 14]
+	stp	q16, q17, [\state, #16 * 16]
+	stp	q18, q19, [\state, #16 * 18]
+	stp	q20, q21, [\state, #16 * 20]
+	stp	q22, q23, [\state, #16 * 22]
+	stp	q24, q25, [\state, #16 * 24]
+	stp	q26, q27, [\state, #16 * 26]
+	stp	q28, q29, [\state, #16 * 28]
+	stp	q30, q31, [\state, #16 * 30]!
+	mrs	x\tmpnr, fpsr
+	str	w\tmpnr, [\state, #16 * 2]
+	mrs	x\tmpnr, fpcr
+	str	w\tmpnr, [\state, #16 * 2 + 4]
+.endm
+
+.macro fpsimd_restore_fpcr state, tmp
+	/*
+	 * Writes to fpcr may be self-synchronising, so avoid restoring
+	 * the register if it hasn't changed.
+	 */
+	mrs	\tmp, fpcr
+	cmp	\tmp, \state
+	b.eq	9999f
+	msr	fpcr, \state
+9999:
+.endm
+
+/* Clobbers \state */
+.macro fpsimd_restore state, tmpnr
+	ldp	q0, q1, [\state, #16 * 0]
+	ldp	q2, q3, [\state, #16 * 2]
+	ldp	q4, q5, [\state, #16 * 4]
+	ldp	q6, q7, [\state, #16 * 6]
+	ldp	q8, q9, [\state, #16 * 8]
+	ldp	q10, q11, [\state, #16 * 10]
+	ldp	q12, q13, [\state, #16 * 12]
+	ldp	q14, q15, [\state, #16 * 14]
+	ldp	q16, q17, [\state, #16 * 16]
+	ldp	q18, q19, [\state, #16 * 18]
+	ldp	q20, q21, [\state, #16 * 20]
+	ldp	q22, q23, [\state, #16 * 22]
+	ldp	q24, q25, [\state, #16 * 24]
+	ldp	q26, q27, [\state, #16 * 26]
+	ldp	q28, q29, [\state, #16 * 28]
+	ldp	q30, q31, [\state, #16 * 30]!
+	ldr	w\tmpnr, [\state, #16 * 2]
+	msr	fpsr, x\tmpnr
+	ldr	w\tmpnr, [\state, #16 * 2 + 4]
+	fpsimd_restore_fpcr x\tmpnr, \state
+.endm
+
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+	mrs	x\tmpnr1, fpsr
+	str	w\numnr, [\state, #8]
+	mrs	x\tmpnr2, fpcr
+	stp	w\tmpnr1, w\tmpnr2, [\state]
+	adr	x\tmpnr1, 0f
+	add	\state, \state, x\numnr, lsl #4
+	sub	x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
+	br	x\tmpnr1
+	stp	q30, q31, [\state, #-16 * 30 - 16]
+	stp	q28, q29, [\state, #-16 * 28 - 16]
+	stp	q26, q27, [\state, #-16 * 26 - 16]
+	stp	q24, q25, [\state, #-16 * 24 - 16]
+	stp	q22, q23, [\state, #-16 * 22 - 16]
+	stp	q20, q21, [\state, #-16 * 20 - 16]
+	stp	q18, q19, [\state, #-16 * 18 - 16]
+	stp	q16, q17, [\state, #-16 * 16 - 16]
+	stp	q14, q15, [\state, #-16 * 14 - 16]
+	stp	q12, q13, [\state, #-16 * 12 - 16]
+	stp	q10, q11, [\state, #-16 * 10 - 16]
+	stp	q8, q9, [\state, #-16 * 8 - 16]
+	stp	q6, q7, [\state, #-16 * 6 - 16]
+	stp	q4, q5, [\state, #-16 * 4 - 16]
+	stp	q2, q3, [\state, #-16 * 2 - 16]
+	stp	q0, q1, [\state, #-16 * 0 - 16]
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+	ldp	w\tmpnr1, w\tmpnr2, [\state]
+	msr	fpsr, x\tmpnr1
+	fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
+	adr	x\tmpnr1, 0f
+	ldr	w\tmpnr2, [\state, #8]
+	add	\state, \state, x\tmpnr2, lsl #4
+	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
+	br	x\tmpnr1
+	ldp	q30, q31, [\state, #-16 * 30 - 16]
+	ldp	q28, q29, [\state, #-16 * 28 - 16]
+	ldp	q26, q27, [\state, #-16 * 26 - 16]
+	ldp	q24, q25, [\state, #-16 * 24 - 16]
+	ldp	q22, q23, [\state, #-16 * 22 - 16]
+	ldp	q20, q21, [\state, #-16 * 20 - 16]
+	ldp	q18, q19, [\state, #-16 * 18 - 16]
+	ldp	q16, q17, [\state, #-16 * 16 - 16]
+	ldp	q14, q15, [\state, #-16 * 14 - 16]
+	ldp	q12, q13, [\state, #-16 * 12 - 16]
+	ldp	q10, q11, [\state, #-16 * 10 - 16]
+	ldp	q8, q9, [\state, #-16 * 8 - 16]
+	ldp	q6, q7, [\state, #-16 * 6 - 16]
+	ldp	q4, q5, [\state, #-16 * 4 - 16]
+	ldp	q2, q3, [\state, #-16 * 2 - 16]
+	ldp	q0, q1, [\state, #-16 * 0 - 16]
+0:
+.endm
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
new file mode 100644
index 000000000..c5534facf
--- /dev/null
+++ b/arch/arm64/include/asm/ftrace.h
@@ -0,0 +1,59 @@
+/*
+ * arch/arm64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_FTRACE_H
+#define __ASM_FTRACE_H
+
+#include <asm/insn.h>
+
+#define MCOUNT_ADDR		((unsigned long)_mcount)
+#define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
+
+#ifndef __ASSEMBLY__
+#include <linux/compat.h>
+
+extern void _mcount(unsigned long);
+extern void *return_address(unsigned int);
+
+struct dyn_arch_ftrace {
+	/* No extra data needed for arm64 */
+};
+
+extern unsigned long ftrace_graph_call;
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/*
+	 * addr is the address of the mcount call instruction.
+	 * recordmcount does the necessary offset calculation.
+	 */
+	return addr;
+}
+
+#define ftrace_return_address(n) return_address(n)
+
+/*
+ * Because AArch32 mode does not share the same syscall table with AArch64,
+ * tracing compat syscalls may result in reporting bogus syscalls or even
+ * hang-up, so just do not trace them.
+ * See kernel/trace/trace_syscalls.c
+ *
+ * x86 code says:
+ * If the user realy wants these, then they should use the
+ * raw syscall tracepoints with filtering.
+ */
+#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
+static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
+{
+	return is_compat_task();
+}
+#endif /* ifndef __ASSEMBLY__ */
+
+#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
new file mode 100644
index 000000000..5f750dc96
--- /dev/null
+++ b/arch/arm64/include/asm/futex.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_FUTEX_H
+#define __ASM_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg)		\
+	asm volatile(							\
+"1:	ldxr	%w1, %2\n"						\
+	insn "\n"							\
+"2:	stlxr	%w3, %w0, %2\n"						\
+"	cbnz	%w3, 1b\n"						\
+"	dmb	ish\n"							\
+"3:\n"									\
+"	.pushsection .fixup,\"ax\"\n"					\
+"	.align	2\n"							\
+"4:	mov	%w0, %w5\n"						\
+"	b	3b\n"							\
+"	.popsection\n"							\
+"	.pushsection __ex_table,\"a\"\n"				\
+"	.align	3\n"							\
+"	.quad	1b, 4b, 2b, 4b\n"					\
+"	.popsection\n"							\
+	: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp)	\
+	: "r" (oparg), "Ir" (-EFAULT)					\
+	: "memory")
+
+static inline int
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret, tmp;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	pagefault_disable();	/* implies preempt_disable() */
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("mov	%w0, %w4",
+				  ret, oldval, uaddr, tmp, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("add	%w0, %w1, %w4",
+				  ret, oldval, uaddr, tmp, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("orr	%w0, %w1, %w4",
+				  ret, oldval, uaddr, tmp, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("and	%w0, %w1, %w4",
+				  ret, oldval, uaddr, tmp, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("eor	%w0, %w1, %w4",
+				  ret, oldval, uaddr, tmp, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();	/* subsumes preempt_enable() */
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	int ret = 0;
+	u32 val, tmp;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+"1:	ldxr	%w1, %2\n"
+"	sub	%w3, %w1, %w4\n"
+"	cbnz	%w3, 3f\n"
+"2:	stlxr	%w3, %w5, %2\n"
+"	cbnz	%w3, 1b\n"
+"	dmb	ish\n"
+"3:\n"
+"	.pushsection .fixup,\"ax\"\n"
+"4:	mov	%w0, %w6\n"
+"	b	3b\n"
+"	.popsection\n"
+"	.pushsection __ex_table,\"a\"\n"
+"	.align	3\n"
+"	.quad	1b, 4b, 2b, 4b\n"
+"	.popsection\n"
+	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
+	: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
+	: "memory");
+
+	*uval = val;
+	return ret;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_FUTEX_H */
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
new file mode 100644
index 000000000..6aae421f4
--- /dev/null
+++ b/arch/arm64/include/asm/hardirq.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <linux/cache.h>
+#include <linux/threads.h>
+#include <asm/irq.h>
+
+#define NR_IPI	5
+
+typedef struct {
+	unsigned int __softirq_pending;
+#ifdef CONFIG_SMP
+	unsigned int ipi_irqs[NR_IPI];
+#endif
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+#define __inc_irq_stat(cpu, member)	__IRQ_STAT(cpu, member)++
+#define __get_irq_stat(cpu, member)	__IRQ_STAT(cpu, member)
+
+#ifdef CONFIG_SMP
+u64 smp_irq_stat_cpu(unsigned int cpu);
+#define arch_irq_stat_cpu	smp_irq_stat_cpu
+#endif
+
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED	1
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+	extern unsigned long irq_err_count;
+	irq_err_count++;
+}
+
+/*
+ * No arch-specific IRQ flags.
+ */
+#define set_irq_flags(irq, flags)
+
+#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
new file mode 100644
index 000000000..5b7ca8ace
--- /dev/null
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -0,0 +1,117 @@
+/*
+ * arch/arm64/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_HUGETLB_H
+#define __ASM_HUGETLB_H
+
+#include <asm-generic/hugetlb.h>
+#include <asm/page.h>
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000..52b484b6a
--- /dev/null
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_HW_BREAKPOINT_H
+#define __ASM_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+
+struct arch_hw_breakpoint_ctrl {
+	u32 __reserved	: 19,
+	len		: 8,
+	type		: 2,
+	privilege	: 2,
+	enabled		: 1;
+};
+
+struct arch_hw_breakpoint {
+	u64 address;
+	u64 trigger;
+	struct arch_hw_breakpoint_ctrl ctrl;
+};
+
+static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
+{
+	return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
+		ctrl.enabled;
+}
+
+static inline void decode_ctrl_reg(u32 reg,
+				   struct arch_hw_breakpoint_ctrl *ctrl)
+{
+	ctrl->enabled	= reg & 0x1;
+	reg >>= 1;
+	ctrl->privilege	= reg & 0x3;
+	reg >>= 2;
+	ctrl->type	= reg & 0x3;
+	reg >>= 2;
+	ctrl->len	= reg & 0xff;
+}
+
+/* Breakpoint */
+#define ARM_BREAKPOINT_EXECUTE	0
+
+/* Watchpoints */
+#define ARM_BREAKPOINT_LOAD	1
+#define ARM_BREAKPOINT_STORE	2
+#define AARCH64_ESR_ACCESS_MASK	(1 << 6)
+
+/* Privilege Levels */
+#define AARCH64_BREAKPOINT_EL1	1
+#define AARCH64_BREAKPOINT_EL0	2
+
+/* Lengths */
+#define ARM_BREAKPOINT_LEN_1	0x1
+#define ARM_BREAKPOINT_LEN_2	0x3
+#define ARM_BREAKPOINT_LEN_4	0xf
+#define ARM_BREAKPOINT_LEN_8	0xff
+
+/* Kernel stepping */
+#define ARM_KERNEL_STEP_NONE	0
+#define ARM_KERNEL_STEP_ACTIVE	1
+#define ARM_KERNEL_STEP_SUSPEND	2
+
+/*
+ * Limits.
+ * Changing these will require modifications to the register accessors.
+ */
+#define ARM_MAX_BRP		16
+#define ARM_MAX_WRP		16
+
+/* Virtual debug register bases. */
+#define AARCH64_DBG_REG_BVR	0
+#define AARCH64_DBG_REG_BCR	(AARCH64_DBG_REG_BVR + ARM_MAX_BRP)
+#define AARCH64_DBG_REG_WVR	(AARCH64_DBG_REG_BCR + ARM_MAX_BRP)
+#define AARCH64_DBG_REG_WCR	(AARCH64_DBG_REG_WVR + ARM_MAX_WRP)
+
+/* Debug register names. */
+#define AARCH64_DBG_REG_NAME_BVR	"bvr"
+#define AARCH64_DBG_REG_NAME_BCR	"bcr"
+#define AARCH64_DBG_REG_NAME_WVR	"wvr"
+#define AARCH64_DBG_REG_NAME_WCR	"wcr"
+
+/* Accessor macros for the debug registers. */
+#define AARCH64_DBG_READ(N, REG, VAL) do {\
+	asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\
+} while (0)
+
+#define AARCH64_DBG_WRITE(N, REG, VAL) do {\
+	asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\
+} while (0)
+
+struct task_struct;
+struct notifier_block;
+struct perf_event;
+struct pmu;
+
+extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+				  int *gen_len, int *gen_type);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+					   unsigned long val, void *data);
+
+extern int arch_install_hw_breakpoint(struct perf_event *bp);
+extern void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+extern void hw_breakpoint_pmu_read(struct perf_event *bp);
+extern int hw_breakpoint_slots(int type);
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+extern void hw_breakpoint_thread_switch(struct task_struct *next);
+extern void ptrace_hw_copy_thread(struct task_struct *task);
+#else
+static inline void hw_breakpoint_thread_switch(struct task_struct *next)
+{
+}
+static inline void ptrace_hw_copy_thread(struct task_struct *task)
+{
+}
+#endif
+
+extern struct pmu perf_ops_bp;
+
+#endif	/* __KERNEL__ */
+#endif	/* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
new file mode 100644
index 000000000..0ad735166
--- /dev/null
+++ b/arch/arm64/include/asm/hwcap.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_HWCAP_H
+#define __ASM_HWCAP_H
+
+#include <uapi/asm/hwcap.h>
+
+#define COMPAT_HWCAP_HALF	(1 << 1)
+#define COMPAT_HWCAP_THUMB	(1 << 2)
+#define COMPAT_HWCAP_FAST_MULT	(1 << 4)
+#define COMPAT_HWCAP_VFP	(1 << 6)
+#define COMPAT_HWCAP_EDSP	(1 << 7)
+#define COMPAT_HWCAP_NEON	(1 << 12)
+#define COMPAT_HWCAP_VFPv3	(1 << 13)
+#define COMPAT_HWCAP_TLS	(1 << 15)
+#define COMPAT_HWCAP_VFPv4	(1 << 16)
+#define COMPAT_HWCAP_IDIVA	(1 << 17)
+#define COMPAT_HWCAP_IDIVT	(1 << 18)
+#define COMPAT_HWCAP_IDIV	(COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
+#define COMPAT_HWCAP_LPAE	(1 << 20)
+#define COMPAT_HWCAP_EVTSTRM	(1 << 21)
+
+#define COMPAT_HWCAP2_AES	(1 << 0)
+#define COMPAT_HWCAP2_PMULL	(1 << 1)
+#define COMPAT_HWCAP2_SHA1	(1 << 2)
+#define COMPAT_HWCAP2_SHA2	(1 << 3)
+#define COMPAT_HWCAP2_CRC32	(1 << 4)
+
+#ifndef __ASSEMBLY__
+/*
+ * This yields a mask that user programs can use to figure out what
+ * instruction set this cpu supports.
+ */
+#define ELF_HWCAP		(elf_hwcap)
+
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP	(compat_elf_hwcap)
+#define COMPAT_ELF_HWCAP2	(compat_elf_hwcap2)
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
+#endif
+
+extern unsigned long elf_hwcap;
+#endif
+#endif
diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
new file mode 100644
index 000000000..d2c79049f
--- /dev/null
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_ARM64_HYPERVISOR_H
+#define _ASM_ARM64_HYPERVISOR_H
+
+#include <asm/xen/hypervisor.h>
+
+#endif
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
new file mode 100644
index 000000000..f81b328d9
--- /dev/null
+++ b/arch/arm64/include/asm/insn.h
@@ -0,0 +1,372 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef	__ASM_INSN_H
+#define	__ASM_INSN_H
+#include <linux/types.h>
+
+/* A64 instructions are always 32 bits. */
+#define	AARCH64_INSN_SIZE		4
+
+#ifndef __ASSEMBLY__
+/*
+ * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
+ * Section C3.1 "A64 instruction index by encoding":
+ * AArch64 main encoding table
+ *  Bit position
+ *   28 27 26 25	Encoding Group
+ *   0  0  -  -		Unallocated
+ *   1  0  0  -		Data processing, immediate
+ *   1  0  1  -		Branch, exception generation and system instructions
+ *   -  1  -  0		Loads and stores
+ *   -  1  0  1		Data processing - register
+ *   0  1  1  1		Data processing - SIMD and floating point
+ *   1  1  1  1		Data processing - SIMD and floating point
+ * "-" means "don't care"
+ */
+enum aarch64_insn_encoding_class {
+	AARCH64_INSN_CLS_UNKNOWN,	/* UNALLOCATED */
+	AARCH64_INSN_CLS_DP_IMM,	/* Data processing - immediate */
+	AARCH64_INSN_CLS_DP_REG,	/* Data processing - register */
+	AARCH64_INSN_CLS_DP_FPSIMD,	/* Data processing - SIMD and FP */
+	AARCH64_INSN_CLS_LDST,		/* Loads and stores */
+	AARCH64_INSN_CLS_BR_SYS,	/* Branch, exception generation and
+					 * system instructions */
+};
+
+enum aarch64_insn_hint_op {
+	AARCH64_INSN_HINT_NOP	= 0x0 << 5,
+	AARCH64_INSN_HINT_YIELD	= 0x1 << 5,
+	AARCH64_INSN_HINT_WFE	= 0x2 << 5,
+	AARCH64_INSN_HINT_WFI	= 0x3 << 5,
+	AARCH64_INSN_HINT_SEV	= 0x4 << 5,
+	AARCH64_INSN_HINT_SEVL	= 0x5 << 5,
+};
+
+enum aarch64_insn_imm_type {
+	AARCH64_INSN_IMM_ADR,
+	AARCH64_INSN_IMM_26,
+	AARCH64_INSN_IMM_19,
+	AARCH64_INSN_IMM_16,
+	AARCH64_INSN_IMM_14,
+	AARCH64_INSN_IMM_12,
+	AARCH64_INSN_IMM_9,
+	AARCH64_INSN_IMM_7,
+	AARCH64_INSN_IMM_6,
+	AARCH64_INSN_IMM_S,
+	AARCH64_INSN_IMM_R,
+	AARCH64_INSN_IMM_MAX
+};
+
+enum aarch64_insn_register_type {
+	AARCH64_INSN_REGTYPE_RT,
+	AARCH64_INSN_REGTYPE_RN,
+	AARCH64_INSN_REGTYPE_RT2,
+	AARCH64_INSN_REGTYPE_RM,
+	AARCH64_INSN_REGTYPE_RD,
+	AARCH64_INSN_REGTYPE_RA,
+};
+
+enum aarch64_insn_register {
+	AARCH64_INSN_REG_0  = 0,
+	AARCH64_INSN_REG_1  = 1,
+	AARCH64_INSN_REG_2  = 2,
+	AARCH64_INSN_REG_3  = 3,
+	AARCH64_INSN_REG_4  = 4,
+	AARCH64_INSN_REG_5  = 5,
+	AARCH64_INSN_REG_6  = 6,
+	AARCH64_INSN_REG_7  = 7,
+	AARCH64_INSN_REG_8  = 8,
+	AARCH64_INSN_REG_9  = 9,
+	AARCH64_INSN_REG_10 = 10,
+	AARCH64_INSN_REG_11 = 11,
+	AARCH64_INSN_REG_12 = 12,
+	AARCH64_INSN_REG_13 = 13,
+	AARCH64_INSN_REG_14 = 14,
+	AARCH64_INSN_REG_15 = 15,
+	AARCH64_INSN_REG_16 = 16,
+	AARCH64_INSN_REG_17 = 17,
+	AARCH64_INSN_REG_18 = 18,
+	AARCH64_INSN_REG_19 = 19,
+	AARCH64_INSN_REG_20 = 20,
+	AARCH64_INSN_REG_21 = 21,
+	AARCH64_INSN_REG_22 = 22,
+	AARCH64_INSN_REG_23 = 23,
+	AARCH64_INSN_REG_24 = 24,
+	AARCH64_INSN_REG_25 = 25,
+	AARCH64_INSN_REG_26 = 26,
+	AARCH64_INSN_REG_27 = 27,
+	AARCH64_INSN_REG_28 = 28,
+	AARCH64_INSN_REG_29 = 29,
+	AARCH64_INSN_REG_FP = 29, /* Frame pointer */
+	AARCH64_INSN_REG_30 = 30,
+	AARCH64_INSN_REG_LR = 30, /* Link register */
+	AARCH64_INSN_REG_ZR = 31, /* Zero: as source register */
+	AARCH64_INSN_REG_SP = 31  /* Stack pointer: as load/store base reg */
+};
+
+enum aarch64_insn_variant {
+	AARCH64_INSN_VARIANT_32BIT,
+	AARCH64_INSN_VARIANT_64BIT
+};
+
+enum aarch64_insn_condition {
+	AARCH64_INSN_COND_EQ = 0x0, /* == */
+	AARCH64_INSN_COND_NE = 0x1, /* != */
+	AARCH64_INSN_COND_CS = 0x2, /* unsigned >= */
+	AARCH64_INSN_COND_CC = 0x3, /* unsigned < */
+	AARCH64_INSN_COND_MI = 0x4, /* < 0 */
+	AARCH64_INSN_COND_PL = 0x5, /* >= 0 */
+	AARCH64_INSN_COND_VS = 0x6, /* overflow */
+	AARCH64_INSN_COND_VC = 0x7, /* no overflow */
+	AARCH64_INSN_COND_HI = 0x8, /* unsigned > */
+	AARCH64_INSN_COND_LS = 0x9, /* unsigned <= */
+	AARCH64_INSN_COND_GE = 0xa, /* signed >= */
+	AARCH64_INSN_COND_LT = 0xb, /* signed < */
+	AARCH64_INSN_COND_GT = 0xc, /* signed > */
+	AARCH64_INSN_COND_LE = 0xd, /* signed <= */
+	AARCH64_INSN_COND_AL = 0xe, /* always */
+};
+
+enum aarch64_insn_branch_type {
+	AARCH64_INSN_BRANCH_NOLINK,
+	AARCH64_INSN_BRANCH_LINK,
+	AARCH64_INSN_BRANCH_RETURN,
+	AARCH64_INSN_BRANCH_COMP_ZERO,
+	AARCH64_INSN_BRANCH_COMP_NONZERO,
+};
+
+enum aarch64_insn_size_type {
+	AARCH64_INSN_SIZE_8,
+	AARCH64_INSN_SIZE_16,
+	AARCH64_INSN_SIZE_32,
+	AARCH64_INSN_SIZE_64,
+};
+
+enum aarch64_insn_ldst_type {
+	AARCH64_INSN_LDST_LOAD_REG_OFFSET,
+	AARCH64_INSN_LDST_STORE_REG_OFFSET,
+	AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX,
+	AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
+	AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
+	AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
+};
+
+enum aarch64_insn_adsb_type {
+	AARCH64_INSN_ADSB_ADD,
+	AARCH64_INSN_ADSB_SUB,
+	AARCH64_INSN_ADSB_ADD_SETFLAGS,
+	AARCH64_INSN_ADSB_SUB_SETFLAGS
+};
+
+enum aarch64_insn_movewide_type {
+	AARCH64_INSN_MOVEWIDE_ZERO,
+	AARCH64_INSN_MOVEWIDE_KEEP,
+	AARCH64_INSN_MOVEWIDE_INVERSE
+};
+
+enum aarch64_insn_bitfield_type {
+	AARCH64_INSN_BITFIELD_MOVE,
+	AARCH64_INSN_BITFIELD_MOVE_UNSIGNED,
+	AARCH64_INSN_BITFIELD_MOVE_SIGNED
+};
+
+enum aarch64_insn_data1_type {
+	AARCH64_INSN_DATA1_REVERSE_16,
+	AARCH64_INSN_DATA1_REVERSE_32,
+	AARCH64_INSN_DATA1_REVERSE_64,
+};
+
+enum aarch64_insn_data2_type {
+	AARCH64_INSN_DATA2_UDIV,
+	AARCH64_INSN_DATA2_SDIV,
+	AARCH64_INSN_DATA2_LSLV,
+	AARCH64_INSN_DATA2_LSRV,
+	AARCH64_INSN_DATA2_ASRV,
+	AARCH64_INSN_DATA2_RORV,
+};
+
+enum aarch64_insn_data3_type {
+	AARCH64_INSN_DATA3_MADD,
+	AARCH64_INSN_DATA3_MSUB,
+};
+
+enum aarch64_insn_logic_type {
+	AARCH64_INSN_LOGIC_AND,
+	AARCH64_INSN_LOGIC_BIC,
+	AARCH64_INSN_LOGIC_ORR,
+	AARCH64_INSN_LOGIC_ORN,
+	AARCH64_INSN_LOGIC_EOR,
+	AARCH64_INSN_LOGIC_EON,
+	AARCH64_INSN_LOGIC_AND_SETFLAGS,
+	AARCH64_INSN_LOGIC_BIC_SETFLAGS
+};
+
+#define	__AARCH64_INSN_FUNCS(abbr, mask, val)	\
+static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
+{ return (code & (mask)) == (val); } \
+static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
+{ return (val); }
+
+__AARCH64_INSN_FUNCS(str_reg,	0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(ldr_reg,	0x3FE0EC00, 0x38606800)
+__AARCH64_INSN_FUNCS(stp_post,	0x7FC00000, 0x28800000)
+__AARCH64_INSN_FUNCS(ldp_post,	0x7FC00000, 0x28C00000)
+__AARCH64_INSN_FUNCS(stp_pre,	0x7FC00000, 0x29800000)
+__AARCH64_INSN_FUNCS(ldp_pre,	0x7FC00000, 0x29C00000)
+__AARCH64_INSN_FUNCS(add_imm,	0x7F000000, 0x11000000)
+__AARCH64_INSN_FUNCS(adds_imm,	0x7F000000, 0x31000000)
+__AARCH64_INSN_FUNCS(sub_imm,	0x7F000000, 0x51000000)
+__AARCH64_INSN_FUNCS(subs_imm,	0x7F000000, 0x71000000)
+__AARCH64_INSN_FUNCS(movn,	0x7F800000, 0x12800000)
+__AARCH64_INSN_FUNCS(sbfm,	0x7F800000, 0x13000000)
+__AARCH64_INSN_FUNCS(bfm,	0x7F800000, 0x33000000)
+__AARCH64_INSN_FUNCS(movz,	0x7F800000, 0x52800000)
+__AARCH64_INSN_FUNCS(ubfm,	0x7F800000, 0x53000000)
+__AARCH64_INSN_FUNCS(movk,	0x7F800000, 0x72800000)
+__AARCH64_INSN_FUNCS(add,	0x7F200000, 0x0B000000)
+__AARCH64_INSN_FUNCS(adds,	0x7F200000, 0x2B000000)
+__AARCH64_INSN_FUNCS(sub,	0x7F200000, 0x4B000000)
+__AARCH64_INSN_FUNCS(subs,	0x7F200000, 0x6B000000)
+__AARCH64_INSN_FUNCS(madd,	0x7FE08000, 0x1B000000)
+__AARCH64_INSN_FUNCS(msub,	0x7FE08000, 0x1B008000)
+__AARCH64_INSN_FUNCS(udiv,	0x7FE0FC00, 0x1AC00800)
+__AARCH64_INSN_FUNCS(sdiv,	0x7FE0FC00, 0x1AC00C00)
+__AARCH64_INSN_FUNCS(lslv,	0x7FE0FC00, 0x1AC02000)
+__AARCH64_INSN_FUNCS(lsrv,	0x7FE0FC00, 0x1AC02400)
+__AARCH64_INSN_FUNCS(asrv,	0x7FE0FC00, 0x1AC02800)
+__AARCH64_INSN_FUNCS(rorv,	0x7FE0FC00, 0x1AC02C00)
+__AARCH64_INSN_FUNCS(rev16,	0x7FFFFC00, 0x5AC00400)
+__AARCH64_INSN_FUNCS(rev32,	0x7FFFFC00, 0x5AC00800)
+__AARCH64_INSN_FUNCS(rev64,	0x7FFFFC00, 0x5AC00C00)
+__AARCH64_INSN_FUNCS(and,	0x7F200000, 0x0A000000)
+__AARCH64_INSN_FUNCS(bic,	0x7F200000, 0x0A200000)
+__AARCH64_INSN_FUNCS(orr,	0x7F200000, 0x2A000000)
+__AARCH64_INSN_FUNCS(orn,	0x7F200000, 0x2A200000)
+__AARCH64_INSN_FUNCS(eor,	0x7F200000, 0x4A000000)
+__AARCH64_INSN_FUNCS(eon,	0x7F200000, 0x4A200000)
+__AARCH64_INSN_FUNCS(ands,	0x7F200000, 0x6A000000)
+__AARCH64_INSN_FUNCS(bics,	0x7F200000, 0x6A200000)
+__AARCH64_INSN_FUNCS(b,		0xFC000000, 0x14000000)
+__AARCH64_INSN_FUNCS(bl,	0xFC000000, 0x94000000)
+__AARCH64_INSN_FUNCS(cbz,	0x7F000000, 0x34000000)
+__AARCH64_INSN_FUNCS(cbnz,	0x7F000000, 0x35000000)
+__AARCH64_INSN_FUNCS(tbz,	0x7F000000, 0x36000000)
+__AARCH64_INSN_FUNCS(tbnz,	0x7F000000, 0x37000000)
+__AARCH64_INSN_FUNCS(bcond,	0xFF000010, 0x54000000)
+__AARCH64_INSN_FUNCS(svc,	0xFFE0001F, 0xD4000001)
+__AARCH64_INSN_FUNCS(hvc,	0xFFE0001F, 0xD4000002)
+__AARCH64_INSN_FUNCS(smc,	0xFFE0001F, 0xD4000003)
+__AARCH64_INSN_FUNCS(brk,	0xFFE0001F, 0xD4200000)
+__AARCH64_INSN_FUNCS(hint,	0xFFFFF01F, 0xD503201F)
+__AARCH64_INSN_FUNCS(br,	0xFFFFFC1F, 0xD61F0000)
+__AARCH64_INSN_FUNCS(blr,	0xFFFFFC1F, 0xD63F0000)
+__AARCH64_INSN_FUNCS(ret,	0xFFFFFC1F, 0xD65F0000)
+
+#undef	__AARCH64_INSN_FUNCS
+
+bool aarch64_insn_is_nop(u32 insn);
+
+int aarch64_insn_read(void *addr, u32 *insnp);
+int aarch64_insn_write(void *addr, u32 insn);
+enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
+u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+				  u32 insn, u64 imm);
+u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
+				enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
+				     enum aarch64_insn_register reg,
+				     enum aarch64_insn_variant variant,
+				     enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
+				     enum aarch64_insn_condition cond);
+u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op);
+u32 aarch64_insn_gen_nop(void);
+u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
+				enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
+				    enum aarch64_insn_register base,
+				    enum aarch64_insn_register offset,
+				    enum aarch64_insn_size_type size,
+				    enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
+				     enum aarch64_insn_register reg2,
+				     enum aarch64_insn_register base,
+				     int offset,
+				     enum aarch64_insn_variant variant,
+				     enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
+				 enum aarch64_insn_register src,
+				 int imm, enum aarch64_insn_variant variant,
+				 enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
+			      enum aarch64_insn_register src,
+			      int immr, int imms,
+			      enum aarch64_insn_variant variant,
+			      enum aarch64_insn_bitfield_type type);
+u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
+			      int imm, int shift,
+			      enum aarch64_insn_variant variant,
+			      enum aarch64_insn_movewide_type type);
+u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
+					 enum aarch64_insn_register src,
+					 enum aarch64_insn_register reg,
+					 int shift,
+					 enum aarch64_insn_variant variant,
+					 enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
+			   enum aarch64_insn_register src,
+			   enum aarch64_insn_variant variant,
+			   enum aarch64_insn_data1_type type);
+u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
+			   enum aarch64_insn_register src,
+			   enum aarch64_insn_register reg,
+			   enum aarch64_insn_variant variant,
+			   enum aarch64_insn_data2_type type);
+u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
+			   enum aarch64_insn_register src,
+			   enum aarch64_insn_register reg1,
+			   enum aarch64_insn_register reg2,
+			   enum aarch64_insn_variant variant,
+			   enum aarch64_insn_data3_type type);
+u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
+					 enum aarch64_insn_register src,
+					 enum aarch64_insn_register reg,
+					 int shift,
+					 enum aarch64_insn_variant variant,
+					 enum aarch64_insn_logic_type type);
+
+bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
+
+int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
+int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
+int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+
+bool aarch32_insn_is_wide(u32 insn);
+
+#define A32_RN_OFFSET	16
+#define A32_RT_OFFSET	12
+#define A32_RT2_OFFSET	 0
+
+u32 aarch32_insn_extract_reg_num(u32 insn, int offset);
+u32 aarch32_insn_mcr_extract_opc2(u32 insn);
+u32 aarch32_insn_mcr_extract_crm(u32 insn);
+#endif /* __ASSEMBLY__ */
+
+#endif	/* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
new file mode 100644
index 000000000..540f7c0ae
--- /dev/null
+++ b/arch/arm64/include/asm/io.h
@@ -0,0 +1,215 @@
+/*
+ * Based on arch/arm/include/asm/io.h
+ *
+ * Copyright (C) 1996-2000 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_IO_H
+#define __ASM_IO_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/blk_types.h>
+
+#include <asm/byteorder.h>
+#include <asm/barrier.h>
+#include <asm/memory.h>
+#include <asm/pgtable.h>
+#include <asm/early_ioremap.h>
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+
+#include <xen/xen.h>
+
+/*
+ * Generic IO read/write.  These perform native-endian accesses.
+ */
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+{
+	asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr));
+}
+
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+{
+	asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr));
+}
+
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+{
+	asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr));
+}
+
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+{
+	asm volatile("str %0, [%1]" : : "r" (val), "r" (addr));
+}
+
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+	u8 val;
+	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
+				 "ldarb %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+		     : "=r" (val) : "r" (addr));
+	return val;
+}
+
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+	u16 val;
+
+	asm volatile(ALTERNATIVE("ldrh %w0, [%1]",
+				 "ldarh %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+		     : "=r" (val) : "r" (addr));
+	return val;
+}
+
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+	u32 val;
+	asm volatile(ALTERNATIVE("ldr %w0, [%1]",
+				 "ldar %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+		     : "=r" (val) : "r" (addr));
+	return val;
+}
+
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+	u64 val;
+	asm volatile(ALTERNATIVE("ldr %0, [%1]",
+				 "ldar %0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+		     : "=r" (val) : "r" (addr));
+	return val;
+}
+
+/* IO barriers */
+#define __iormb()		rmb()
+#define __iowmb()		wmb()
+
+#define mmiowb()		do { } while (0)
+
+/*
+ * Relaxed I/O memory access primitives. These follow the Device memory
+ * ordering rules but do not guarantee any ordering relative to Normal memory
+ * accesses.
+ */
+#define readb_relaxed(c)	({ u8  __v = __raw_readb(c); __v; })
+#define readw_relaxed(c)	({ u16 __v = le16_to_cpu((__force __le16)__raw_readw(c)); __v; })
+#define readl_relaxed(c)	({ u32 __v = le32_to_cpu((__force __le32)__raw_readl(c)); __v; })
+#define readq_relaxed(c)	({ u64 __v = le64_to_cpu((__force __le64)__raw_readq(c)); __v; })
+
+#define writeb_relaxed(v,c)	((void)__raw_writeb((v),(c)))
+#define writew_relaxed(v,c)	((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
+#define writel_relaxed(v,c)	((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
+#define writeq_relaxed(v,c)	((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+
+/*
+ * I/O memory access primitives. Reads are ordered relative to any
+ * following Normal memory access. Writes are ordered relative to any prior
+ * Normal memory access.
+ */
+#define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(); __v; })
+#define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(); __v; })
+#define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+#define readq(c)		({ u64 __v = readq_relaxed(c); __iormb(); __v; })
+
+#define writeb(v,c)		({ __iowmb(); writeb_relaxed((v),(c)); })
+#define writew(v,c)		({ __iowmb(); writew_relaxed((v),(c)); })
+#define writel(v,c)		({ __iowmb(); writel_relaxed((v),(c)); })
+#define writeq(v,c)		({ __iowmb(); writeq_relaxed((v),(c)); })
+
+/*
+ *  I/O port access primitives.
+ */
+#define arch_has_dev_port()	(1)
+#define IO_SPACE_LIMIT		(PCI_IO_SIZE - 1)
+#define PCI_IOBASE		((void __iomem *)PCI_IO_START)
+
+/*
+ * String version of I/O memory access operations.
+ */
+extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
+extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
+extern void __memset_io(volatile void __iomem *, int, size_t);
+
+#define memset_io(c,v,l)	__memset_io((c),(v),(l))
+#define memcpy_fromio(a,c,l)	__memcpy_fromio((a),(c),(l))
+#define memcpy_toio(c,a,l)	__memcpy_toio((c),(a),(l))
+
+/*
+ * I/O memory mapping functions.
+ */
+extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot);
+extern void __iounmap(volatile void __iomem *addr);
+extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
+
+#define ioremap(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
+#define ioremap_nocache(addr, size)	__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
+#define ioremap_wc(addr, size)		__ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
+#define iounmap				__iounmap
+
+/*
+ * io{read,write}{16,32}be() macros
+ */
+#define ioread16be(p)		({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; })
+#define ioread32be(p)		({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; })
+
+#define iowrite16be(v,p)	({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); })
+#define iowrite32be(v,p)	({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p)	p
+
+#include <asm-generic/io.h>
+
+/*
+ * More restrictive address range checking than the default implementation
+ * (PHYS_OFFSET and PHYS_MASK taken into account).
+ */
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
+extern int devmem_is_allowed(unsigned long pfn);
+
+struct bio_vec;
+extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
+				      const struct bio_vec *vec2);
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
+	(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&				\
+	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
+
+#endif	/* __KERNEL__ */
+#endif	/* __ASM_IO_H */
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
new file mode 100644
index 000000000..bbb251b14
--- /dev/null
+++ b/arch/arm64/include/asm/irq.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_IRQ_H
+#define __ASM_IRQ_H
+
+#include <linux/irqchip/arm-gic-acpi.h>
+
+#include <asm-generic/irq.h>
+
+struct pt_regs;
+
+extern void migrate_irqs(void);
+extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
+
+static inline void acpi_irq_init(void)
+{
+	/*
+	 * Hardcode ACPI IRQ chip initialization to GICv2 for now.
+	 * Proper irqchip infrastructure will be implemented along with
+	 * incoming  GICv2m|GICv3|ITS bits.
+	 */
+	acpi_gic_init();
+}
+#define acpi_irq_init acpi_irq_init
+
+#endif
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
new file mode 100644
index 000000000..b4f6b19a8
--- /dev/null
+++ b/arch/arm64/include/asm/irq_work.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_IRQ_WORK_H
+#define __ASM_IRQ_WORK_H
+
+#ifdef CONFIG_SMP
+
+#include <asm/smp.h>
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return !!__smp_cross_call;
+}
+
+#else
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return false;
+}
+
+#endif
+
+#endif /* __ASM_IRQ_WORK_H */
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
new file mode 100644
index 000000000..11cc941bd
--- /dev/null
+++ b/arch/arm64/include/asm/irqflags.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_IRQFLAGS_H
+#define __ASM_IRQFLAGS_H
+
+#ifdef __KERNEL__
+
+#include <asm/ptrace.h>
+
+/*
+ * CPU interrupt mask handling.
+ */
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags;
+	asm volatile(
+		"mrs	%0, daif		// arch_local_irq_save\n"
+		"msr	daifset, #2"
+		: "=r" (flags)
+		:
+		: "memory");
+	return flags;
+}
+
+static inline void arch_local_irq_enable(void)
+{
+	asm volatile(
+		"msr	daifclr, #2		// arch_local_irq_enable"
+		:
+		:
+		: "memory");
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	asm volatile(
+		"msr	daifset, #2		// arch_local_irq_disable"
+		:
+		:
+		: "memory");
+}
+
+#define local_fiq_enable()	asm("msr	daifclr, #1" : : : "memory")
+#define local_fiq_disable()	asm("msr	daifset, #1" : : : "memory")
+
+#define local_async_enable()	asm("msr	daifclr, #4" : : : "memory")
+#define local_async_disable()	asm("msr	daifset, #4" : : : "memory")
+
+/*
+ * Save the current interrupt enable state.
+ */
+static inline unsigned long arch_local_save_flags(void)
+{
+	unsigned long flags;
+	asm volatile(
+		"mrs	%0, daif		// arch_local_save_flags"
+		: "=r" (flags)
+		:
+		: "memory");
+	return flags;
+}
+
+/*
+ * restore saved IRQ state
+ */
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+	asm volatile(
+		"msr	daif, %0		// arch_local_irq_restore"
+	:
+	: "r" (flags)
+	: "memory");
+}
+
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+	return flags & PSR_I_BIT;
+}
+
+/*
+ * save and restore debug state
+ */
+#define local_dbg_save(flags)						\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		asm volatile(						\
+		"mrs    %0, daif		// local_dbg_save\n"	\
+		"msr    daifset, #8"					\
+		: "=r" (flags) : : "memory");				\
+	} while (0)
+
+#define local_dbg_restore(flags)					\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		asm volatile(						\
+		"msr    daif, %0		// local_dbg_restore\n"	\
+		: : "r" (flags) : "memory");				\
+	} while (0)
+
+#define local_dbg_enable()	asm("msr	daifclr, #8" : : : "memory")
+#define local_dbg_disable()	asm("msr	daifset, #8" : : : "memory")
+
+#endif
+#endif
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
new file mode 100644
index 000000000..c0e5165c2
--- /dev/null
+++ b/arch/arm64/include/asm/jump_label.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Based on arch/arm/include/asm/jump_label.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_JUMP_LABEL_H
+#define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/insn.h>
+
+#define JUMP_LABEL_NOP_SIZE		AARCH64_INSN_SIZE
+
+static __always_inline bool arch_static_branch(struct static_key *key)
+{
+	asm goto("1: nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 3\n\t"
+		 ".quad 1b, %l[l_yes], %c0\n\t"
+		 ".popsection\n\t"
+		 :  :  "i"(key) :  : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+typedef u64 jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif  /* __ASSEMBLY__ */
+#endif	/* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
new file mode 100644
index 000000000..f69f69c81
--- /dev/null
+++ b/arch/arm64/include/asm/kgdb.h
@@ -0,0 +1,84 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/include/kgdb.h
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KGDB_H
+#define __ARM_KGDB_H
+
+#include <linux/ptrace.h>
+#include <asm/debug-monitors.h>
+
+#ifndef	__ASSEMBLY__
+
+static inline void arch_kgdb_breakpoint(void)
+{
+	asm ("brk %0" : : "I" (KGDB_COMPILED_DBG_BRK_IMM));
+}
+
+extern void kgdb_handle_bus_error(void);
+extern int kgdb_fault_expected;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * gdb is expecting the following registers layout.
+ *
+ * General purpose regs:
+ *     r0-r30: 64 bit
+ *     sp,pc : 64 bit
+ *     pstate  : 64 bit
+ *     Total: 34
+ * FPU regs:
+ *     f0-f31: 128 bit
+ *     Total: 32
+ * Extra regs
+ *     fpsr & fpcr: 32 bit
+ *     Total: 2
+ *
+ */
+
+#define _GP_REGS		34
+#define _FP_REGS		32
+#define _EXTRA_REGS		2
+/*
+ * general purpose registers size in bytes.
+ * pstate is only 4 bytes. subtract 4 bytes
+ */
+#define GP_REG_BYTES		(_GP_REGS * 8)
+#define DBG_MAX_REG_NUM		(_GP_REGS + _FP_REGS + _EXTRA_REGS)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * considering to hold all register contents, size is set
+ */
+
+#define BUFMAX			2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+
+#define NUMREGBYTES	((_GP_REGS * 8) + (_FP_REGS * 16) + \
+			(_EXTRA_REGS * 4))
+
+#endif /* __ASM_KGDB_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
new file mode 100644
index 000000000..ac6fafb95
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_ARM_H__
+#define __ARM64_KVM_ARM_H__
+
+#include <asm/esr.h>
+#include <asm/memory.h>
+#include <asm/types.h>
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_ID		(UL(1) << 33)
+#define HCR_CD		(UL(1) << 32)
+#define HCR_RW_SHIFT	31
+#define HCR_RW		(UL(1) << HCR_RW_SHIFT)
+#define HCR_TRVM	(UL(1) << 30)
+#define HCR_HCD		(UL(1) << 29)
+#define HCR_TDZ		(UL(1) << 28)
+#define HCR_TGE		(UL(1) << 27)
+#define HCR_TVM		(UL(1) << 26)
+#define HCR_TTLB	(UL(1) << 25)
+#define HCR_TPU		(UL(1) << 24)
+#define HCR_TPC		(UL(1) << 23)
+#define HCR_TSW		(UL(1) << 22)
+#define HCR_TAC		(UL(1) << 21)
+#define HCR_TIDCP	(UL(1) << 20)
+#define HCR_TSC		(UL(1) << 19)
+#define HCR_TID3	(UL(1) << 18)
+#define HCR_TID2	(UL(1) << 17)
+#define HCR_TID1	(UL(1) << 16)
+#define HCR_TID0	(UL(1) << 15)
+#define HCR_TWE		(UL(1) << 14)
+#define HCR_TWI		(UL(1) << 13)
+#define HCR_DC		(UL(1) << 12)
+#define HCR_BSU		(3 << 10)
+#define HCR_BSU_IS	(UL(1) << 10)
+#define HCR_FB		(UL(1) << 9)
+#define HCR_VA		(UL(1) << 8)
+#define HCR_VI		(UL(1) << 7)
+#define HCR_VF		(UL(1) << 6)
+#define HCR_AMO		(UL(1) << 5)
+#define HCR_IMO		(UL(1) << 4)
+#define HCR_FMO		(UL(1) << 3)
+#define HCR_PTW		(UL(1) << 2)
+#define HCR_SWIO	(UL(1) << 1)
+#define HCR_VM		(UL(1) << 0)
+
+/*
+ * The bits we set in HCR:
+ * RW:		64bit by default, can be overriden for 32bit VMs
+ * TAC:		Trap ACTLR
+ * TSC:		Trap SMC
+ * TVM:		Trap VM ops (until M+C set in SCTLR_EL1)
+ * TSW:		Trap cache operations by set/way
+ * TWE:		Trap WFE
+ * TWI:		Trap WFI
+ * TIDCP:	Trap L2CTLR/L2ECTLR
+ * BSU_IS:	Upgrade barriers to the inner shareable domain
+ * FB:		Force broadcast of all maintainance operations
+ * AMO:		Override CPSR.A and enable signaling with VA
+ * IMO:		Override CPSR.I and enable signaling with VI
+ * FMO:		Override CPSR.F and enable signaling with VF
+ * SWIO:	Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
+			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+#define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
+
+
+/* Hyp System Control Register (SCTLR_EL2) bits */
+#define SCTLR_EL2_EE	(1 << 25)
+#define SCTLR_EL2_WXN	(1 << 19)
+#define SCTLR_EL2_I	(1 << 12)
+#define SCTLR_EL2_SA	(1 << 3)
+#define SCTLR_EL2_C	(1 << 2)
+#define SCTLR_EL2_A	(1 << 1)
+#define SCTLR_EL2_M	1
+#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
+			 SCTLR_EL2_SA | SCTLR_EL2_I)
+
+/* TCR_EL2 Registers bits */
+#define TCR_EL2_TBI	(1 << 20)
+#define TCR_EL2_PS	(7 << 16)
+#define TCR_EL2_PS_40B	(2 << 16)
+#define TCR_EL2_TG0	(1 << 14)
+#define TCR_EL2_SH0	(3 << 12)
+#define TCR_EL2_ORGN0	(3 << 10)
+#define TCR_EL2_IRGN0	(3 << 8)
+#define TCR_EL2_T0SZ	0x3f
+#define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
+			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+
+#define TCR_EL2_FLAGS	(TCR_EL2_PS_40B)
+
+/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_PS_MASK	(7 << 16)
+#define VTCR_EL2_TG0_MASK	(1 << 14)
+#define VTCR_EL2_TG0_4K		(0 << 14)
+#define VTCR_EL2_TG0_64K	(1 << 14)
+#define VTCR_EL2_SH0_MASK	(3 << 12)
+#define VTCR_EL2_SH0_INNER	(3 << 12)
+#define VTCR_EL2_ORGN0_MASK	(3 << 10)
+#define VTCR_EL2_ORGN0_WBWA	(1 << 10)
+#define VTCR_EL2_IRGN0_MASK	(3 << 8)
+#define VTCR_EL2_IRGN0_WBWA	(1 << 8)
+#define VTCR_EL2_SL0_MASK	(3 << 6)
+#define VTCR_EL2_SL0_LVL1	(1 << 6)
+#define VTCR_EL2_T0SZ_MASK	0x3f
+#define VTCR_EL2_T0SZ_40B	24
+
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+/*
+ * Stage2 translation configuration:
+ * 40bits input  (T0SZ = 24)
+ * 64kB pages (TG0 = 1)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
+				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
+#define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
+#else
+/*
+ * Stage2 translation configuration:
+ * 40bits input  (T0SZ = 24)
+ * 4kB pages (TG0 = 0)
+ * 3 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
+				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
+#define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
+#endif
+
+#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
+#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  (UL(48))
+#define VTTBR_VMID_MASK	  (UL(0xFF) << VTTBR_VMID_SHIFT)
+
+/* Hyp System Trap Register */
+#define HSTR_EL2_TTEE	(1 << 16)
+#define HSTR_EL2_T(x)	(1 << x)
+
+/* Hyp Coprocessor Trap Register */
+#define CPTR_EL2_TCPAC	(1 << 31)
+#define CPTR_EL2_TTA	(1 << 20)
+#define CPTR_EL2_TFP	(1 << 10)
+
+/* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TDRA		(1 << 11)
+#define MDCR_EL2_TDOSA		(1 << 10)
+#define MDCR_EL2_TDA		(1 << 9)
+#define MDCR_EL2_TDE		(1 << 8)
+#define MDCR_EL2_HPME		(1 << 7)
+#define MDCR_EL2_TPM		(1 << 6)
+#define MDCR_EL2_TPMCR		(1 << 5)
+#define MDCR_EL2_HPMN_MASK	(0x1F)
+
+/* For compatibility with fault code shared with 32-bit */
+#define FSC_FAULT	ESR_ELx_FSC_FAULT
+#define FSC_ACCESS	ESR_ELx_FSC_ACCESS
+#define FSC_PERM	ESR_ELx_FSC_PERM
+
+/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+#define HPFAR_MASK	(~UL(0xf))
+
+#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
new file mode 100644
index 000000000..4f7310fa7
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+#include <asm/virt.h>
+
+/*
+ * 0 is reserved as an invalid value.
+ * Order *must* be kept in sync with the hyp switch code.
+ */
+#define	MPIDR_EL1	1	/* MultiProcessor Affinity Register */
+#define	CSSELR_EL1	2	/* Cache Size Selection Register */
+#define	SCTLR_EL1	3	/* System Control Register */
+#define	ACTLR_EL1	4	/* Auxilliary Control Register */
+#define	CPACR_EL1	5	/* Coprocessor Access Control */
+#define	TTBR0_EL1	6	/* Translation Table Base Register 0 */
+#define	TTBR1_EL1	7	/* Translation Table Base Register 1 */
+#define	TCR_EL1		8	/* Translation Control Register */
+#define	ESR_EL1		9	/* Exception Syndrome Register */
+#define	AFSR0_EL1	10	/* Auxilary Fault Status Register 0 */
+#define	AFSR1_EL1	11	/* Auxilary Fault Status Register 1 */
+#define	FAR_EL1		12	/* Fault Address Register */
+#define	MAIR_EL1	13	/* Memory Attribute Indirection Register */
+#define	VBAR_EL1	14	/* Vector Base Address Register */
+#define	CONTEXTIDR_EL1	15	/* Context ID Register */
+#define	TPIDR_EL0	16	/* Thread ID, User R/W */
+#define	TPIDRRO_EL0	17	/* Thread ID, User R/O */
+#define	TPIDR_EL1	18	/* Thread ID, Privileged */
+#define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
+#define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
+#define	PAR_EL1		21	/* Physical Address Register */
+#define MDSCR_EL1	22	/* Monitor Debug System Control Register */
+#define DBGBCR0_EL1	23	/* Debug Breakpoint Control Registers (0-15) */
+#define DBGBCR15_EL1	38
+#define DBGBVR0_EL1	39	/* Debug Breakpoint Value Registers (0-15) */
+#define DBGBVR15_EL1	54
+#define DBGWCR0_EL1	55	/* Debug Watchpoint Control Registers (0-15) */
+#define DBGWCR15_EL1	70
+#define DBGWVR0_EL1	71	/* Debug Watchpoint Value Registers (0-15) */
+#define DBGWVR15_EL1	86
+#define MDCCINT_EL1	87	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+
+/* 32bit specific registers. Keep them at the end of the range */
+#define	DACR32_EL2	88	/* Domain Access Control Register */
+#define	IFSR32_EL2	89	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	90	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	91	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	92	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	93	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	94
+
+/* 32bit mapping */
+#define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
+#define c0_CSSELR	(CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR	(SCTLR_EL1 * 2)	/* System Control Register */
+#define c1_ACTLR	(ACTLR_EL1 * 2)	/* Auxiliary Control Register */
+#define c1_CPACR	(CPACR_EL1 * 2)	/* Coprocessor Access Control */
+#define c2_TTBR0	(TTBR0_EL1 * 2)	/* Translation Table Base Register 0 */
+#define c2_TTBR0_high	(c2_TTBR0 + 1)	/* TTBR0 top 32 bits */
+#define c2_TTBR1	(TTBR1_EL1 * 2)	/* Translation Table Base Register 1 */
+#define c2_TTBR1_high	(c2_TTBR1 + 1)	/* TTBR1 top 32 bits */
+#define c2_TTBCR	(TCR_EL1 * 2)	/* Translation Table Base Control R. */
+#define c3_DACR		(DACR32_EL2 * 2)/* Domain Access Control Register */
+#define c5_DFSR		(ESR_EL1 * 2)	/* Data Fault Status Register */
+#define c5_IFSR		(IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+#define c5_ADFSR	(AFSR0_EL1 * 2)	/* Auxiliary Data Fault Status R */
+#define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
+#define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
+#define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
+#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
+#define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
+#define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
+#define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
+#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
+#define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
+#define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
+#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+
+#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
+#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)
+
+#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
+
+#define ARM_EXCEPTION_IRQ	  0
+#define ARM_EXCEPTION_TRAP	  1
+
+#define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
+#define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+
+#ifndef __ASSEMBLY__
+struct kvm;
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_vector[];
+
+#define	__kvm_hyp_code_start	__hyp_text_start
+#define	__kvm_hyp_code_end	__hyp_text_end
+
+extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern u64 __vgic_v3_get_ich_vtr_el2(void);
+
+extern char __save_vgic_v2_state[];
+extern char __restore_vgic_v2_state[];
+extern char __save_vgic_v3_state[];
+extern char __restore_vgic_v3_state[];
+
+#endif
+
+#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
new file mode 100644
index 000000000..0b52377a6
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_coproc.h
+ * Copyright (C) 2012 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_COPROC_H__
+#define __ARM64_KVM_COPROC_H__
+
+#include <linux/kvm_host.h>
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+struct kvm_sys_reg_table {
+	const struct sys_reg_desc *table;
+	size_t num;
+};
+
+struct kvm_sys_reg_target_table {
+	struct kvm_sys_reg_table table64;
+	struct kvm_sys_reg_table table32;
+};
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+				       struct kvm_sys_reg_target_table *table);
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#define kvm_coproc_table_init kvm_sys_reg_table_init
+void kvm_sys_reg_table_init(void);
+
+struct kvm_one_reg;
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
new file mode 100644
index 000000000..17e92f05b
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -0,0 +1,293 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/kvm_emulate.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_EMULATE_H__
+#define __ARM64_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+
+#include <asm/esr.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
+#include <asm/ptrace.h>
+#include <asm/cputype.h>
+
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
+		vcpu->arch.hcr_el2 &= ~HCR_RW;
+}
+
+static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hcr_el2;
+}
+
+static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+{
+	vcpu->arch.hcr_el2 = hcr;
+}
+
+static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
+}
+
+static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
+}
+
+static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+}
+
+static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+{
+	return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
+}
+
+static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return kvm_condition_valid32(vcpu);
+
+	return true;
+}
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		kvm_skip_instr32(vcpu, is_wide_instr);
+	else
+		*vcpu_pc(vcpu) += 4;
+}
+
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+{
+	*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
+}
+
+static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_reg32(vcpu, reg_num);
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+}
+
+/* Get vcpu SPSR for current mode */
+static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_spsr32(vcpu);
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
+{
+	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+
+	if (vcpu_mode_is_32bit(vcpu))
+		return mode > COMPAT_PSR_MODE_USR;
+
+	return mode != PSR_MODE_EL0t;
+}
+
+static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.esr_el2;
+}
+
+static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.far_el2;
+}
+
+static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+{
+	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+}
+
+static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
+}
+
+static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
+}
+
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR);
+}
+
+static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
+}
+
+static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+{
+	return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
+}
+
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA);
+}
+
+static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
+}
+
+static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+{
+	return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
+}
+
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL);
+}
+
+static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) >> ESR_ELx_EC_SHIFT;
+}
+
+static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
+}
+
+static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
+{
+	return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+}
+
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		*vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
+	else
+		vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+}
+
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
+
+	return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return be16_to_cpu(data & 0xffff);
+		case 4:
+			return be32_to_cpu(data & 0xffffffff);
+		default:
+			return be64_to_cpu(data);
+		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return le16_to_cpu(data & 0xffff);
+		case 4:
+			return le32_to_cpu(data & 0xffffffff);
+		default:
+			return le64_to_cpu(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_be16(data & 0xffff);
+		case 4:
+			return cpu_to_be32(data & 0xffffffff);
+		default:
+			return cpu_to_be64(data);
+		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_le16(data & 0xffff);
+		case 4:
+			return cpu_to_le32(data & 0xffffffff);
+		default:
+			return cpu_to_le64(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
+#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
new file mode 100644
index 000000000..f0f58c9be
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_host.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HOST_H__
+#define __ARM64_KVM_HOST_H__
+
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
+
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
+#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
+#define KVM_USER_MEM_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
+
+#define KVM_VCPU_MAX_FEATURES 3
+
+int __attribute_const__ kvm_target_cpu(void);
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+int kvm_arch_dev_ioctl_check_extension(long ext);
+
+struct kvm_arch {
+	/* The VMID generation used for the virt. memory system */
+	u64    vmid_gen;
+	u32    vmid;
+
+	/* 1-level 2nd stage table and lock */
+	spinlock_t pgd_lock;
+	pgd_t *pgd;
+
+	/* VTTBR value associated with above pgd and vmid */
+	u64    vttbr;
+
+	/* The maximum number of vCPUs depends on the used GIC model */
+	int max_vcpus;
+
+	/* Interrupt controller */
+	struct vgic_dist	vgic;
+
+	/* Timer */
+	struct arch_timer_kvm	timer;
+};
+
+#define KVM_NR_MEM_OBJS     40
+
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
+struct kvm_mmu_memory_cache {
+	int nobjs;
+	void *objects[KVM_NR_MEM_OBJS];
+};
+
+struct kvm_vcpu_fault_info {
+	u32 esr_el2;		/* Hyp Syndrom Register */
+	u64 far_el2;		/* Hyp Fault Address Register */
+	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
+};
+
+struct kvm_cpu_context {
+	struct kvm_regs	gp_regs;
+	union {
+		u64 sys_regs[NR_SYS_REGS];
+		u32 copro[NR_COPRO_REGS];
+	};
+};
+
+typedef struct kvm_cpu_context kvm_cpu_context_t;
+
+struct kvm_vcpu_arch {
+	struct kvm_cpu_context ctxt;
+
+	/* HYP configuration */
+	u64 hcr_el2;
+
+	/* Exception Information */
+	struct kvm_vcpu_fault_info fault;
+
+	/* Debug state */
+	u64 debug_flags;
+
+	/* Pointer to host CPU context */
+	kvm_cpu_context_t *host_cpu_context;
+
+	/* VGIC state */
+	struct vgic_cpu vgic_cpu;
+	struct arch_timer_cpu timer_cpu;
+
+	/*
+	 * Anything that is not used directly from assembly code goes
+	 * here.
+	 */
+
+	/* Don't run the guest */
+	bool pause;
+
+	/* IO related fields */
+	struct kvm_decode mmio_decode;
+
+	/* Interrupt related fields */
+	u64 irq_lines;		/* IRQ and FIQ levels */
+
+	/* Cache some mmu pages needed inside spinlock regions */
+	struct kvm_mmu_memory_cache mmu_page_cache;
+
+	/* Target CPU and feature flags */
+	int target;
+	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+
+	/* Detect first run of a vcpu */
+	bool has_run_once;
+};
+
+#define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
+#define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
+/*
+ * CP14 and CP15 live in the same array, as they are backed by the
+ * same system registers.
+ */
+#define vcpu_cp14(v,r)		((v)->arch.ctxt.copro[(r)])
+#define vcpu_cp15(v,r)		((v)->arch.ctxt.copro[(r)])
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r))
+#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r) + 1)
+#else
+#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r) + 1)
+#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r))
+#endif
+
+struct kvm_vm_stat {
+	u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+	u32 halt_successful_poll;
+	u32 halt_wakeup;
+};
+
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+							 unsigned long address)
+{
+}
+
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
+
+u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
+
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		int exception_index);
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+				       phys_addr_t pgd_ptr,
+				       unsigned long hyp_stack_ptr,
+				       unsigned long vector_ptr)
+{
+	/*
+	 * Call initialization code, and switch to the full blown
+	 * HYP code.
+	 */
+	kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+		     hyp_stack_ptr, vector_ptr);
+}
+
+struct vgic_sr_vectors {
+	void	*save_vgic;
+	void	*restore_vgic;
+};
+
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+	extern struct vgic_sr_vectors __vgic_sr_vectors;
+
+	switch(vgic->type)
+	{
+	case VGIC_V2:
+		__vgic_sr_vectors.save_vgic	= __save_vgic_v2_state;
+		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v2_state;
+		break;
+
+#ifdef CONFIG_ARM_GIC_V3
+	case VGIC_V3:
+		__vgic_sr_vectors.save_vgic	= __save_vgic_v3_state;
+		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v3_state;
+		break;
+#endif
+
+	default:
+		BUG();
+	}
+}
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
+#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
new file mode 100644
index 000000000..889c908ee
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMIO_H__
+#define __ARM64_KVM_MMIO_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * This is annoying. The mmio code requires this, even if we don't
+ * need any decoding. To be fixed.
+ */
+struct kvm_decode {
+	unsigned long rt;
+	bool sign_extend;
+};
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		 phys_addr_t fault_ipa);
+
+#endif	/* __ARM64_KVM_MMIO_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
new file mode 100644
index 000000000..61505676d
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMU_H__
+#define __ARM64_KVM_MMU_H__
+
+#include <asm/page.h>
+#include <asm/memory.h>
+
+/*
+ * As we only have the TTBR0_EL2 register, we cannot express
+ * "negative" addresses. This makes it impossible to directly share
+ * mappings with the kernel.
+ *
+ * Instead, give the HYP mode its own VA region at a fixed offset from
+ * the kernel by just masking the top bits (which are all ones for a
+ * kernel address).
+ */
+#define HYP_PAGE_OFFSET_SHIFT	VA_BITS
+#define HYP_PAGE_OFFSET_MASK	((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
+#define HYP_PAGE_OFFSET		(PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
+
+/*
+ * Our virtual mapping for the idmap-ed MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the last
+ * possible page, where no kernel mapping will ever exist.
+ */
+#define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
+
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
+ * levels in addition to the PGD and potentially the PUD which are
+ * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
+ * tables use one level of tables less than the kernel.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define KVM_MMU_CACHE_MIN_PAGES	1
+#else
+#define KVM_MMU_CACHE_MIN_PAGES	2
+#endif
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Convert a kernel VA into a HYP VA.
+ * reg: VA to be converted.
+ */
+.macro kern_hyp_va	reg
+	and	\reg, \reg, #HYP_PAGE_OFFSET_MASK
+.endm
+
+#else
+
+#include <asm/pgalloc.h>
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+
+#define KERN_TO_HYP(kva)	((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
+
+/*
+ * We currently only support a 40bit IPA.
+ */
+#define KVM_PHYS_SHIFT	(40)
+#define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
+#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
+
+int create_hyp_mappings(void *from, void *to);
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);
+
+void stage2_unmap_vm(struct kvm *kvm);
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size, bool writable);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
+phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
+int kvm_mmu_init(void);
+void kvm_clear_hyp_idmap(void);
+
+#define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
+#define	kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
+
+static inline void kvm_clean_pgd(pgd_t *pgd) {}
+static inline void kvm_clean_pmd(pmd_t *pmd) {}
+static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
+static inline void kvm_clean_pte(pte_t *pte) {}
+static inline void kvm_clean_pte_entry(pte_t *pte) {}
+
+static inline void kvm_set_s2pte_writable(pte_t *pte)
+{
+	pte_val(*pte) |= PTE_S2_RDWR;
+}
+
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+	pmd_val(*pmd) |= PMD_S2_RDWR;
+}
+
+static inline void kvm_set_s2pte_readonly(pte_t *pte)
+{
+	pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY;
+}
+
+static inline bool kvm_s2pte_readonly(pte_t *pte)
+{
+	return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+}
+
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+{
+	pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY;
+}
+
+static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+{
+	return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY;
+}
+
+
+#define kvm_pgd_addr_end(addr, end)	pgd_addr_end(addr, end)
+#define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
+#define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
+
+/*
+ * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
+ * the entire IPA input range with a single pgd entry, and we would only need
+ * one pgd entry.  Note that in this case, the pgd is actually not used by
+ * the MMU for Stage-2 translations, but is merely a fake pgd used as a data
+ * structure for the kernel pgtable macros to work.
+ */
+#if PGDIR_SHIFT > KVM_PHYS_SHIFT
+#define PTRS_PER_S2_PGD_SHIFT	0
+#else
+#define PTRS_PER_S2_PGD_SHIFT	(KVM_PHYS_SHIFT - PGDIR_SHIFT)
+#endif
+#define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
+#define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
+#define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
+/*
+ * If we are concatenating first level stage-2 page tables, we would have less
+ * than or equal to 16 pointers in the fake PGD, because that's what the
+ * architecture allows.  In this case, (4 - CONFIG_PGTABLE_LEVELS)
+ * represents the first level for the host, and we add 1 to go to the next
+ * level (which uses contatenation) for the stage-2 tables.
+ */
+#if PTRS_PER_S2_PGD <= 16
+#define KVM_PREALLOC_LEVEL	(4 - CONFIG_PGTABLE_LEVELS + 1)
+#else
+#define KVM_PREALLOC_LEVEL	(0)
+#endif
+
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
+{
+	pgd_t *pgd = kvm->arch.pgd;
+	pud_t *pud;
+
+	if (KVM_PREALLOC_LEVEL == 0)
+		return pgd;
+
+	pud = pud_offset(pgd, 0);
+	if (KVM_PREALLOC_LEVEL == 1)
+		return pud;
+
+	BUG_ON(KVM_PREALLOC_LEVEL != 2);
+	return pmd_offset(pud, 0);
+}
+
+static inline unsigned int kvm_get_hwpgd_size(void)
+{
+	if (KVM_PREALLOC_LEVEL > 0)
+		return PTRS_PER_S2_PGD * PAGE_SIZE;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
+}
+
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
+#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define kvm_pmd_table_empty(kvm, pmdp) (0)
+#else
+#define kvm_pmd_table_empty(kvm, pmdp) \
+	(kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
+#endif
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define kvm_pud_table_empty(kvm, pudp) (0)
+#else
+#define kvm_pud_table_empty(kvm, pudp) \
+	(kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
+#endif
+
+
+struct kvm;
+
+#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+}
+
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+					       unsigned long size,
+					       bool ipa_uncached)
+{
+	void *va = page_address(pfn_to_page(pfn));
+
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
+		kvm_flush_dcache_to_poc(va, size);
+
+	if (!icache_is_aliasing()) {		/* PIPT */
+		flush_icache_range((unsigned long)va,
+				   (unsigned long)va + size);
+	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
+		/* any kind of VIPT cache */
+		__flush_icache_all();
+	}
+}
+
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+	struct page *page = pte_page(pte);
+	kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+}
+
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	struct page *page = pmd_page(pmd);
+	kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+}
+
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+	struct page *page = pud_page(pud);
+	kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+}
+
+#define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
+
+void kvm_set_way_flush(struct kvm_vcpu *vcpu);
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
+
+static inline bool __kvm_cpu_uses_extended_idmap(void)
+{
+	return __cpu_uses_extended_idmap();
+}
+
+static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
+				       pgd_t *hyp_pgd,
+				       pgd_t *merged_hyp_pgd,
+				       unsigned long hyp_idmap_start)
+{
+	int idmap_idx;
+
+	/*
+	 * Use the first entry to access the HYP mappings. It is
+	 * guaranteed to be free, otherwise we wouldn't use an
+	 * extended idmap.
+	 */
+	VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
+	merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE);
+
+	/*
+	 * Create another extended level entry that points to the boot HYP map,
+	 * which contains an ID mapping of the HYP init code. We essentially
+	 * merge the boot and runtime HYP maps by doing so, but they don't
+	 * overlap anyway, so this is fine.
+	 */
+	idmap_idx = hyp_idmap_start >> VA_BITS;
+	VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
+	merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
new file mode 100644
index 000000000..bc39e557c
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_PSCI_H__
+#define __ARM64_KVM_PSCI_H__
+
+#define KVM_ARM_PSCI_0_1	1
+#define KVM_ARM_PSCI_0_2	2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
new file mode 100644
index 000000000..636c1bced
--- /dev/null
+++ b/arch/arm64/include/asm/linkage.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN		.align 4
+#define __ALIGN_STR	".align 4"
+
+#endif
diff --git a/arch/arm64/include/asm/memblock.h b/arch/arm64/include/asm/memblock.h
new file mode 100644
index 000000000..6afeed246
--- /dev/null
+++ b/arch/arm64/include/asm/memblock.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_MEMBLOCK_H
+#define __ASM_MEMBLOCK_H
+
+extern void arm64_memblock_init(void);
+
+#endif
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
new file mode 100644
index 000000000..f800d45ea
--- /dev/null
+++ b/arch/arm64/include/asm/memory.h
@@ -0,0 +1,164 @@
+/*
+ * Based on arch/arm/include/asm/memory.h
+ *
+ * Copyright (C) 2000-2002 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: this file should not be included by non-asm/.h files
+ */
+#ifndef __ASM_MEMORY_H
+#define __ASM_MEMORY_H
+
+#include <linux/compiler.h>
+#include <linux/const.h>
+#include <linux/types.h>
+#include <asm/sizes.h>
+
+/*
+ * Allow for constants defined here to be used from assembly code
+ * by prepending the UL suffix only with actual C code compilation.
+ */
+#define UL(x) _AC(x, UL)
+
+/*
+ * Size of the PCI I/O space. This must remain a power of two so that
+ * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.
+ */
+#define PCI_IO_SIZE		SZ_16M
+
+/*
+ * PAGE_OFFSET - the virtual address of the start of the kernel image (top
+ *		 (VA_BITS - 1))
+ * VA_BITS - the maximum number of bits for virtual addresses.
+ * TASK_SIZE - the maximum size of a user space task.
+ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
+ * The module space lives between the addresses given by TASK_SIZE
+ * and PAGE_OFFSET - it must be within 128MB of the kernel text.
+ */
+#define VA_BITS			(CONFIG_ARM64_VA_BITS)
+#define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
+#define MODULES_END		(PAGE_OFFSET)
+#define MODULES_VADDR		(MODULES_END - SZ_64M)
+#define PCI_IO_END		(MODULES_VADDR - SZ_2M)
+#define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
+#define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
+#define TASK_SIZE_64		(UL(1) << VA_BITS)
+
+#ifdef CONFIG_COMPAT
+#define TASK_SIZE_32		UL(0x100000000)
+#define TASK_SIZE		(test_thread_flag(TIF_32BIT) ? \
+				TASK_SIZE_32 : TASK_SIZE_64)
+#define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_32BIT) ? \
+				TASK_SIZE_32 : TASK_SIZE_64)
+#else
+#define TASK_SIZE		TASK_SIZE_64
+#endif /* CONFIG_COMPAT */
+
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
+
+#if TASK_SIZE_64 > MODULES_VADDR
+#error Top of 64-bit user space clashes with start of module space
+#endif
+
+/*
+ * Physical vs virtual RAM address space conversion.  These are
+ * private definitions which should NOT be used outside memory.h
+ * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ */
+#define __virt_to_phys(x)	(((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
+#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+
+/*
+ * Convert a physical address to a Page Frame Number and back
+ */
+#define	__phys_to_pfn(paddr)	((unsigned long)((paddr) >> PAGE_SHIFT))
+#define	__pfn_to_phys(pfn)	((phys_addr_t)(pfn) << PAGE_SHIFT)
+
+/*
+ * Convert a page to/from a physical address
+ */
+#define page_to_phys(page)	(__pfn_to_phys(page_to_pfn(page)))
+#define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
+
+/*
+ * Memory types available.
+ */
+#define MT_DEVICE_nGnRnE	0
+#define MT_DEVICE_nGnRE		1
+#define MT_DEVICE_GRE		2
+#define MT_NORMAL_NC		3
+#define MT_NORMAL		4
+
+/*
+ * Memory types for Stage-2 translation
+ */
+#define MT_S2_NORMAL		0xf
+#define MT_S2_DEVICE_nGnRE	0x1
+
+#ifndef __ASSEMBLY__
+
+extern phys_addr_t		memstart_addr;
+/* PHYS_OFFSET - the physical address of the start of memory. */
+#define PHYS_OFFSET		({ memstart_addr; })
+
+/*
+ * PFNs are used to describe any physical page; this means
+ * PFN 0 == physical address 0.
+ *
+ * This is the PFN of the first RAM page in the kernel
+ * direct-mapped view.  We assume this is the first page
+ * of RAM in the mem_map as well.
+ */
+#define PHYS_PFN_OFFSET	(PHYS_OFFSET >> PAGE_SHIFT)
+
+/*
+ * Note: Drivers should NOT use these.  They are the wrong
+ * translation for translating DMA addresses.  Use the driver
+ * DMA support - see dma-mapping.h.
+ */
+#define virt_to_phys virt_to_phys
+static inline phys_addr_t virt_to_phys(const volatile void *x)
+{
+	return __virt_to_phys((unsigned long)(x));
+}
+
+#define phys_to_virt phys_to_virt
+static inline void *phys_to_virt(phys_addr_t x)
+{
+	return (void *)(__phys_to_virt(x));
+}
+
+/*
+ * Drivers should NOT use these either.
+ */
+#define __pa(x)			__virt_to_phys((unsigned long)(x))
+#define __va(x)			((void *)__phys_to_virt((phys_addr_t)(x)))
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+#define virt_to_pfn(x)      __phys_to_pfn(__virt_to_phys(x))
+
+/*
+ *  virt_to_page(k)	convert a _valid_ virtual address to struct page *
+ *  virt_addr_valid(k)	indicates whether a virtual address is valid
+ */
+#define ARCH_PFN_OFFSET		((unsigned long)PHYS_PFN_OFFSET)
+
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define	virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#endif
+
+#include <asm-generic/memory_model.h>
+
+#endif
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
new file mode 100644
index 000000000..3d311761e
--- /dev/null
+++ b/arch/arm64/include/asm/mmu.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_MMU_H
+#define __ASM_MMU_H
+
+typedef struct {
+	unsigned int id;
+	raw_spinlock_t id_lock;
+	void *vdso;
+} mm_context_t;
+
+#define INIT_MM_CONTEXT(name) \
+	.context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
+
+#define ASID(mm)	((mm)->context.id & 0xffff)
+
+extern void paging_init(void);
+extern void setup_mm_for_reboot(void);
+extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
+extern void init_mem_pgprot(void);
+extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+			       unsigned long virt, phys_addr_t size,
+			       pgprot_t prot);
+
+#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
new file mode 100644
index 000000000..8ec41e5f5
--- /dev/null
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -0,0 +1,213 @@
+/*
+ * Based on arch/arm/include/asm/mmu_context.h
+ *
+ * Copyright (C) 1996 Russell King.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_MMU_CONTEXT_H
+#define __ASM_MMU_CONTEXT_H
+
+#include <linux/compiler.h>
+#include <linux/sched.h>
+
+#include <asm/cacheflush.h>
+#include <asm/proc-fns.h>
+#include <asm-generic/mm_hooks.h>
+#include <asm/cputype.h>
+#include <asm/pgtable.h>
+
+#define MAX_ASID_BITS	16
+
+extern unsigned int cpu_last_asid;
+
+void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+void __new_context(struct mm_struct *mm);
+
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+static inline void contextidr_thread_switch(struct task_struct *next)
+{
+	asm(
+	"	msr	contextidr_el1, %0\n"
+	"	isb"
+	:
+	: "r" (task_pid_nr(next)));
+}
+#else
+static inline void contextidr_thread_switch(struct task_struct *next)
+{
+}
+#endif
+
+/*
+ * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0.
+ */
+static inline void cpu_set_reserved_ttbr0(void)
+{
+	unsigned long ttbr = page_to_phys(empty_zero_page);
+
+	asm(
+	"	msr	ttbr0_el1, %0			// set TTBR0\n"
+	"	isb"
+	:
+	: "r" (ttbr));
+}
+
+/*
+ * TCR.T0SZ value to use when the ID map is active. Usually equals
+ * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
+ * physical memory, in which case it will be smaller.
+ */
+extern u64 idmap_t0sz;
+
+static inline bool __cpu_uses_extended_idmap(void)
+{
+	return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
+		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
+}
+
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+	unsigned long tcr;
+
+	if (__cpu_uses_extended_idmap())
+		asm volatile (
+		"	mrs	%0, tcr_el1	;"
+		"	bfi	%0, %1, %2, %3	;"
+		"	msr	tcr_el1, %0	;"
+		"	isb"
+		: "=&r" (tcr)
+		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
+/*
+ * Set TCR.T0SZ to its default value (based on VA_BITS)
+ */
+static inline void cpu_set_default_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
+}
+
+static inline void switch_new_context(struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	__new_context(mm);
+
+	local_irq_save(flags);
+	cpu_switch_mm(mm->pgd, mm);
+	local_irq_restore(flags);
+}
+
+static inline void check_and_switch_context(struct mm_struct *mm,
+					    struct task_struct *tsk)
+{
+	/*
+	 * Required during context switch to avoid speculative page table
+	 * walking with the wrong TTBR.
+	 */
+	cpu_set_reserved_ttbr0();
+
+	if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
+		/*
+		 * The ASID is from the current generation, just switch to the
+		 * new pgd. This condition is only true for calls from
+		 * context_switch() and interrupts are already disabled.
+		 */
+		cpu_switch_mm(mm->pgd, mm);
+	else if (irqs_disabled())
+		/*
+		 * Defer the new ASID allocation until after the context
+		 * switch critical region since __new_context() cannot be
+		 * called with interrupts disabled.
+		 */
+		set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
+	else
+		/*
+		 * That is a direct call to switch_mm() or activate_mm() with
+		 * interrupts enabled and a new context.
+		 */
+		switch_new_context(mm);
+}
+
+#define init_new_context(tsk,mm)	(__init_new_context(tsk,mm),0)
+#define destroy_context(mm)		do { } while(0)
+
+#define finish_arch_post_lock_switch \
+	finish_arch_post_lock_switch
+static inline void finish_arch_post_lock_switch(void)
+{
+	if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
+		struct mm_struct *mm = current->mm;
+		unsigned long flags;
+
+		__new_context(mm);
+
+		local_irq_save(flags);
+		cpu_switch_mm(mm->pgd, mm);
+		local_irq_restore(flags);
+	}
+}
+
+/*
+ * This is called when "tsk" is about to enter lazy TLB mode.
+ *
+ * mm:  describes the currently active mm context
+ * tsk: task which is entering lazy tlb
+ * cpu: cpu number which is entering lazy tlb
+ *
+ * tsk->mm will be NULL
+ */
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+/*
+ * This is the actual mm switch as far as the scheduler
+ * is concerned.  No registers are touched.  We avoid
+ * calling the CPU specific function when the mm hasn't
+ * actually changed.
+ */
+static inline void
+switch_mm(struct mm_struct *prev, struct mm_struct *next,
+	  struct task_struct *tsk)
+{
+	unsigned int cpu = smp_processor_id();
+
+	/*
+	 * init_mm.pgd does not contain any user mappings and it is always
+	 * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
+	 */
+	if (next == &init_mm) {
+		cpu_set_reserved_ttbr0();
+		return;
+	}
+
+	if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
+		check_and_switch_context(next, tsk);
+}
+
+#define deactivate_mm(tsk,mm)	do { } while (0)
+#define activate_mm(prev,next)	switch_mm(prev, next, NULL)
+
+#endif
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
new file mode 100644
index 000000000..e80e232b7
--- /dev/null
+++ b/arch/arm64/include/asm/module.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_MODULE_H
+#define __ASM_MODULE_H
+
+#include <asm-generic/module.h>
+
+#define MODULE_ARCH_VERMAGIC	"aarch64"
+
+#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
new file mode 100644
index 000000000..13ce4cc18
--- /dev/null
+++ b/arch/arm64/include/asm/neon.h
@@ -0,0 +1,18 @@
+/*
+ * linux/arch/arm64/include/asm/neon.h
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+
+#define cpu_has_neon()		(1)
+
+#define kernel_neon_begin()	kernel_neon_begin_partial(32)
+
+void kernel_neon_begin_partial(u32 num_regs);
+void kernel_neon_end(void);
diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
new file mode 100644
index 000000000..4e603ea36
--- /dev/null
+++ b/arch/arm64/include/asm/opcodes.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/opcodes.h>
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
new file mode 100644
index 000000000..7d9c7e4a4
--- /dev/null
+++ b/arch/arm64/include/asm/page.h
@@ -0,0 +1,78 @@
+/*
+ * Based on arch/arm/include/asm/page.h
+ *
+ * Copyright (C) 1995-2003 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PAGE_H
+#define __ASM_PAGE_H
+
+/* PAGE_SHIFT determines the page size */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define PAGE_SHIFT		16
+#else
+#define PAGE_SHIFT		12
+#endif
+#define PAGE_SIZE		(_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE-1))
+
+/*
+ * The idmap and swapper page tables need some space reserved in the kernel
+ * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
+ * map the kernel. With the 64K page configuration, swapper and idmap need to
+ * map to pte level. The swapper also maps the FDT (see __create_page_tables
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so 3 pages are reserved in all cases.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
+#else
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
+#endif
+
+#define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
+#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/pgtable-types.h>
+
+extern void __cpu_clear_user_page(void *p, unsigned long user);
+extern void __cpu_copy_user_page(void *to, const void *from,
+				 unsigned long user);
+extern void copy_page(void *to, const void *from);
+extern void clear_page(void *to);
+
+#define clear_user_page(addr,vaddr,pg)  __cpu_clear_user_page(addr, vaddr)
+#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr)
+
+typedef struct page *pgtable_t;
+
+#ifdef CONFIG_HAVE_ARCH_PFN_VALID
+extern int pfn_valid(unsigned long);
+#endif
+
+#include <asm/memory.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
+	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/getorder.h>
+
+#endif
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
new file mode 100644
index 000000000..b008a72f8
--- /dev/null
+++ b/arch/arm64/include/asm/pci.h
@@ -0,0 +1,43 @@
+#ifndef __ASM_PCI_H
+#define __ASM_PCI_H
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm-generic/pci-bridge.h>
+#include <asm-generic/pci-dma-compat.h>
+
+#define PCIBIOS_MIN_IO		0x1000
+#define PCIBIOS_MIN_MEM		0
+
+/*
+ * Set to 1 if the kernel should re-assign all PCI bus numbers
+ */
+#define pcibios_assign_all_busses() \
+	(pci_has_flag(PCI_REASSIGN_ALL_BUS))
+
+/*
+ * PCI address space differs from physical memory address space
+ */
+#define PCI_DMA_BUS_IS_PHYS	(0)
+
+extern int isa_dma_bridge_buggy;
+
+#ifdef CONFIG_PCI
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	/* no legacy IRQ on arm64 */
+	return -ENODEV;
+}
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+	return 1;
+}
+#endif  /* CONFIG_PCI */
+
+#endif  /* __KERNEL__ */
+#endif  /* __ASM_PCI_H */
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
new file mode 100644
index 000000000..4fde8c1df
--- /dev/null
+++ b/arch/arm64/include/asm/percpu.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+#ifdef CONFIG_SMP
+
+static inline void set_my_cpu_offset(unsigned long off)
+{
+	asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
+}
+
+static inline unsigned long __my_cpu_offset(void)
+{
+	unsigned long off;
+
+	/*
+	 * We want to allow caching the value, so avoid using volatile and
+	 * instead use a fake stack read to hazard against barrier().
+	 */
+	asm("mrs %0, tpidr_el1" : "=r" (off) :
+		"Q" (*(const unsigned long *)current_stack_pointer));
+
+	return off;
+}
+#define __my_cpu_offset __my_cpu_offset()
+
+#else	/* !CONFIG_SMP */
+
+#define set_my_cpu_offset(x)	do { } while (0)
+
+#endif /* CONFIG_SMP */
+
+#define PERCPU_OP(op, asm_op)						\
+static inline unsigned long __percpu_##op(void *ptr,			\
+			unsigned long val, int size)			\
+{									\
+	unsigned long loop, ret;					\
+									\
+	switch (size) {							\
+	case 1:								\
+		do {							\
+			asm ("//__per_cpu_" #op "_1\n"			\
+			"ldxrb	  %w[ret], %[ptr]\n"			\
+			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
+			"stxrb	  %w[loop], %w[ret], %[ptr]\n"		\
+			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
+			  [ptr] "+Q"(*(u8 *)ptr)			\
+			: [val] "Ir" (val));				\
+		} while (loop);						\
+		break;							\
+	case 2:								\
+		do {							\
+			asm ("//__per_cpu_" #op "_2\n"			\
+			"ldxrh	  %w[ret], %[ptr]\n"			\
+			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
+			"stxrh	  %w[loop], %w[ret], %[ptr]\n"		\
+			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
+			  [ptr]  "+Q"(*(u16 *)ptr)			\
+			: [val] "Ir" (val));				\
+		} while (loop);						\
+		break;							\
+	case 4:								\
+		do {							\
+			asm ("//__per_cpu_" #op "_4\n"			\
+			"ldxr	  %w[ret], %[ptr]\n"			\
+			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
+			"stxr	  %w[loop], %w[ret], %[ptr]\n"		\
+			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
+			  [ptr] "+Q"(*(u32 *)ptr)			\
+			: [val] "Ir" (val));				\
+		} while (loop);						\
+		break;							\
+	case 8:								\
+		do {							\
+			asm ("//__per_cpu_" #op "_8\n"			\
+			"ldxr	  %[ret], %[ptr]\n"			\
+			#asm_op " %[ret], %[ret], %[val]\n"		\
+			"stxr	  %w[loop], %[ret], %[ptr]\n"		\
+			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
+			  [ptr] "+Q"(*(u64 *)ptr)			\
+			: [val] "Ir" (val));				\
+		} while (loop);						\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+									\
+	return ret;							\
+}
+
+PERCPU_OP(add, add)
+PERCPU_OP(and, and)
+PERCPU_OP(or, orr)
+#undef PERCPU_OP
+
+static inline unsigned long __percpu_read(void *ptr, int size)
+{
+	unsigned long ret;
+
+	switch (size) {
+	case 1:
+		ret = ACCESS_ONCE(*(u8 *)ptr);
+		break;
+	case 2:
+		ret = ACCESS_ONCE(*(u16 *)ptr);
+		break;
+	case 4:
+		ret = ACCESS_ONCE(*(u32 *)ptr);
+		break;
+	case 8:
+		ret = ACCESS_ONCE(*(u64 *)ptr);
+		break;
+	default:
+		BUILD_BUG();
+	}
+
+	return ret;
+}
+
+static inline void __percpu_write(void *ptr, unsigned long val, int size)
+{
+	switch (size) {
+	case 1:
+		ACCESS_ONCE(*(u8 *)ptr) = (u8)val;
+		break;
+	case 2:
+		ACCESS_ONCE(*(u16 *)ptr) = (u16)val;
+		break;
+	case 4:
+		ACCESS_ONCE(*(u32 *)ptr) = (u32)val;
+		break;
+	case 8:
+		ACCESS_ONCE(*(u64 *)ptr) = (u64)val;
+		break;
+	default:
+		BUILD_BUG();
+	}
+}
+
+static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
+						int size)
+{
+	unsigned long ret, loop;
+
+	switch (size) {
+	case 1:
+		do {
+			asm ("//__percpu_xchg_1\n"
+			"ldxrb %w[ret], %[ptr]\n"
+			"stxrb %w[loop], %w[val], %[ptr]\n"
+			: [loop] "=&r"(loop), [ret] "=&r"(ret),
+			  [ptr] "+Q"(*(u8 *)ptr)
+			: [val] "r" (val));
+		} while (loop);
+		break;
+	case 2:
+		do {
+			asm ("//__percpu_xchg_2\n"
+			"ldxrh %w[ret], %[ptr]\n"
+			"stxrh %w[loop], %w[val], %[ptr]\n"
+			: [loop] "=&r"(loop), [ret] "=&r"(ret),
+			  [ptr] "+Q"(*(u16 *)ptr)
+			: [val] "r" (val));
+		} while (loop);
+		break;
+	case 4:
+		do {
+			asm ("//__percpu_xchg_4\n"
+			"ldxr %w[ret], %[ptr]\n"
+			"stxr %w[loop], %w[val], %[ptr]\n"
+			: [loop] "=&r"(loop), [ret] "=&r"(ret),
+			  [ptr] "+Q"(*(u32 *)ptr)
+			: [val] "r" (val));
+		} while (loop);
+		break;
+	case 8:
+		do {
+			asm ("//__percpu_xchg_8\n"
+			"ldxr %[ret], %[ptr]\n"
+			"stxr %w[loop], %[val], %[ptr]\n"
+			: [loop] "=&r"(loop), [ret] "=&r"(ret),
+			  [ptr] "+Q"(*(u64 *)ptr)
+			: [val] "r" (val));
+		} while (loop);
+		break;
+	default:
+		BUILD_BUG();
+	}
+
+	return ret;
+}
+
+#define _percpu_read(pcp)						\
+({									\
+	typeof(pcp) __retval;						\
+	preempt_disable();						\
+	__retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), 	\
+					      sizeof(pcp));		\
+	preempt_enable();						\
+	__retval;							\
+})
+
+#define _percpu_write(pcp, val)						\
+do {									\
+	preempt_disable();						\
+	__percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), 	\
+				sizeof(pcp));				\
+	preempt_enable();						\
+} while(0)								\
+
+#define _pcp_protect(operation, pcp, val)			\
+({								\
+	typeof(pcp) __retval;					\
+	preempt_disable();					\
+	__retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)),	\
+					  (val), sizeof(pcp));	\
+	preempt_enable();					\
+	__retval;						\
+})
+
+#define _percpu_add(pcp, val) \
+	_pcp_protect(__percpu_add, pcp, val)
+
+#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
+
+#define _percpu_and(pcp, val) \
+	_pcp_protect(__percpu_and, pcp, val)
+
+#define _percpu_or(pcp, val) \
+	_pcp_protect(__percpu_or, pcp, val)
+
+#define _percpu_xchg(pcp, val) (typeof(pcp)) \
+	_pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
+
+#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
+#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
+#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val)
+#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val)
+
+#define this_cpu_add_return_1(pcp, val) _percpu_add_return(pcp, val)
+#define this_cpu_add_return_2(pcp, val) _percpu_add_return(pcp, val)
+#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val)
+#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val)
+
+#define this_cpu_and_1(pcp, val) _percpu_and(pcp, val)
+#define this_cpu_and_2(pcp, val) _percpu_and(pcp, val)
+#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val)
+#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val)
+
+#define this_cpu_or_1(pcp, val) _percpu_or(pcp, val)
+#define this_cpu_or_2(pcp, val) _percpu_or(pcp, val)
+#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
+#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
+
+#define this_cpu_read_1(pcp) _percpu_read(pcp)
+#define this_cpu_read_2(pcp) _percpu_read(pcp)
+#define this_cpu_read_4(pcp) _percpu_read(pcp)
+#define this_cpu_read_8(pcp) _percpu_read(pcp)
+
+#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val)
+#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val)
+#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val)
+#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val)
+
+#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val)
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
new file mode 100644
index 000000000..d26d1d53c
--- /dev/null
+++ b/arch/arm64/include/asm/perf_event.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_PERF_EVENT_H
+#define __ASM_PERF_EVENT_H
+
+#ifdef CONFIG_HW_PERF_EVENTS
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
new file mode 100644
index 000000000..76420568d
--- /dev/null
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -0,0 +1,136 @@
+/*
+ * Based on arch/arm/include/asm/pgalloc.h
+ *
+ * Copyright (C) 2000-2001 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PGALLOC_H
+#define __ASM_PGALLOC_H
+
+#include <asm/pgtable-hwdef.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+#define check_pgt_cache()		do { } while (0)
+
+#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return (pmd_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+	free_page((unsigned long)pmd);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
+}
+
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return (pud_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+	free_page((unsigned long)pud);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
+}
+
+#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
+
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+
+static inline pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+{
+	return (pte_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline pgtable_t
+pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	struct page *pte;
+
+	pte = alloc_pages(PGALLOC_GFP, 0);
+	if (!pte)
+		return NULL;
+	if (!pgtable_page_ctor(pte)) {
+		__free_page(pte);
+		return NULL;
+	}
+	return pte;
+}
+
+/*
+ * Free a PTE table.
+ */
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	if (pte)
+		free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+	pgtable_page_dtor(pte);
+	__free_page(pte);
+}
+
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+				  pmdval_t prot)
+{
+	set_pmd(pmdp, __pmd(pte | prot));
+}
+
+/*
+ * Populate the pmdp entry with a pointer to the pte.  This pmd is part
+ * of the mm address space.
+ */
+static inline void
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+{
+	/*
+	 * The pmd must be loaded with the physical address of the PTE table
+	 */
+	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE);
+}
+
+static inline void
+pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
+{
+	__pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE);
+}
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+#endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
new file mode 100644
index 000000000..59bfae75d
--- /dev/null
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PGTABLE_HWDEF_H
+#define __ASM_PGTABLE_HWDEF_H
+
+#define PTRS_PER_PTE		(1 << (PAGE_SHIFT - 3))
+
+/*
+ * PMD_SHIFT determines the size a level 2 page table entry can map.
+ */
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SHIFT		((PAGE_SHIFT - 3) * 2 + 3)
+#define PMD_SIZE		(_AC(1, UL) << PMD_SHIFT)
+#define PMD_MASK		(~(PMD_SIZE-1))
+#define PTRS_PER_PMD		PTRS_PER_PTE
+#endif
+
+/*
+ * PUD_SHIFT determines the size a level 1 page table entry can map.
+ */
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SHIFT		((PAGE_SHIFT - 3) * 3 + 3)
+#define PUD_SIZE		(_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK		(~(PUD_SIZE-1))
+#define PTRS_PER_PUD		PTRS_PER_PTE
+#endif
+
+/*
+ * PGDIR_SHIFT determines the size a top-level page table entry can map
+ * (depending on the configuration, this level can be 0, 1 or 2).
+ */
+#define PGDIR_SHIFT		((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
+#define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
+#define PGDIR_MASK		(~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
+
+/*
+ * Section address mask and size definitions.
+ */
+#define SECTION_SHIFT		PMD_SHIFT
+#define SECTION_SIZE		(_AC(1, UL) << SECTION_SHIFT)
+#define SECTION_MASK		(~(SECTION_SIZE-1))
+
+/*
+ * Hardware page table definitions.
+ *
+ * Level 1 descriptor (PUD).
+ */
+#define PUD_TYPE_TABLE		(_AT(pudval_t, 3) << 0)
+#define PUD_TABLE_BIT		(_AT(pgdval_t, 1) << 1)
+#define PUD_TYPE_MASK		(_AT(pgdval_t, 3) << 0)
+#define PUD_TYPE_SECT		(_AT(pgdval_t, 1) << 0)
+
+/*
+ * Level 2 descriptor (PMD).
+ */
+#define PMD_TYPE_MASK		(_AT(pmdval_t, 3) << 0)
+#define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
+#define PMD_TYPE_TABLE		(_AT(pmdval_t, 3) << 0)
+#define PMD_TYPE_SECT		(_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT		(_AT(pmdval_t, 1) << 1)
+
+/*
+ * Section
+ */
+#define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_PROT_NONE	(_AT(pmdval_t, 1) << 58)
+#define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
+#define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
+#define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
+#define PMD_SECT_NG		(_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_PXN		(_AT(pmdval_t, 1) << 53)
+#define PMD_SECT_UXN		(_AT(pmdval_t, 1) << 54)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PMD_ATTRINDX(t)		(_AT(pmdval_t, (t)) << 2)
+#define PMD_ATTRINDX_MASK	(_AT(pmdval_t, 7) << 2)
+
+/*
+ * Level 3 descriptor (PTE).
+ */
+#define PTE_TYPE_MASK		(_AT(pteval_t, 3) << 0)
+#define PTE_TYPE_FAULT		(_AT(pteval_t, 0) << 0)
+#define PTE_TYPE_PAGE		(_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT		(_AT(pteval_t, 1) << 1)
+#define PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
+#define PTE_RDONLY		(_AT(pteval_t, 1) << 7)		/* AP[2] */
+#define PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
+#define PTE_AF			(_AT(pteval_t, 1) << 10)	/* Access Flag */
+#define PTE_NG			(_AT(pteval_t, 1) << 11)	/* nG */
+#define PTE_PXN			(_AT(pteval_t, 1) << 53)	/* Privileged XN */
+#define PTE_UXN			(_AT(pteval_t, 1) << 54)	/* User XN */
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t)		(_AT(pteval_t, (t)) << 2)
+#define PTE_ATTRINDX_MASK	(_AT(pteval_t, 7) << 2)
+
+/*
+ * 2nd stage PTE definitions
+ */
+#define PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
+#define PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
+
+#define PMD_S2_RDONLY		(_AT(pmdval_t, 1) << 6)   /* HAP[2:1] */
+#define PMD_S2_RDWR		(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+
+/*
+ * Memory Attribute override for Stage-2 (MemAttr[3:0])
+ */
+#define PTE_S2_MEMATTR(t)	(_AT(pteval_t, (t)) << 2)
+#define PTE_S2_MEMATTR_MASK	(_AT(pteval_t, 0xf) << 2)
+
+/*
+ * EL2/HYP PTE/PMD definitions
+ */
+#define PMD_HYP			PMD_SECT_USER
+#define PTE_HYP			PTE_USER
+
+/*
+ * Highest possible physical address supported.
+ */
+#define PHYS_MASK_SHIFT		(48)
+#define PHYS_MASK		((UL(1) << PHYS_MASK_SHIFT) - 1)
+
+/*
+ * TCR flags.
+ */
+#define TCR_T0SZ_OFFSET		0
+#define TCR_T1SZ_OFFSET		16
+#define TCR_T0SZ(x)		((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
+#define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
+#define TCR_TxSZ_WIDTH		6
+#define TCR_IRGN_NC		((UL(0) << 8) | (UL(0) << 24))
+#define TCR_IRGN_WBWA		((UL(1) << 8) | (UL(1) << 24))
+#define TCR_IRGN_WT		((UL(2) << 8) | (UL(2) << 24))
+#define TCR_IRGN_WBnWA		((UL(3) << 8) | (UL(3) << 24))
+#define TCR_IRGN_MASK		((UL(3) << 8) | (UL(3) << 24))
+#define TCR_ORGN_NC		((UL(0) << 10) | (UL(0) << 26))
+#define TCR_ORGN_WBWA		((UL(1) << 10) | (UL(1) << 26))
+#define TCR_ORGN_WT		((UL(2) << 10) | (UL(2) << 26))
+#define TCR_ORGN_WBnWA		((UL(3) << 10) | (UL(3) << 26))
+#define TCR_ORGN_MASK		((UL(3) << 10) | (UL(3) << 26))
+#define TCR_SHARED		((UL(3) << 12) | (UL(3) << 28))
+#define TCR_TG0_4K		(UL(0) << 14)
+#define TCR_TG0_64K		(UL(1) << 14)
+#define TCR_TG0_16K		(UL(2) << 14)
+#define TCR_TG1_16K		(UL(1) << 30)
+#define TCR_TG1_4K		(UL(2) << 30)
+#define TCR_TG1_64K		(UL(3) << 30)
+#define TCR_ASID16		(UL(1) << 36)
+#define TCR_TBI0		(UL(1) << 37)
+
+#endif
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
new file mode 100644
index 000000000..2b1bd7e52
--- /dev/null
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -0,0 +1,95 @@
+/*
+ * Page table types definitions.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_PGTABLE_TYPES_H
+#define __ASM_PGTABLE_TYPES_H
+
+#include <asm/types.h>
+
+typedef u64 pteval_t;
+typedef u64 pmdval_t;
+typedef u64 pudval_t;
+typedef u64 pgdval_t;
+
+#undef STRICT_MM_TYPECHECKS
+
+#ifdef STRICT_MM_TYPECHECKS
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { pteval_t pte; } pte_t;
+#define pte_val(x)	((x).pte)
+#define __pte(x)	((pte_t) { (x) } )
+
+#if CONFIG_PGTABLE_LEVELS > 2
+typedef struct { pmdval_t pmd; } pmd_t;
+#define pmd_val(x)	((x).pmd)
+#define __pmd(x)	((pmd_t) { (x) } )
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 3
+typedef struct { pudval_t pud; } pud_t;
+#define pud_val(x)	((x).pud)
+#define __pud(x)	((pud_t) { (x) } )
+#endif
+
+typedef struct { pgdval_t pgd; } pgd_t;
+#define pgd_val(x)	((x).pgd)
+#define __pgd(x)	((pgd_t) { (x) } )
+
+typedef struct { pteval_t pgprot; } pgprot_t;
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) } )
+
+#else	/* !STRICT_MM_TYPECHECKS */
+
+typedef pteval_t pte_t;
+#define pte_val(x)	(x)
+#define __pte(x)	(x)
+
+#if CONFIG_PGTABLE_LEVELS > 2
+typedef pmdval_t pmd_t;
+#define pmd_val(x)	(x)
+#define __pmd(x)	(x)
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 3
+typedef pudval_t pud_t;
+#define pud_val(x)	(x)
+#define __pud(x)	(x)
+#endif
+
+typedef pgdval_t pgd_t;
+#define pgd_val(x)	(x)
+#define __pgd(x)	(x)
+
+typedef pteval_t pgprot_t;
+#define pgprot_val(x)	(x)
+#define __pgprot(x)	(x)
+
+#endif /* STRICT_MM_TYPECHECKS */
+
+#if CONFIG_PGTABLE_LEVELS == 2
+#include <asm-generic/pgtable-nopmd.h>
+#elif CONFIG_PGTABLE_LEVELS == 3
+#include <asm-generic/pgtable-nopud.h>
+#endif
+
+#endif	/* __ASM_PGTABLE_TYPES_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
new file mode 100644
index 000000000..56283f8a6
--- /dev/null
+++ b/arch/arm64/include/asm/pgtable.h
@@ -0,0 +1,510 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PGTABLE_H
+#define __ASM_PGTABLE_H
+
+#include <asm/proc-fns.h>
+
+#include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+
+/*
+ * Software defined PTE bits definition.
+ */
+#define PTE_VALID		(_AT(pteval_t, 1) << 0)
+#define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
+#define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
+#define PTE_WRITE		(_AT(pteval_t, 1) << 57)
+#define PTE_PROT_NONE		(_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
+
+/*
+ * VMALLOC and SPARSEMEM_VMEMMAP ranges.
+ *
+ * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
+ *	(rounded up to PUD_SIZE).
+ * VMALLOC_START: beginning of the kernel VA space
+ * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
+ *	fixed mappings and modules
+ */
+#define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
+#define VMALLOC_START		(UL(0xffffffffffffffff) << VA_BITS)
+#define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+
+#define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))
+
+#define FIRST_USER_ADDRESS	0UL
+
+#ifndef __ASSEMBLY__
+extern void __pte_error(const char *file, int line, unsigned long val);
+extern void __pmd_error(const char *file, int line, unsigned long val);
+extern void __pud_error(const char *file, int line, unsigned long val);
+extern void __pgd_error(const char *file, int line, unsigned long val);
+
+#ifdef CONFIG_SMP
+#define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#else
+#define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF)
+#define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF)
+#endif
+
+#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+
+#define PROT_SECT_DEVICE_nGnRE	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC	(PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_DEFAULT		(PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+
+#define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+
+#define PAGE_HYP		__pgprot(_PAGE_DEFAULT | PTE_HYP)
+#define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
+
+#define PAGE_S2			__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
+
+#define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+#define PAGE_COPY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_COPY_EXEC		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+
+#define __P000  PAGE_NONE
+#define __P001  PAGE_READONLY
+#define __P010  PAGE_COPY
+#define __P011  PAGE_COPY
+#define __P100  PAGE_READONLY_EXEC
+#define __P101  PAGE_READONLY_EXEC
+#define __P110  PAGE_COPY_EXEC
+#define __P111  PAGE_COPY_EXEC
+
+#define __S000  PAGE_NONE
+#define __S001  PAGE_READONLY
+#define __S010  PAGE_SHARED
+#define __S011  PAGE_SHARED
+#define __S100  PAGE_READONLY_EXEC
+#define __S101  PAGE_READONLY_EXEC
+#define __S110  PAGE_SHARED_EXEC
+#define __S111  PAGE_SHARED_EXEC
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern struct page *empty_zero_page;
+#define ZERO_PAGE(vaddr)	(empty_zero_page)
+
+#define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
+
+#define pte_pfn(pte)		((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)
+
+#define pfn_pte(pfn,prot)	(__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+
+#define pte_none(pte)		(!pte_val(pte))
+#define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0))
+#define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
+
+/* Find an entry in the third-level page table. */
+#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))
+
+#define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
+#define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte)			do { } while (0)
+#define pte_unmap_nested(pte)		do { } while (0)
+
+/*
+ * The following only work if pte_present(). Undefined behaviour otherwise.
+ */
+#define pte_present(pte)	(!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
+#define pte_dirty(pte)		(!!(pte_val(pte) & PTE_DIRTY))
+#define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
+#define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
+#define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
+#define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
+
+#define pte_valid_user(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
+#define pte_valid_not_user(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+
+static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
+{
+	pte_val(pte) &= ~pgprot_val(prot);
+	return pte;
+}
+
+static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
+{
+	pte_val(pte) |= pgprot_val(prot);
+	return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(PTE_WRITE));
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_WRITE));
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(PTE_AF));
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_AF));
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
+}
+
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+	*ptep = pte;
+
+	/*
+	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
+	 * or update_mmu_cache() have the necessary barriers.
+	 */
+	if (pte_valid_not_user(pte)) {
+		dsb(ishst);
+		isb();
+	}
+}
+
+extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pte)
+{
+	if (pte_valid_user(pte)) {
+		if (!pte_special(pte) && pte_exec(pte))
+			__sync_icache_dcache(pte, addr);
+		if (pte_dirty(pte) && pte_write(pte))
+			pte_val(pte) &= ~PTE_RDONLY;
+		else
+			pte_val(pte) |= PTE_RDONLY;
+	}
+
+	set_pte(ptep, pte);
+}
+
+/*
+ * Huge pte definitions.
+ */
+#define pte_huge(pte)		(!(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
+/*
+ * Hugetlb definitions.
+ */
+#define HUGE_MAX_HSTATE		2
+#define HPAGE_SHIFT		PMD_SHIFT
+#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+#define __HAVE_ARCH_PTE_SPECIAL
+
+static inline pte_t pud_pte(pud_t pud)
+{
+	return __pte(pud_val(pud));
+}
+
+static inline pmd_t pud_pmd(pud_t pud)
+{
+	return __pmd(pud_val(pud));
+}
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd(pte_val(pte));
+}
+
+static inline pgprot_t mk_sect_prot(pgprot_t prot)
+{
+	return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
+}
+
+/*
+ * THP definitions.
+ */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd)	pte_special(pmd_pte(pmd))
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+struct vm_area_struct;
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp);
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mknotpresent(pmd)	(__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+
+#define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
+
+#define pud_write(pud)		pte_write(pud_pte(pud))
+#define pud_pfn(pud)		(((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+
+#define set_pmd_at(mm, addr, pmdp, pmd)	set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
+
+static inline int has_transparent_hugepage(void)
+{
+	return 1;
+}
+
+#define __pgprot_modify(prot,mask,bits) \
+	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+
+/*
+ * Mark the prot value as uncacheable and unbufferable.
+ */
+#define pgprot_noncached(prot) \
+	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
+#define pgprot_writecombine(prot) \
+	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
+#define pgprot_device(prot) \
+	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define pmd_present(pmd)	(pmd_val(pmd))
+
+#define pmd_bad(pmd)		(!(pmd_val(pmd) & 2))
+
+#define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
+				 PMD_TYPE_TABLE)
+#define pmd_sect(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
+				 PMD_TYPE_SECT)
+
+#ifdef CONFIG_ARM64_64K_PAGES
+#define pud_sect(pud)		(0)
+#define pud_table(pud)		(1)
+#else
+#define pud_sect(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
+				 PUD_TYPE_SECT)
+#define pud_table(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
+				 PUD_TYPE_TABLE)
+#endif
+
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+	dsb(ishst);
+	isb();
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	set_pmd(pmdp, __pmd(0));
+}
+
+static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+{
+	return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
+}
+
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+#define mk_pte(page,prot)	pfn_pte(page_to_pfn(page),prot)
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+#define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
+
+#define pud_none(pud)		(!pud_val(pud))
+#define pud_bad(pud)		(!(pud_val(pud) & 2))
+#define pud_present(pud)	(pud_val(pud))
+
+static inline void set_pud(pud_t *pudp, pud_t pud)
+{
+	*pudp = pud;
+	dsb(ishst);
+	isb();
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+	set_pud(pudp, __pud(0));
+}
+
+static inline pmd_t *pud_page_vaddr(pud_t pud)
+{
+	return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
+}
+
+/* Find an entry in the second-level page table. */
+#define pmd_index(addr)		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
+{
+	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
+}
+
+#define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
+
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+#define pud_ERROR(pud)		__pud_error(__FILE__, __LINE__, pud_val(pud))
+
+#define pgd_none(pgd)		(!pgd_val(pgd))
+#define pgd_bad(pgd)		(!(pgd_val(pgd) & 2))
+#define pgd_present(pgd)	(pgd_val(pgd))
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	*pgdp = pgd;
+	dsb(ishst);
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	set_pgd(pgdp, __pgd(0));
+}
+
+static inline pud_t *pgd_page_vaddr(pgd_t pgd)
+{
+	return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
+}
+
+/* Find an entry in the frst-level page table. */
+#define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
+{
+	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
+}
+
+#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
+
+#endif  /* CONFIG_PGTABLE_LEVELS > 3 */
+
+#define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
+
+/* to find an entry in a page-table-directory */
+#define pgd_index(addr)		(((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+
+#define pgd_offset(mm, addr)	((mm)->pgd+pgd_index(addr))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
+			      PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK;
+	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
+	return pte;
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
+}
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+
+/*
+ * Encode and decode a swap entry:
+ *	bits 0-1:	present (must be zero)
+ *	bits 2-7:	swap type
+ *	bits 8-57:	swap offset
+ */
+#define __SWP_TYPE_SHIFT	2
+#define __SWP_TYPE_BITS		6
+#define __SWP_OFFSET_BITS	50
+#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
+#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
+#define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)
+
+#define __swp_type(x)		(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
+#define __swp_offset(x)		(((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
+#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
+
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })
+
+/*
+ * Ensure that there are not more swap files than can be encoded in the kernel
+ * PTEs.
+ */
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
+
+extern int kern_addr_valid(unsigned long addr);
+
+#include <asm-generic/pgtable.h>
+
+#define pgtable_cache_init() do { } while (0)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
new file mode 100644
index 000000000..b7710a596
--- /dev/null
+++ b/arch/arm64/include/asm/pmu.h
@@ -0,0 +1,83 @@
+/*
+ * Based on arch/arm/include/asm/pmu.h
+ *
+ * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PMU_H
+#define __ASM_PMU_H
+
+#ifdef CONFIG_HW_PERF_EVENTS
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+	/*
+	 * The events that are active on the PMU for the given index.
+	 */
+	struct perf_event	**events;
+
+	/*
+	 * A 1 bit for an index indicates that the counter is being used for
+	 * an event. A 0 means that the counter can be used.
+	 */
+	unsigned long           *used_mask;
+
+	/*
+	 * Hardware lock to serialize accesses to PMU registers. Needed for the
+	 * read/modify/write sequences.
+	 */
+	raw_spinlock_t		pmu_lock;
+};
+
+struct arm_pmu {
+	struct pmu		pmu;
+	cpumask_t		active_irqs;
+	int			*irq_affinity;
+	const char		*name;
+	irqreturn_t		(*handle_irq)(int irq_num, void *dev);
+	void			(*enable)(struct hw_perf_event *evt, int idx);
+	void			(*disable)(struct hw_perf_event *evt, int idx);
+	int			(*get_event_idx)(struct pmu_hw_events *hw_events,
+						 struct hw_perf_event *hwc);
+	int			(*set_event_filter)(struct hw_perf_event *evt,
+						    struct perf_event_attr *attr);
+	u32			(*read_counter)(int idx);
+	void			(*write_counter)(int idx, u32 val);
+	void			(*start)(void);
+	void			(*stop)(void);
+	void			(*reset)(void *);
+	int			(*map_event)(struct perf_event *event);
+	int			num_events;
+	atomic_t		active_events;
+	struct mutex		reserve_mutex;
+	u64			max_period;
+	struct platform_device	*plat_device;
+	struct pmu_hw_events	*(*get_hw_events)(void);
+};
+
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
+
+u64 armpmu_event_update(struct perf_event *event,
+			struct hw_perf_event *hwc,
+			int idx);
+
+int armpmu_event_set_period(struct perf_event *event,
+			    struct hw_perf_event *hwc,
+			    int idx);
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+#endif /* __ASM_PMU_H */
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
new file mode 100644
index 000000000..220633b79
--- /dev/null
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -0,0 +1,50 @@
+/*
+ * Based on arch/arm/include/asm/proc-fns.h
+ *
+ * Copyright (C) 1997-1999 Russell King
+ * Copyright (C) 2000 Deep Blue Solutions Ltd
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PROCFNS_H
+#define __ASM_PROCFNS_H
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+struct mm_struct;
+struct cpu_suspend_ctx;
+
+extern void cpu_cache_off(void);
+extern void cpu_do_idle(void);
+extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
+extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+void cpu_soft_restart(phys_addr_t cpu_reset,
+		unsigned long addr) __attribute__((noreturn));
+extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
+extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
+
+#include <asm/memory.h>
+
+#define cpu_switch_mm(pgd,mm)				\
+do {							\
+	BUG_ON(pgd == swapper_pg_dir);			\
+	cpu_do_switch_mm(virt_to_phys(pgd),mm);		\
+} while (0)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* __ASM_PROCFNS_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
new file mode 100644
index 000000000..d2c37a1df
--- /dev/null
+++ b/arch/arm64/include/asm/processor.h
@@ -0,0 +1,172 @@
+/*
+ * Based on arch/arm/include/asm/processor.h
+ *
+ * Copyright (C) 1995-1999 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PROCESSOR_H
+#define __ASM_PROCESSOR_H
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ __label__ _l; _l: &&_l;})
+
+#ifdef __KERNEL__
+
+#include <linux/string.h>
+
+#include <asm/fpsimd.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/ptrace.h>
+#include <asm/types.h>
+
+#ifdef __KERNEL__
+#define STACK_TOP_MAX		TASK_SIZE_64
+#ifdef CONFIG_COMPAT
+#define AARCH32_VECTORS_BASE	0xffff0000
+#define STACK_TOP		(test_thread_flag(TIF_32BIT) ? \
+				AARCH32_VECTORS_BASE : STACK_TOP_MAX)
+#else
+#define STACK_TOP		STACK_TOP_MAX
+#endif /* CONFIG_COMPAT */
+
+extern phys_addr_t arm64_dma_phys_limit;
+#define ARCH_LOW_ADDRESS_LIMIT	(arm64_dma_phys_limit - 1)
+#endif /* __KERNEL__ */
+
+struct debug_info {
+	/* Have we suspended stepping by a debugger? */
+	int			suspended_step;
+	/* Allow breakpoints and watchpoints to be disabled for this thread. */
+	int			bps_disabled;
+	int			wps_disabled;
+	/* Hardware breakpoints pinned to this task. */
+	struct perf_event	*hbp_break[ARM_MAX_BRP];
+	struct perf_event	*hbp_watch[ARM_MAX_WRP];
+};
+
+struct cpu_context {
+	unsigned long x19;
+	unsigned long x20;
+	unsigned long x21;
+	unsigned long x22;
+	unsigned long x23;
+	unsigned long x24;
+	unsigned long x25;
+	unsigned long x26;
+	unsigned long x27;
+	unsigned long x28;
+	unsigned long fp;
+	unsigned long sp;
+	unsigned long pc;
+};
+
+struct thread_struct {
+	struct cpu_context	cpu_context;	/* cpu context */
+	unsigned long		tp_value;
+	struct fpsimd_state	fpsimd_state;
+	unsigned long		fault_address;	/* fault info */
+	unsigned long		fault_code;	/* ESR_EL1 value */
+	struct debug_info	debug;		/* debugging */
+};
+
+#define INIT_THREAD  {	}
+
+static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
+{
+	memset(regs, 0, sizeof(*regs));
+	regs->syscallno = ~0UL;
+	regs->pc = pc;
+}
+
+static inline void start_thread(struct pt_regs *regs, unsigned long pc,
+				unsigned long sp)
+{
+	start_thread_common(regs, pc);
+	regs->pstate = PSR_MODE_EL0t;
+	regs->sp = sp;
+}
+
+#ifdef CONFIG_COMPAT
+static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
+				       unsigned long sp)
+{
+	start_thread_common(regs, pc);
+	regs->pstate = COMPAT_PSR_MODE_USR;
+	if (pc & 1)
+		regs->pstate |= COMPAT_PSR_T_BIT;
+
+#ifdef __AARCH64EB__
+	regs->pstate |= COMPAT_PSR_E_BIT;
+#endif
+
+	regs->compat_sp = sp;
+}
+#endif
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+
+unsigned long get_wchan(struct task_struct *p);
+
+static inline void cpu_relax(void)
+{
+	asm volatile("yield" ::: "memory");
+}
+
+#define cpu_relax_lowlatency()                cpu_relax()
+
+/* Thread switching */
+extern struct task_struct *cpu_switch_to(struct task_struct *prev,
+					 struct task_struct *next);
+
+#define task_pt_regs(p) \
+	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
+
+#define KSTK_EIP(tsk)	((unsigned long)task_pt_regs(tsk)->pc)
+#define KSTK_ESP(tsk)	user_stack_pointer(task_pt_regs(tsk))
+
+/*
+ * Prefetching support
+ */
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *ptr)
+{
+	asm volatile("prfm pldl1keep, %a0\n" : : "p" (ptr));
+}
+
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *ptr)
+{
+	asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
+}
+
+#define ARCH_HAS_SPINLOCK_PREFETCH
+static inline void spin_lock_prefetch(const void *x)
+{
+	prefetchw(x);
+}
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+#endif
+
+#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
new file mode 100644
index 000000000..2454bc59c
--- /dev/null
+++ b/arch/arm64/include/asm/psci.h
@@ -0,0 +1,20 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2013 ARM Limited
+ */
+
+#ifndef __ASM_PSCI_H
+#define __ASM_PSCI_H
+
+int psci_dt_init(void);
+int psci_acpi_init(void);
+
+#endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
new file mode 100644
index 000000000..d6dd9fdbc
--- /dev/null
+++ b/arch/arm64/include/asm/ptrace.h
@@ -0,0 +1,193 @@
+/*
+ * Based on arch/arm/include/asm/ptrace.h
+ *
+ * Copyright (C) 1996-2003 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PTRACE_H
+#define __ASM_PTRACE_H
+
+#include <uapi/asm/ptrace.h>
+
+/* Current Exception Level values, as contained in CurrentEL */
+#define CurrentEL_EL1		(1 << 2)
+#define CurrentEL_EL2		(2 << 2)
+
+/* AArch32-specific ptrace requests */
+#define COMPAT_PTRACE_GETREGS		12
+#define COMPAT_PTRACE_SETREGS		13
+#define COMPAT_PTRACE_GET_THREAD_AREA	22
+#define COMPAT_PTRACE_SET_SYSCALL	23
+#define COMPAT_PTRACE_GETVFPREGS	27
+#define COMPAT_PTRACE_SETVFPREGS	28
+#define COMPAT_PTRACE_GETHBPREGS	29
+#define COMPAT_PTRACE_SETHBPREGS	30
+
+/* AArch32 CPSR bits */
+#define COMPAT_PSR_MODE_MASK	0x0000001f
+#define COMPAT_PSR_MODE_USR	0x00000010
+#define COMPAT_PSR_MODE_FIQ	0x00000011
+#define COMPAT_PSR_MODE_IRQ	0x00000012
+#define COMPAT_PSR_MODE_SVC	0x00000013
+#define COMPAT_PSR_MODE_ABT	0x00000017
+#define COMPAT_PSR_MODE_HYP	0x0000001a
+#define COMPAT_PSR_MODE_UND	0x0000001b
+#define COMPAT_PSR_MODE_SYS	0x0000001f
+#define COMPAT_PSR_T_BIT	0x00000020
+#define COMPAT_PSR_E_BIT	0x00000200
+#define COMPAT_PSR_F_BIT	0x00000040
+#define COMPAT_PSR_I_BIT	0x00000080
+#define COMPAT_PSR_A_BIT	0x00000100
+#define COMPAT_PSR_E_BIT	0x00000200
+#define COMPAT_PSR_J_BIT	0x01000000
+#define COMPAT_PSR_Q_BIT	0x08000000
+#define COMPAT_PSR_V_BIT	0x10000000
+#define COMPAT_PSR_C_BIT	0x20000000
+#define COMPAT_PSR_Z_BIT	0x40000000
+#define COMPAT_PSR_N_BIT	0x80000000
+#define COMPAT_PSR_IT_MASK	0x0600fc00	/* If-Then execution state mask */
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define COMPAT_PSR_ENDSTATE	COMPAT_PSR_E_BIT
+#else
+#define COMPAT_PSR_ENDSTATE	0
+#endif
+
+/*
+ * These are 'magic' values for PTRACE_PEEKUSR that return info about where a
+ * process is located in memory.
+ */
+#define COMPAT_PT_TEXT_ADDR		0x10000
+#define COMPAT_PT_DATA_ADDR		0x10004
+#define COMPAT_PT_TEXT_END_ADDR		0x10008
+#ifndef __ASSEMBLY__
+
+/* sizeof(struct user) for AArch32 */
+#define COMPAT_USER_SZ	296
+
+/* Architecturally defined mapping between AArch32 and AArch64 registers */
+#define compat_usr(x)	regs[(x)]
+#define compat_fp	regs[11]
+#define compat_sp	regs[13]
+#define compat_lr	regs[14]
+#define compat_sp_hyp	regs[15]
+#define compat_sp_irq	regs[16]
+#define compat_lr_irq	regs[17]
+#define compat_sp_svc	regs[18]
+#define compat_lr_svc	regs[19]
+#define compat_sp_abt	regs[20]
+#define compat_lr_abt	regs[21]
+#define compat_sp_und	regs[22]
+#define compat_lr_und	regs[23]
+#define compat_r8_fiq	regs[24]
+#define compat_r9_fiq	regs[25]
+#define compat_r10_fiq	regs[26]
+#define compat_r11_fiq	regs[27]
+#define compat_r12_fiq	regs[28]
+#define compat_sp_fiq	regs[29]
+#define compat_lr_fiq	regs[30]
+
+/*
+ * This struct defines the way the registers are stored on the stack during an
+ * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for
+ * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs.
+ */
+struct pt_regs {
+	union {
+		struct user_pt_regs user_regs;
+		struct {
+			u64 regs[31];
+			u64 sp;
+			u64 pc;
+			u64 pstate;
+		};
+	};
+	u64 orig_x0;
+	u64 syscallno;
+};
+
+#define arch_has_single_step()	(1)
+
+#ifdef CONFIG_COMPAT
+#define compat_thumb_mode(regs) \
+	(((regs)->pstate & COMPAT_PSR_T_BIT))
+#else
+#define compat_thumb_mode(regs) (0)
+#endif
+
+#define user_mode(regs)	\
+	(((regs)->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t)
+
+#define compat_user_mode(regs)	\
+	(((regs)->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) == \
+	 (PSR_MODE32_BIT | PSR_MODE_EL0t))
+
+#define processor_mode(regs) \
+	((regs)->pstate & PSR_MODE_MASK)
+
+#define interrupts_enabled(regs) \
+	(!((regs)->pstate & PSR_I_BIT))
+
+#define fast_interrupts_enabled(regs) \
+	(!((regs)->pstate & PSR_F_BIT))
+
+#define user_stack_pointer(regs) \
+	(!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp)
+
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
+
+/*
+ * Are the current registers suitable for user mode? (used to maintain
+ * security in signal handlers)
+ */
+static inline int valid_user_regs(struct user_pt_regs *regs)
+{
+	if (user_mode(regs) && (regs->pstate & PSR_I_BIT) == 0) {
+		regs->pstate &= ~(PSR_F_BIT | PSR_A_BIT);
+
+		/* The T bit is reserved for AArch64 */
+		if (!(regs->pstate & PSR_MODE32_BIT))
+			regs->pstate &= ~COMPAT_PSR_T_BIT;
+
+		return 1;
+	}
+
+	/*
+	 * Force PSR to something logical...
+	 */
+	regs->pstate &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | \
+			COMPAT_PSR_T_BIT | PSR_MODE32_BIT;
+
+	if (!(regs->pstate & PSR_MODE32_BIT)) {
+		regs->pstate &= ~COMPAT_PSR_T_BIT;
+		regs->pstate |= PSR_MODE_EL0t;
+	}
+
+	return 0;
+}
+
+#define instruction_pointer(regs)	((unsigned long)(regs)->pc)
+
+#ifdef CONFIG_SMP
+extern unsigned long profile_pc(struct pt_regs *regs);
+#else
+#define profile_pc(regs) instruction_pointer(regs)
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
new file mode 100644
index 000000000..c76fac979
--- /dev/null
+++ b/arch/arm64/include/asm/seccomp.h
@@ -0,0 +1,25 @@
+/*
+ * arch/arm64/include/asm/seccomp.h
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm/unistd.h>
+
+#ifdef CONFIG_COMPAT
+#define __NR_seccomp_read_32		__NR_compat_read
+#define __NR_seccomp_write_32		__NR_compat_write
+#define __NR_seccomp_exit_32		__NR_compat_exit
+#define __NR_seccomp_sigreturn_32	__NR_compat_rt_sigreturn
+#endif /* CONFIG_COMPAT */
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_SECCOMP_H */
diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h
new file mode 100644
index 000000000..4df608a84
--- /dev/null
+++ b/arch/arm64/include/asm/shmparam.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SHMPARAM_H
+#define __ASM_SHMPARAM_H
+
+/*
+ * For IPC syscalls from compat tasks, we need to use the legacy 16k
+ * alignment value. Since we don't have aliasing D-caches, the rest of
+ * the time we can safely use PAGE_SIZE.
+ */
+#define COMPAT_SHMLBA	0x4000
+
+#include <asm-generic/shmparam.h>
+
+#endif /* __ASM_SHMPARAM_H */
diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h
new file mode 100644
index 000000000..eeaa97559
--- /dev/null
+++ b/arch/arm64/include/asm/signal32.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SIGNAL32_H
+#define __ASM_SIGNAL32_H
+
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+#define AARCH32_KERN_SIGRET_CODE_OFFSET	0x500
+
+extern const compat_ulong_t aarch32_sigret_code[6];
+
+int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
+		       struct pt_regs *regs);
+int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
+			  struct pt_regs *regs);
+
+void compat_setup_restart_syscall(struct pt_regs *regs);
+#else
+
+static inline int compat_setup_frame(int usid, struct ksignal *ksig,
+				     sigset_t *set, struct pt_regs *regs)
+{
+	return -ENOSYS;
+}
+
+static inline int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
+					struct pt_regs *regs)
+{
+	return -ENOSYS;
+}
+
+static inline void compat_setup_restart_syscall(struct pt_regs *regs)
+{
+}
+#endif /* CONFIG_COMPAT */
+#endif /* __KERNEL__ */
+#endif /* __ASM_SIGNAL32_H */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
new file mode 100644
index 000000000..bf22650b1
--- /dev/null
+++ b/arch/arm64/include/asm/smp.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/thread_info.h>
+
+#ifndef CONFIG_SMP
+# error "<asm/smp.h> included in non-SMP build"
+#endif
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+struct seq_file;
+
+/*
+ * generate IPI list text
+ */
+extern void show_ipi_list(struct seq_file *p, int prec);
+
+/*
+ * Called from C code, this handles an IPI.
+ */
+extern void handle_IPI(int ipinr, struct pt_regs *regs);
+
+/*
+ * Discover the set of possible CPUs and determine their
+ * SMP operations.
+ */
+extern void of_smp_init_cpus(void);
+
+/*
+ * Provide a function to raise an IPI cross call on CPUs in callmap.
+ */
+extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
+
+extern void (*__smp_cross_call)(const struct cpumask *, unsigned int);
+
+/*
+ * Called from the secondary holding pen, this is the secondary CPU entry point.
+ */
+asmlinkage void secondary_start_kernel(void);
+
+/*
+ * Initial data for bringing up a secondary CPU.
+ */
+struct secondary_data {
+	void *stack;
+};
+extern struct secondary_data secondary_data;
+extern void secondary_entry(void);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+extern int __cpu_disable(void);
+
+extern void __cpu_die(unsigned int cpu);
+extern void cpu_die(void);
+
+#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h
new file mode 100644
index 000000000..8dcd61e32
--- /dev/null
+++ b/arch/arm64/include/asm/smp_plat.h
@@ -0,0 +1,45 @@
+/*
+ * Definitions specific to SMP platforms.
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_SMP_PLAT_H
+#define __ASM_SMP_PLAT_H
+
+#include <asm/types.h>
+
+struct mpidr_hash {
+	u64	mask;
+	u32	shift_aff[4];
+	u32	bits;
+};
+
+extern struct mpidr_hash mpidr_hash;
+
+static inline u32 mpidr_hash_size(void)
+{
+	return 1 << mpidr_hash.bits;
+}
+
+/*
+ * Logical CPU mapping.
+ */
+extern u64 __cpu_logical_map[NR_CPUS];
+#define cpu_logical_map(cpu)    __cpu_logical_map[cpu]
+
+void __init do_post_cpus_up_work(void);
+
+#endif /* __ASM_SMP_PLAT_H */
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
new file mode 100644
index 000000000..74a9d3018
--- /dev/null
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SPARSEMEM_H
+#define __ASM_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+#define MAX_PHYSMEM_BITS	48
+#define SECTION_SIZE_BITS	30
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
new file mode 100644
index 000000000..cee128732
--- /dev/null
+++ b/arch/arm64/include/asm/spinlock.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/spinlock_types.h>
+#include <asm/processor.h>
+
+/*
+ * Spinlock implementation.
+ *
+ * The memory barriers are implicit with the load-acquire and store-release
+ * instructions.
+ */
+
+#define arch_spin_unlock_wait(lock) \
+	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int tmp;
+	arch_spinlock_t lockval, newval;
+
+	asm volatile(
+	/* Atomically increment the next ticket. */
+"	prfm	pstl1strm, %3\n"
+"1:	ldaxr	%w0, %3\n"
+"	add	%w1, %w0, %w5\n"
+"	stxr	%w2, %w1, %3\n"
+"	cbnz	%w2, 1b\n"
+	/* Did we get the lock? */
+"	eor	%w1, %w0, %w0, ror #16\n"
+"	cbz	%w1, 3f\n"
+	/*
+	 * No: spin on the owner. Send a local event to avoid missing an
+	 * unlock before the exclusive load.
+	 */
+"	sevl\n"
+"2:	wfe\n"
+"	ldaxrh	%w2, %4\n"
+"	eor	%w1, %w2, %w0, lsr #16\n"
+"	cbnz	%w1, 2b\n"
+	/* We got the lock. Critical section starts here. */
+"3:"
+	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
+	: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
+	: "memory");
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned int tmp;
+	arch_spinlock_t lockval;
+
+	asm volatile(
+"	prfm	pstl1strm, %2\n"
+"1:	ldaxr	%w0, %2\n"
+"	eor	%w1, %w0, %w0, ror #16\n"
+"	cbnz	%w1, 2f\n"
+"	add	%w0, %w0, %3\n"
+"	stxr	%w1, %w0, %2\n"
+"	cbnz	%w1, 1b\n"
+"2:"
+	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
+	: "I" (1 << TICKET_SHIFT)
+	: "memory");
+
+	return !tmp;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	asm volatile(
+"	stlrh	%w1, %0\n"
+	: "=Q" (lock->owner)
+	: "r" (lock->owner + 1)
+	: "memory");
+}
+
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.owner == lock.next;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	arch_spinlock_t lockval = READ_ONCE(*lock);
+	return (lockval.next - lockval.owner) > 1;
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
+/*
+ * Write lock implementation.
+ *
+ * Write locks set bit 31. Unlocking, is done by writing 0 since the lock is
+ * exclusively held.
+ *
+ * The memory barriers are implicit with the load-acquire and store-release
+ * instructions.
+ */
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned int tmp;
+
+	asm volatile(
+	"	sevl\n"
+	"1:	wfe\n"
+	"2:	ldaxr	%w0, %1\n"
+	"	cbnz	%w0, 1b\n"
+	"	stxr	%w0, %w2, %1\n"
+	"	cbnz	%w0, 2b\n"
+	: "=&r" (tmp), "+Q" (rw->lock)
+	: "r" (0x80000000)
+	: "memory");
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned int tmp;
+
+	asm volatile(
+	"	ldaxr	%w0, %1\n"
+	"	cbnz	%w0, 1f\n"
+	"	stxr	%w0, %w2, %1\n"
+	"1:\n"
+	: "=&r" (tmp), "+Q" (rw->lock)
+	: "r" (0x80000000)
+	: "memory");
+
+	return !tmp;
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	asm volatile(
+	"	stlr	%w1, %0\n"
+	: "=Q" (rw->lock) : "r" (0) : "memory");
+}
+
+/* write_can_lock - would write_trylock() succeed? */
+#define arch_write_can_lock(x)		((x)->lock == 0)
+
+/*
+ * Read lock implementation.
+ *
+ * It exclusively loads the lock value, increments it and stores the new value
+ * back if positive and the CPU still exclusively owns the location. If the
+ * value is negative, the lock is already held.
+ *
+ * During unlocking there may be multiple active read locks but no write lock.
+ *
+ * The memory barriers are implicit with the load-acquire and store-release
+ * instructions.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned int tmp, tmp2;
+
+	asm volatile(
+	"	sevl\n"
+	"1:	wfe\n"
+	"2:	ldaxr	%w0, %2\n"
+	"	add	%w0, %w0, #1\n"
+	"	tbnz	%w0, #31, 1b\n"
+	"	stxr	%w1, %w0, %2\n"
+	"	cbnz	%w1, 2b\n"
+	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
+	:
+	: "memory");
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned int tmp, tmp2;
+
+	asm volatile(
+	"1:	ldxr	%w0, %2\n"
+	"	sub	%w0, %w0, #1\n"
+	"	stlxr	%w1, %w0, %2\n"
+	"	cbnz	%w1, 1b\n"
+	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
+	:
+	: "memory");
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned int tmp, tmp2 = 1;
+
+	asm volatile(
+	"	ldaxr	%w0, %2\n"
+	"	add	%w0, %w0, #1\n"
+	"	tbnz	%w0, #31, 1f\n"
+	"	stxr	%w1, %w0, %2\n"
+	"1:\n"
+	: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
+	:
+	: "memory");
+
+	return !tmp2;
+}
+
+/* read_can_lock - would read_trylock() succeed? */
+#define arch_read_can_lock(x)		((x)->lock < 0x80000000)
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
new file mode 100644
index 000000000..b8d383665
--- /dev/null
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SPINLOCK_TYPES_H
+#define __ASM_SPINLOCK_TYPES_H
+
+#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+# error "please don't include this file directly"
+#endif
+
+#define TICKET_SHIFT	16
+
+typedef struct {
+#ifdef __AARCH64EB__
+	u16 next;
+	u16 owner;
+#else
+	u16 owner;
+	u16 next;
+#endif
+} __aligned(4) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 , 0 }
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
new file mode 100644
index 000000000..fe5e287dc
--- /dev/null
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function.  The pattern is called stack canary
+ * and gcc expects it to be defined by a global variable called
+ * "__stack_chk_guard" on ARM.  This unfortunately means that on SMP
+ * we cannot have a different canary value per task.
+ */
+
+#ifndef __ASM_STACKPROTECTOR_H
+#define __ASM_STACKPROTECTOR_H
+
+#include <linux/random.h>
+#include <linux/version.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+	unsigned long canary;
+
+	/* Try to get a semi random initial value. */
+	get_random_bytes(&canary, sizeof(canary));
+	canary ^= LINUX_VERSION_CODE;
+
+	current->stack_canary = canary;
+	__stack_chk_guard = current->stack_canary;
+}
+
+#endif	/* _ASM_STACKPROTECTOR_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
new file mode 100644
index 000000000..7318f6d54
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+struct stackframe {
+	unsigned long fp;
+	unsigned long sp;
+	unsigned long pc;
+};
+
+extern int unwind_frame(struct stackframe *frame);
+extern void walk_stackframe(struct stackframe *frame,
+			    int (*fn)(struct stackframe *, void *), void *data);
+
+#endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h
new file mode 100644
index 000000000..15e35598a
--- /dev/null
+++ b/arch/arm64/include/asm/stat.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_STAT_H
+#define __ASM_STAT_H
+
+#include <uapi/asm/stat.h>
+
+#ifdef CONFIG_COMPAT
+
+#include <asm/compat.h>
+
+/*
+ * struct stat64 is needed for compat tasks only. Its definition is different
+ * from the generic struct stat64.
+ */
+struct stat64 {
+	compat_u64	st_dev;
+	unsigned char   __pad0[4];
+
+#define STAT64_HAS_BROKEN_ST_INO	1
+	compat_ulong_t	__st_ino;
+	compat_uint_t	st_mode;
+	compat_uint_t	st_nlink;
+
+	compat_ulong_t	st_uid;
+	compat_ulong_t	st_gid;
+
+	compat_u64	st_rdev;
+	unsigned char   __pad3[4];
+
+	compat_s64	st_size;
+	compat_ulong_t	st_blksize;
+	compat_u64	st_blocks;	/* Number of 512-byte blocks allocated. */
+
+	compat_ulong_t	st_atime;
+	compat_ulong_t	st_atime_nsec;
+
+	compat_ulong_t	st_mtime;
+	compat_ulong_t	st_mtime_nsec;
+
+	compat_ulong_t	st_ctime;
+	compat_ulong_t	st_ctime_nsec;
+
+	compat_u64	st_ino;
+};
+
+#endif
+#endif
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
new file mode 100644
index 000000000..64d2d4884
--- /dev/null
+++ b/arch/arm64/include/asm/string.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_STRING_H
+#define __ASM_STRING_H
+
+#define __HAVE_ARCH_STRRCHR
+extern char *strrchr(const char *, int c);
+
+#define __HAVE_ARCH_STRCHR
+extern char *strchr(const char *, int c);
+
+#define __HAVE_ARCH_STRCMP
+extern int strcmp(const char *, const char *);
+
+#define __HAVE_ARCH_STRNCMP
+extern int strncmp(const char *, const char *, __kernel_size_t);
+
+#define __HAVE_ARCH_STRLEN
+extern __kernel_size_t strlen(const char *);
+
+#define __HAVE_ARCH_STRNLEN
+extern __kernel_size_t strnlen(const char *, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *, const void *, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMMOVE
+extern void *memmove(void *, const void *, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMCHR
+extern void *memchr(const void *, int, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *, int, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *, const void *, size_t);
+
+#endif
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
new file mode 100644
index 000000000..003802f58
--- /dev/null
+++ b/arch/arm64/include/asm/suspend.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_SUSPEND_H
+#define __ASM_SUSPEND_H
+
+#define NR_CTX_REGS 11
+
+/*
+ * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on
+ * the stack, which must be 16-byte aligned on v8
+ */
+struct cpu_suspend_ctx {
+	/*
+	 * This struct must be kept in sync with
+	 * cpu_do_{suspend/resume} in mm/proc.S
+	 */
+	u64 ctx_regs[NR_CTX_REGS];
+	u64 sp;
+} __aligned(16);
+
+struct sleep_save_sp {
+	phys_addr_t *save_ptr_stash;
+	phys_addr_t save_ptr_stash_phys;
+};
+
+extern int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
+extern void cpu_resume(void);
+#endif
diff --git a/arch/arm64/include/asm/sync_bitops.h b/arch/arm64/include/asm/sync_bitops.h
new file mode 100644
index 000000000..8da0bf4f7
--- /dev/null
+++ b/arch/arm64/include/asm/sync_bitops.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_SYNC_BITOPS_H__
+#define __ASM_SYNC_BITOPS_H__
+
+#include <asm/bitops.h>
+#include <asm/cmpxchg.h>
+
+/* sync_bitops functions are equivalent to the SMP implementation of the
+ * original functions, independently from CONFIG_SMP being defined.
+ *
+ * We need them because _set_bit etc are not SMP safe if !CONFIG_SMP. But
+ * under Xen you might be communicating with a completely external entity
+ * who might be on another CPU (e.g. two uniprocessor guests communicating
+ * via event channels and grant tables). So we need a variant of the bit
+ * ops which are SMP safe even on a UP kernel.
+ */
+
+#define sync_set_bit(nr, p)            set_bit(nr, p)
+#define sync_clear_bit(nr, p)          clear_bit(nr, p)
+#define sync_change_bit(nr, p)         change_bit(nr, p)
+#define sync_test_and_set_bit(nr, p)   test_and_set_bit(nr, p)
+#define sync_test_and_clear_bit(nr, p) test_and_clear_bit(nr, p)
+#define sync_test_and_change_bit(nr, p)        test_and_change_bit(nr, p)
+#define sync_test_bit(nr, addr)                test_bit(nr, addr)
+#define sync_cmpxchg                   cmpxchg
+
+#endif
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
new file mode 100644
index 000000000..709a57446
--- /dev/null
+++ b/arch/arm64/include/asm/syscall.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SYSCALL_H
+#define __ASM_SYSCALL_H
+
+#include <uapi/linux/audit.h>
+#include <linux/compat.h>
+#include <linux/err.h>
+
+extern const void *sys_call_table[];
+
+static inline int syscall_get_nr(struct task_struct *task,
+				 struct pt_regs *regs)
+{
+	return regs->syscallno;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->regs[0] = regs->orig_x0;
+}
+
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long error = regs->regs[0];
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->regs[0] = (long) error ? error : val;
+}
+
+#define SYSCALL_MAX_ARGS 6
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	if (n == 0)
+		return;
+
+	if (i + n > SYSCALL_MAX_ARGS) {
+		unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
+		unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
+		pr_warning("%s called with max args %d, handling only %d\n",
+			   __func__, i + n, SYSCALL_MAX_ARGS);
+		memset(args_bad, 0, n_bad * sizeof(args[0]));
+	}
+
+	if (i == 0) {
+		args[0] = regs->orig_x0;
+		args++;
+		i++;
+		n--;
+	}
+
+	memcpy(args, &regs->regs[i], n * sizeof(args[0]));
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	if (n == 0)
+		return;
+
+	if (i + n > SYSCALL_MAX_ARGS) {
+		pr_warning("%s called with max args %d, handling only %d\n",
+			   __func__, i + n, SYSCALL_MAX_ARGS);
+		n = SYSCALL_MAX_ARGS - i;
+	}
+
+	if (i == 0) {
+		regs->orig_x0 = args[0];
+		args++;
+		i++;
+		n--;
+	}
+
+	memcpy(&regs->regs[i], args, n * sizeof(args[0]));
+}
+
+/*
+ * We don't care about endianness (__AUDIT_ARCH_LE bit) here because
+ * AArch64 has the same system calls both on little- and big- endian.
+ */
+static inline int syscall_get_arch(void)
+{
+	if (is_compat_task())
+		return AUDIT_ARCH_ARM;
+
+	return AUDIT_ARCH_AARCH64;
+}
+
+#endif	/* __ASM_SYSCALL_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
new file mode 100644
index 000000000..5c89df0ac
--- /dev/null
+++ b/arch/arm64/include/asm/sysreg.h
@@ -0,0 +1,60 @@
+/*
+ * Macros for accessing system registers with older binutils.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_SYSREG_H
+#define __ASM_SYSREG_H
+
+#define sys_reg(op0, op1, crn, crm, op2) \
+	((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+
+#ifdef __ASSEMBLY__
+
+	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+	.equ	__reg_num_x\num, \num
+	.endr
+	.equ	__reg_num_xzr, 31
+
+	.macro	mrs_s, rt, sreg
+	.inst	0xd5300000|(\sreg)|(__reg_num_\rt)
+	.endm
+
+	.macro	msr_s, sreg, rt
+	.inst	0xd5100000|(\sreg)|(__reg_num_\rt)
+	.endm
+
+#else
+
+asm(
+"	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
+"	.equ	__reg_num_x\\num, \\num\n"
+"	.endr\n"
+"	.equ	__reg_num_xzr, 31\n"
+"\n"
+"	.macro	mrs_s, rt, sreg\n"
+"	.inst	0xd5300000|(\\sreg)|(__reg_num_\\rt)\n"
+"	.endm\n"
+"\n"
+"	.macro	msr_s, sreg, rt\n"
+"	.inst	0xd5100000|(\\sreg)|(__reg_num_\\rt)\n"
+"	.endm\n"
+);
+
+#endif
+
+#endif	/* __ASM_SYSREG_H */
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
new file mode 100644
index 000000000..7a18fabbe
--- /dev/null
+++ b/arch/arm64/include/asm/system_misc.h
@@ -0,0 +1,55 @@
+/*
+ * Based on arch/arm/include/asm/system_misc.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SYSTEM_MISC_H
+#define __ASM_SYSTEM_MISC_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
+#include <linux/reboot.h>
+
+struct pt_regs;
+
+void die(const char *msg, struct pt_regs *regs, int err);
+
+struct siginfo;
+void arm64_notify_die(const char *str, struct pt_regs *regs,
+		      struct siginfo *info, int err);
+
+void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
+					     struct pt_regs *),
+			   int sig, int code, const char *name);
+
+struct mm_struct;
+extern void show_pte(struct mm_struct *mm, unsigned long addr);
+extern void __show_regs(struct pt_regs *);
+
+void soft_restart(unsigned long);
+extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
+
+#define UDBG_UNDEFINED	(1 << 0)
+#define UDBG_SYSCALL	(1 << 1)
+#define UDBG_BADABORT	(1 << 2)
+#define UDBG_SEGV	(1 << 3)
+#define UDBG_BUS	(1 << 4)
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
new file mode 100644
index 000000000..dcd06d18a
--- /dev/null
+++ b/arch/arm64/include/asm/thread_info.h
@@ -0,0 +1,135 @@
+/*
+ * Based on arch/arm/include/asm/thread_info.h
+ *
+ * Copyright (C) 2002 Russell King.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_THREAD_INFO_H
+#define __ASM_THREAD_INFO_H
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+#ifndef CONFIG_ARM64_64K_PAGES
+#define THREAD_SIZE_ORDER	2
+#endif
+
+#define THREAD_SIZE		16384
+#define THREAD_START_SP		(THREAD_SIZE - 16)
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+#include <asm/types.h>
+
+typedef unsigned long mm_segment_t;
+
+/*
+ * low level task data that entry.S needs immediate access to.
+ * __switch_to() assumes cpu_context follows immediately after cpu_domain.
+ */
+struct thread_info {
+	unsigned long		flags;		/* low level flags */
+	mm_segment_t		addr_limit;	/* address limit */
+	struct task_struct	*task;		/* main task structure */
+	int			preempt_count;	/* 0 => preemptable, <0 => bug */
+	int			cpu;		/* cpu */
+};
+
+#define INIT_THREAD_INFO(tsk)						\
+{									\
+	.task		= &tsk,						\
+	.flags		= 0,						\
+	.preempt_count	= INIT_PREEMPT_COUNT,				\
+	.addr_limit	= KERNEL_DS,					\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+/*
+ * how to get the current stack pointer from C
+ */
+register unsigned long current_stack_pointer asm ("sp");
+
+/*
+ * how to get the thread information struct from C
+ */
+static inline struct thread_info *current_thread_info(void) __attribute_const__;
+
+static inline struct thread_info *current_thread_info(void)
+{
+	return (struct thread_info *)
+		(current_stack_pointer & ~(THREAD_SIZE - 1));
+}
+
+#define thread_saved_pc(tsk)	\
+	((unsigned long)(tsk->thread.cpu_context.pc))
+#define thread_saved_sp(tsk)	\
+	((unsigned long)(tsk->thread.cpu_context.sp))
+#define thread_saved_fp(tsk)	\
+	((unsigned long)(tsk->thread.cpu_context.fp))
+
+#endif
+
+/*
+ * thread information flags:
+ *  TIF_SYSCALL_TRACE	- syscall trace active
+ *  TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
+ *  TIF_SYSCALL_AUDIT	- syscall auditing
+ *  TIF_SECOMP		- syscall secure computing
+ *  TIF_SIGPENDING	- signal pending
+ *  TIF_NEED_RESCHED	- rescheduling necessary
+ *  TIF_NOTIFY_RESUME	- callback before returning to user
+ *  TIF_USEDFPU		- FPU was used by this task this quantum (SMP)
+ */
+#define TIF_SIGPENDING		0
+#define TIF_NEED_RESCHED	1
+#define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
+#define TIF_NOHZ		7
+#define TIF_SYSCALL_TRACE	8
+#define TIF_SYSCALL_AUDIT	9
+#define TIF_SYSCALL_TRACEPOINT	10
+#define TIF_SECCOMP		11
+#define TIF_MEMDIE		18	/* is terminating due to OOM killer */
+#define TIF_FREEZE		19
+#define TIF_RESTORE_SIGMASK	20
+#define TIF_SINGLESTEP		21
+#define TIF_32BIT		22	/* 32bit process */
+#define TIF_SWITCH_MM		23	/* deferred switch_mm */
+
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_NOHZ		(1 << TIF_NOHZ)
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_32BIT		(1 << TIF_32BIT)
+
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
+
+#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+				 _TIF_NOHZ)
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h
new file mode 100644
index 000000000..81a076eb3
--- /dev/null
+++ b/arch/arm64/include/asm/timex.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_TIMEX_H
+#define __ASM_TIMEX_H
+
+#include <asm/arch_timer.h>
+
+/*
+ * Use the current timer as a cycle counter since this is what we use for
+ * the delay loop.
+ */
+#define get_cycles()	arch_counter_get_cntvct()
+
+#include <asm-generic/timex.h>
+
+#endif
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
new file mode 100644
index 000000000..3a0242c7e
--- /dev/null
+++ b/arch/arm64/include/asm/tlb.h
@@ -0,0 +1,74 @@
+/*
+ * Based on arch/arm/include/asm/tlb.h
+ *
+ * Copyright (C) 2002 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_TLB_H
+#define __ASM_TLB_H
+
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+#define tlb_remove_entry(tlb, entry)	tlb_remove_table(tlb, entry)
+static inline void __tlb_remove_table(void *_table)
+{
+	free_page_and_swap_cache((struct page *)_table);
+}
+#else
+#define tlb_remove_entry(tlb, entry)	tlb_remove_page(tlb, entry)
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+
+#include <asm-generic/tlb.h>
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	if (tlb->fullmm) {
+		flush_tlb_mm(tlb->mm);
+	} else {
+		struct vm_area_struct vma = { .vm_mm = tlb->mm, };
+		flush_tlb_range(&vma, tlb->start, tlb->end);
+	}
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+				  unsigned long addr)
+{
+	__flush_tlb_pgtable(tlb->mm, addr);
+	pgtable_page_dtor(pte);
+	tlb_remove_entry(tlb, pte);
+}
+
+#if CONFIG_PGTABLE_LEVELS > 2
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
+				  unsigned long addr)
+{
+	__flush_tlb_pgtable(tlb->mm, addr);
+	tlb_remove_entry(tlb, virt_to_page(pmdp));
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 3
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
+				  unsigned long addr)
+{
+	__flush_tlb_pgtable(tlb->mm, addr);
+	tlb_remove_entry(tlb, virt_to_page(pudp));
+}
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
new file mode 100644
index 000000000..c3bb05b98
--- /dev/null
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -0,0 +1,176 @@
+/*
+ * Based on arch/arm/include/asm/tlbflush.h
+ *
+ * Copyright (C) 1999-2003 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_TLBFLUSH_H
+#define __ASM_TLBFLUSH_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/sched.h>
+#include <asm/cputype.h>
+
+/*
+ *	TLB Management
+ *	==============
+ *
+ *	The arch/arm64/mm/tlb.S files implement these methods.
+ *
+ *	The TLB specific code is expected to perform whatever tests it needs
+ *	to determine if it should invalidate the TLB for each call.  Start
+ *	addresses are inclusive and end addresses are exclusive; it is safe to
+ *	round these addresses down.
+ *
+ *	flush_tlb_all()
+ *
+ *		Invalidate the entire TLB.
+ *
+ *	flush_tlb_mm(mm)
+ *
+ *		Invalidate all TLB entries in a particular address space.
+ *		- mm	- mm_struct describing address space
+ *
+ *	flush_tlb_range(mm,start,end)
+ *
+ *		Invalidate a range of TLB entries in the specified address
+ *		space.
+ *		- mm	- mm_struct describing address space
+ *		- start - start address (may not be aligned)
+ *		- end	- end address (exclusive, may not be aligned)
+ *
+ *	flush_tlb_page(vaddr,vma)
+ *
+ *		Invalidate the specified page in the specified address range.
+ *		- vaddr - virtual address (may not be aligned)
+ *		- vma	- vma_struct describing address range
+ *
+ *	flush_kern_tlb_page(kaddr)
+ *
+ *		Invalidate the TLB entry for the specified page.  The address
+ *		will be in the kernels virtual memory space.  Current uses
+ *		only require the D-TLB to be invalidated.
+ *		- kaddr - Kernel virtual memory address
+ */
+static inline void flush_tlb_all(void)
+{
+	dsb(ishst);
+	asm("tlbi	vmalle1is");
+	dsb(ish);
+	isb();
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned long asid = (unsigned long)ASID(mm) << 48;
+
+	dsb(ishst);
+	asm("tlbi	aside1is, %0" : : "r" (asid));
+	dsb(ish);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long uaddr)
+{
+	unsigned long addr = uaddr >> 12 |
+		((unsigned long)ASID(vma->vm_mm) << 48);
+
+	dsb(ishst);
+	asm("tlbi	vae1is, %0" : : "r" (addr));
+	dsb(ish);
+}
+
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end)
+{
+	unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+	unsigned long addr;
+	start = asid | (start >> 12);
+	end = asid | (end >> 12);
+
+	dsb(ishst);
+	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+		asm("tlbi vae1is, %0" : : "r"(addr));
+	dsb(ish);
+}
+
+static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+	start >>= 12;
+	end >>= 12;
+
+	dsb(ishst);
+	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+		asm("tlbi vaae1is, %0" : : "r"(addr));
+	dsb(ish);
+	isb();
+}
+
+/*
+ * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
+ * necessarily a performance improvement.
+ */
+#define MAX_TLB_RANGE	(1024UL << PAGE_SHIFT)
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	if ((end - start) <= MAX_TLB_RANGE)
+		__flush_tlb_range(vma, start, end);
+	else
+		flush_tlb_mm(vma->vm_mm);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if ((end - start) <= MAX_TLB_RANGE)
+		__flush_tlb_kernel_range(start, end);
+	else
+		flush_tlb_all();
+}
+
+/*
+ * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+ * table levels (pgd/pud/pmd).
+ */
+static inline void __flush_tlb_pgtable(struct mm_struct *mm,
+				       unsigned long uaddr)
+{
+	unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
+
+	dsb(ishst);
+	asm("tlbi	vae1is, %0" : : "r" (addr));
+	dsb(ish);
+}
+/*
+ * On AArch64, the cache coherency is handled via the set_pte_at() function.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+				    unsigned long addr, pte_t *ptep)
+{
+	/*
+	 * set_pte() does not have a DSB for user mappings, so make sure that
+	 * the page table write is visible.
+	 */
+	dsb(ishst);
+}
+
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 000000000..7ebcd31ce
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,36 @@
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
+
+#ifdef CONFIG_SMP
+
+#include <linux/cpumask.h>
+
+struct cpu_topology {
+	int thread_id;
+	int core_id;
+	int cluster_id;
+	cpumask_t thread_sibling;
+	cpumask_t core_sibling;
+};
+
+extern struct cpu_topology cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu)	(cpu_topology[cpu].cluster_id)
+#define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
+#define topology_thread_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
new file mode 100644
index 000000000..232e4ba5d
--- /dev/null
+++ b/arch/arm64/include/asm/traps.h
@@ -0,0 +1,46 @@
+/*
+ * Based on arch/arm/include/asm/traps.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_TRAP_H
+#define __ASM_TRAP_H
+
+#include <linux/list.h>
+
+struct pt_regs;
+
+struct undef_hook {
+	struct list_head node;
+	u32 instr_mask;
+	u32 instr_val;
+	u64 pstate_mask;
+	u64 pstate_val;
+	int (*fn)(struct pt_regs *regs, u32 instr);
+};
+
+void register_undef_hook(struct undef_hook *hook);
+void unregister_undef_hook(struct undef_hook *hook);
+
+static inline int in_exception_text(unsigned long ptr)
+{
+	extern char __exception_text_start[];
+	extern char __exception_text_end[];
+
+	return ptr >= (unsigned long)&__exception_text_start &&
+	       ptr < (unsigned long)&__exception_text_end;
+}
+
+#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
new file mode 100644
index 000000000..07e1ba449
--- /dev/null
+++ b/arch/arm64/include/asm/uaccess.h
@@ -0,0 +1,282 @@
+/*
+ * Based on arch/arm/include/asm/uaccess.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_UACCESS_H
+#define __ASM_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/string.h>
+#include <linux/thread_info.h>
+
+#include <asm/ptrace.h>
+#include <asm/errno.h>
+#include <asm/memory.h>
+#include <asm/compiler.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+	unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+#define KERNEL_DS	(-1UL)
+#define get_ds()	(KERNEL_DS)
+
+#define USER_DS		TASK_SIZE_64
+#define get_fs()	(current_thread_info()->addr_limit)
+
+static inline void set_fs(mm_segment_t fs)
+{
+	current_thread_info()->addr_limit = fs;
+}
+
+#define segment_eq(a, b)	((a) == (b))
+
+/*
+ * Return 1 if addr < current->addr_limit, 0 otherwise.
+ */
+#define __addr_ok(addr)							\
+({									\
+	unsigned long flag;						\
+	asm("cmp %1, %0; cset %0, lo"					\
+		: "=&r" (flag)						\
+		: "r" (addr), "0" (current_thread_info()->addr_limit)	\
+		: "cc");						\
+	flag;								\
+})
+
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 1 if the range is valid, 0 otherwise.
+ *
+ * This is equivalent to the following test:
+ * (u65)addr + (u65)size <= current->addr_limit
+ *
+ * This needs 65-bit arithmetic.
+ */
+#define __range_ok(addr, size)						\
+({									\
+	unsigned long flag, roksum;					\
+	__chk_user_ptr(addr);						\
+	asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls"		\
+		: "=&r" (flag), "=&r" (roksum)				\
+		: "1" (addr), "Ir" (size),				\
+		  "r" (current_thread_info()->addr_limit)		\
+		: "cc");						\
+	flag;								\
+})
+
+#define access_ok(type, addr, size)	__range_ok(addr, size)
+#define user_addr_max			get_fs
+
+/*
+ * The "__xxx" versions of the user access functions do not verify the address
+ * space - it must have been done previously with a separate "access_ok()"
+ * call.
+ *
+ * The "__xxx_error" versions set the third argument to -EFAULT if an error
+ * occurs, and leave it unchanged on success.
+ */
+#define __get_user_asm(instr, reg, x, addr, err)			\
+	asm volatile(							\
+	"1:	" instr "	" reg "1, [%2]\n"			\
+	"2:\n"								\
+	"	.section .fixup, \"ax\"\n"				\
+	"	.align	2\n"						\
+	"3:	mov	%w0, %3\n"					\
+	"	mov	%1, #0\n"					\
+	"	b	2b\n"						\
+	"	.previous\n"						\
+	"	.section __ex_table,\"a\"\n"				\
+	"	.align	3\n"						\
+	"	.quad	1b, 3b\n"					\
+	"	.previous"						\
+	: "+r" (err), "=&r" (x)						\
+	: "r" (addr), "i" (-EFAULT))
+
+#define __get_user_err(x, ptr, err)					\
+do {									\
+	unsigned long __gu_val;						\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		__get_user_asm("ldrb", "%w", __gu_val, (ptr), (err));	\
+		break;							\
+	case 2:								\
+		__get_user_asm("ldrh", "%w", __gu_val, (ptr), (err));	\
+		break;							\
+	case 4:								\
+		__get_user_asm("ldr", "%w", __gu_val, (ptr), (err));	\
+		break;							\
+	case 8:								\
+		__get_user_asm("ldr", "%",  __gu_val, (ptr), (err));	\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
+} while (0)
+
+#define __get_user(x, ptr)						\
+({									\
+	int __gu_err = 0;						\
+	__get_user_err((x), (ptr), __gu_err);				\
+	__gu_err;							\
+})
+
+#define __get_user_error(x, ptr, err)					\
+({									\
+	__get_user_err((x), (ptr), (err));				\
+	(void)0;							\
+})
+
+#define __get_user_unaligned __get_user
+
+#define get_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __user *__p = (ptr);				\
+	might_fault();							\
+	access_ok(VERIFY_READ, __p, sizeof(*__p)) ?			\
+		__get_user((x), __p) :					\
+		((x) = 0, -EFAULT);					\
+})
+
+#define __put_user_asm(instr, reg, x, addr, err)			\
+	asm volatile(							\
+	"1:	" instr "	" reg "1, [%2]\n"			\
+	"2:\n"								\
+	"	.section .fixup,\"ax\"\n"				\
+	"	.align	2\n"						\
+	"3:	mov	%w0, %3\n"					\
+	"	b	2b\n"						\
+	"	.previous\n"						\
+	"	.section __ex_table,\"a\"\n"				\
+	"	.align	3\n"						\
+	"	.quad	1b, 3b\n"					\
+	"	.previous"						\
+	: "+r" (err)							\
+	: "r" (x), "r" (addr), "i" (-EFAULT))
+
+#define __put_user_err(x, ptr, err)					\
+do {									\
+	__typeof__(*(ptr)) __pu_val = (x);				\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		__put_user_asm("strb", "%w", __pu_val, (ptr), (err));	\
+		break;							\
+	case 2:								\
+		__put_user_asm("strh", "%w", __pu_val, (ptr), (err));	\
+		break;							\
+	case 4:								\
+		__put_user_asm("str",  "%w", __pu_val, (ptr), (err));	\
+		break;							\
+	case 8:								\
+		__put_user_asm("str",  "%", __pu_val, (ptr), (err));	\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+} while (0)
+
+#define __put_user(x, ptr)						\
+({									\
+	int __pu_err = 0;						\
+	__put_user_err((x), (ptr), __pu_err);				\
+	__pu_err;							\
+})
+
+#define __put_user_error(x, ptr, err)					\
+({									\
+	__put_user_err((x), (ptr), (err));				\
+	(void)0;							\
+})
+
+#define __put_user_unaligned __put_user
+
+#define put_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __user *__p = (ptr);				\
+	might_fault();							\
+	access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ?			\
+		__put_user((x), __p) :					\
+		-EFAULT;						\
+})
+
+extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n);
+extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n);
+extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n);
+extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
+
+static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+		n = __copy_from_user(to, from, n);
+	else /* security hole - plug it */
+		memset(to, 0, n);
+	return n;
+}
+
+static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		n = __copy_to_user(to, from, n);
+	return n;
+}
+
+static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n))
+		n = __copy_in_user(to, from, n);
+	return n;
+}
+
+#define __copy_to_user_inatomic __copy_to_user
+#define __copy_from_user_inatomic __copy_from_user
+
+static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		n = __clear_user(to, n);
+	return n;
+}
+
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
+
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
+
+#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
new file mode 100644
index 000000000..3bc498c25
--- /dev/null
+++ b/arch/arm64/include/asm/unistd.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_COMPAT_SYS_GETDENTS64
+#define __ARCH_WANT_COMPAT_STAT64
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+#define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+
+/*
+ * Compat syscall numbers used by the AArch64 kernel.
+ */
+#define __NR_compat_restart_syscall	0
+#define __NR_compat_exit		1
+#define __NR_compat_read		3
+#define __NR_compat_write		4
+#define __NR_compat_sigreturn		119
+#define __NR_compat_rt_sigreturn	173
+
+/*
+ * The following SVCs are ARM private.
+ */
+#define __ARM_NR_COMPAT_BASE		0x0f0000
+#define __ARM_NR_compat_cacheflush	(__ARM_NR_COMPAT_BASE+2)
+#define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE+5)
+
+#define __NR_compat_syscalls		388
+#endif
+
+#define __ARCH_WANT_SYS_CLONE
+
+#ifndef __COMPAT_SYSCALL_NR
+#include <uapi/asm/unistd.h>
+#endif
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
new file mode 100644
index 000000000..cef934a90
--- /dev/null
+++ b/arch/arm64/include/asm/unistd32.h
@@ -0,0 +1,799 @@
+/*
+ * AArch32 (compat) system call definitions.
+ *
+ * Copyright (C) 2001-2005 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __SYSCALL
+#define __SYSCALL(x, y)
+#endif
+
+#define __NR_restart_syscall 0
+__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
+#define __NR_exit 1
+__SYSCALL(__NR_exit, sys_exit)
+#define __NR_fork 2
+__SYSCALL(__NR_fork, sys_fork)
+#define __NR_read 3
+__SYSCALL(__NR_read, sys_read)
+#define __NR_write 4
+__SYSCALL(__NR_write, sys_write)
+#define __NR_open 5
+__SYSCALL(__NR_open, compat_sys_open)
+#define __NR_close 6
+__SYSCALL(__NR_close, sys_close)
+			/* 7 was sys_waitpid */
+__SYSCALL(7, sys_ni_syscall)
+#define __NR_creat 8
+__SYSCALL(__NR_creat, sys_creat)
+#define __NR_link 9
+__SYSCALL(__NR_link, sys_link)
+#define __NR_unlink 10
+__SYSCALL(__NR_unlink, sys_unlink)
+#define __NR_execve 11
+__SYSCALL(__NR_execve, compat_sys_execve)
+#define __NR_chdir 12
+__SYSCALL(__NR_chdir, sys_chdir)
+			/* 13 was sys_time */
+__SYSCALL(13, sys_ni_syscall)
+#define __NR_mknod 14
+__SYSCALL(__NR_mknod, sys_mknod)
+#define __NR_chmod 15
+__SYSCALL(__NR_chmod, sys_chmod)
+#define __NR_lchown 16
+__SYSCALL(__NR_lchown, sys_lchown16)
+			/* 17 was sys_break */
+__SYSCALL(17, sys_ni_syscall)
+			/* 18 was sys_stat */
+__SYSCALL(18, sys_ni_syscall)
+#define __NR_lseek 19
+__SYSCALL(__NR_lseek, compat_sys_lseek)
+#define __NR_getpid 20
+__SYSCALL(__NR_getpid, sys_getpid)
+#define __NR_mount 21
+__SYSCALL(__NR_mount, compat_sys_mount)
+			/* 22 was sys_umount */
+__SYSCALL(22, sys_ni_syscall)
+#define __NR_setuid 23
+__SYSCALL(__NR_setuid, sys_setuid16)
+#define __NR_getuid 24
+__SYSCALL(__NR_getuid, sys_getuid16)
+			/* 25 was sys_stime */
+__SYSCALL(25, sys_ni_syscall)
+#define __NR_ptrace 26
+__SYSCALL(__NR_ptrace, compat_sys_ptrace)
+			/* 27 was sys_alarm */
+__SYSCALL(27, sys_ni_syscall)
+			/* 28 was sys_fstat */
+__SYSCALL(28, sys_ni_syscall)
+#define __NR_pause 29
+__SYSCALL(__NR_pause, sys_pause)
+			/* 30 was sys_utime */
+__SYSCALL(30, sys_ni_syscall)
+			/* 31 was sys_stty */
+__SYSCALL(31, sys_ni_syscall)
+			/* 32 was sys_gtty */
+__SYSCALL(32, sys_ni_syscall)
+#define __NR_access 33
+__SYSCALL(__NR_access, sys_access)
+#define __NR_nice 34
+__SYSCALL(__NR_nice, sys_nice)
+			/* 35 was sys_ftime */
+__SYSCALL(35, sys_ni_syscall)
+#define __NR_sync 36
+__SYSCALL(__NR_sync, sys_sync)
+#define __NR_kill 37
+__SYSCALL(__NR_kill, sys_kill)
+#define __NR_rename 38
+__SYSCALL(__NR_rename, sys_rename)
+#define __NR_mkdir 39
+__SYSCALL(__NR_mkdir, sys_mkdir)
+#define __NR_rmdir 40
+__SYSCALL(__NR_rmdir, sys_rmdir)
+#define __NR_dup 41
+__SYSCALL(__NR_dup, sys_dup)
+#define __NR_pipe 42
+__SYSCALL(__NR_pipe, sys_pipe)
+#define __NR_times 43
+__SYSCALL(__NR_times, compat_sys_times)
+			/* 44 was sys_prof */
+__SYSCALL(44, sys_ni_syscall)
+#define __NR_brk 45
+__SYSCALL(__NR_brk, sys_brk)
+#define __NR_setgid 46
+__SYSCALL(__NR_setgid, sys_setgid16)
+#define __NR_getgid 47
+__SYSCALL(__NR_getgid, sys_getgid16)
+			/* 48 was sys_signal */
+__SYSCALL(48, sys_ni_syscall)
+#define __NR_geteuid 49
+__SYSCALL(__NR_geteuid, sys_geteuid16)
+#define __NR_getegid 50
+__SYSCALL(__NR_getegid, sys_getegid16)
+#define __NR_acct 51
+__SYSCALL(__NR_acct, sys_acct)
+#define __NR_umount2 52
+__SYSCALL(__NR_umount2, sys_umount)
+			/* 53 was sys_lock */
+__SYSCALL(53, sys_ni_syscall)
+#define __NR_ioctl 54
+__SYSCALL(__NR_ioctl, compat_sys_ioctl)
+#define __NR_fcntl 55
+__SYSCALL(__NR_fcntl, compat_sys_fcntl)
+			/* 56 was sys_mpx */
+__SYSCALL(56, sys_ni_syscall)
+#define __NR_setpgid 57
+__SYSCALL(__NR_setpgid, sys_setpgid)
+			/* 58 was sys_ulimit */
+__SYSCALL(58, sys_ni_syscall)
+			/* 59 was sys_olduname */
+__SYSCALL(59, sys_ni_syscall)
+#define __NR_umask 60
+__SYSCALL(__NR_umask, sys_umask)
+#define __NR_chroot 61
+__SYSCALL(__NR_chroot, sys_chroot)
+#define __NR_ustat 62
+__SYSCALL(__NR_ustat, compat_sys_ustat)
+#define __NR_dup2 63
+__SYSCALL(__NR_dup2, sys_dup2)
+#define __NR_getppid 64
+__SYSCALL(__NR_getppid, sys_getppid)
+#define __NR_getpgrp 65
+__SYSCALL(__NR_getpgrp, sys_getpgrp)
+#define __NR_setsid 66
+__SYSCALL(__NR_setsid, sys_setsid)
+#define __NR_sigaction 67
+__SYSCALL(__NR_sigaction, compat_sys_sigaction)
+			/* 68 was sys_sgetmask */
+__SYSCALL(68, sys_ni_syscall)
+			/* 69 was sys_ssetmask */
+__SYSCALL(69, sys_ni_syscall)
+#define __NR_setreuid 70
+__SYSCALL(__NR_setreuid, sys_setreuid16)
+#define __NR_setregid 71
+__SYSCALL(__NR_setregid, sys_setregid16)
+#define __NR_sigsuspend 72
+__SYSCALL(__NR_sigsuspend, sys_sigsuspend)
+#define __NR_sigpending 73
+__SYSCALL(__NR_sigpending, compat_sys_sigpending)
+#define __NR_sethostname 74
+__SYSCALL(__NR_sethostname, sys_sethostname)
+#define __NR_setrlimit 75
+__SYSCALL(__NR_setrlimit, compat_sys_setrlimit)
+			/* 76 was compat_sys_getrlimit */
+__SYSCALL(76, sys_ni_syscall)
+#define __NR_getrusage 77
+__SYSCALL(__NR_getrusage, compat_sys_getrusage)
+#define __NR_gettimeofday 78
+__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday)
+#define __NR_settimeofday 79
+__SYSCALL(__NR_settimeofday, compat_sys_settimeofday)
+#define __NR_getgroups 80
+__SYSCALL(__NR_getgroups, sys_getgroups16)
+#define __NR_setgroups 81
+__SYSCALL(__NR_setgroups, sys_setgroups16)
+			/* 82 was compat_sys_select */
+__SYSCALL(82, sys_ni_syscall)
+#define __NR_symlink 83
+__SYSCALL(__NR_symlink, sys_symlink)
+			/* 84 was sys_lstat */
+__SYSCALL(84, sys_ni_syscall)
+#define __NR_readlink 85
+__SYSCALL(__NR_readlink, sys_readlink)
+#define __NR_uselib 86
+__SYSCALL(__NR_uselib, sys_uselib)
+#define __NR_swapon 87
+__SYSCALL(__NR_swapon, sys_swapon)
+#define __NR_reboot 88
+__SYSCALL(__NR_reboot, sys_reboot)
+			/* 89 was sys_readdir */
+__SYSCALL(89, sys_ni_syscall)
+			/* 90 was sys_mmap */
+__SYSCALL(90, sys_ni_syscall)
+#define __NR_munmap 91
+__SYSCALL(__NR_munmap, sys_munmap)
+#define __NR_truncate 92
+__SYSCALL(__NR_truncate, compat_sys_truncate)
+#define __NR_ftruncate 93
+__SYSCALL(__NR_ftruncate, compat_sys_ftruncate)
+#define __NR_fchmod 94
+__SYSCALL(__NR_fchmod, sys_fchmod)
+#define __NR_fchown 95
+__SYSCALL(__NR_fchown, sys_fchown16)
+#define __NR_getpriority 96
+__SYSCALL(__NR_getpriority, sys_getpriority)
+#define __NR_setpriority 97
+__SYSCALL(__NR_setpriority, sys_setpriority)
+			/* 98 was sys_profil */
+__SYSCALL(98, sys_ni_syscall)
+#define __NR_statfs 99
+__SYSCALL(__NR_statfs, compat_sys_statfs)
+#define __NR_fstatfs 100
+__SYSCALL(__NR_fstatfs, compat_sys_fstatfs)
+			/* 101 was sys_ioperm */
+__SYSCALL(101, sys_ni_syscall)
+			/* 102 was sys_socketcall */
+__SYSCALL(102, sys_ni_syscall)
+#define __NR_syslog 103
+__SYSCALL(__NR_syslog, sys_syslog)
+#define __NR_setitimer 104
+__SYSCALL(__NR_setitimer, compat_sys_setitimer)
+#define __NR_getitimer 105
+__SYSCALL(__NR_getitimer, compat_sys_getitimer)
+#define __NR_stat 106
+__SYSCALL(__NR_stat, compat_sys_newstat)
+#define __NR_lstat 107
+__SYSCALL(__NR_lstat, compat_sys_newlstat)
+#define __NR_fstat 108
+__SYSCALL(__NR_fstat, compat_sys_newfstat)
+			/* 109 was sys_uname */
+__SYSCALL(109, sys_ni_syscall)
+			/* 110 was sys_iopl */
+__SYSCALL(110, sys_ni_syscall)
+#define __NR_vhangup 111
+__SYSCALL(__NR_vhangup, sys_vhangup)
+			/* 112 was sys_idle */
+__SYSCALL(112, sys_ni_syscall)
+			/* 113 was sys_syscall */
+__SYSCALL(113, sys_ni_syscall)
+#define __NR_wait4 114
+__SYSCALL(__NR_wait4, compat_sys_wait4)
+#define __NR_swapoff 115
+__SYSCALL(__NR_swapoff, sys_swapoff)
+#define __NR_sysinfo 116
+__SYSCALL(__NR_sysinfo, compat_sys_sysinfo)
+			/* 117 was sys_ipc */
+__SYSCALL(117, sys_ni_syscall)
+#define __NR_fsync 118
+__SYSCALL(__NR_fsync, sys_fsync)
+#define __NR_sigreturn 119
+__SYSCALL(__NR_sigreturn, compat_sys_sigreturn_wrapper)
+#define __NR_clone 120
+__SYSCALL(__NR_clone, sys_clone)
+#define __NR_setdomainname 121
+__SYSCALL(__NR_setdomainname, sys_setdomainname)
+#define __NR_uname 122
+__SYSCALL(__NR_uname, sys_newuname)
+			/* 123 was sys_modify_ldt */
+__SYSCALL(123, sys_ni_syscall)
+#define __NR_adjtimex 124
+__SYSCALL(__NR_adjtimex, compat_sys_adjtimex)
+#define __NR_mprotect 125
+__SYSCALL(__NR_mprotect, sys_mprotect)
+#define __NR_sigprocmask 126
+__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask)
+			/* 127 was sys_create_module */
+__SYSCALL(127, sys_ni_syscall)
+#define __NR_init_module 128
+__SYSCALL(__NR_init_module, sys_init_module)
+#define __NR_delete_module 129
+__SYSCALL(__NR_delete_module, sys_delete_module)
+			/* 130 was sys_get_kernel_syms */
+__SYSCALL(130, sys_ni_syscall)
+#define __NR_quotactl 131
+__SYSCALL(__NR_quotactl, sys_quotactl)
+#define __NR_getpgid 132
+__SYSCALL(__NR_getpgid, sys_getpgid)
+#define __NR_fchdir 133
+__SYSCALL(__NR_fchdir, sys_fchdir)
+#define __NR_bdflush 134
+__SYSCALL(__NR_bdflush, sys_bdflush)
+#define __NR_sysfs 135
+__SYSCALL(__NR_sysfs, sys_sysfs)
+#define __NR_personality 136
+__SYSCALL(__NR_personality, sys_personality)
+			/* 137 was sys_afs_syscall */
+__SYSCALL(137, sys_ni_syscall)
+#define __NR_setfsuid 138
+__SYSCALL(__NR_setfsuid, sys_setfsuid16)
+#define __NR_setfsgid 139
+__SYSCALL(__NR_setfsgid, sys_setfsgid16)
+#define __NR__llseek 140
+__SYSCALL(__NR__llseek, sys_llseek)
+#define __NR_getdents 141
+__SYSCALL(__NR_getdents, compat_sys_getdents)
+#define __NR__newselect 142
+__SYSCALL(__NR__newselect, compat_sys_select)
+#define __NR_flock 143
+__SYSCALL(__NR_flock, sys_flock)
+#define __NR_msync 144
+__SYSCALL(__NR_msync, sys_msync)
+#define __NR_readv 145
+__SYSCALL(__NR_readv, compat_sys_readv)
+#define __NR_writev 146
+__SYSCALL(__NR_writev, compat_sys_writev)
+#define __NR_getsid 147
+__SYSCALL(__NR_getsid, sys_getsid)
+#define __NR_fdatasync 148
+__SYSCALL(__NR_fdatasync, sys_fdatasync)
+#define __NR__sysctl 149
+__SYSCALL(__NR__sysctl, compat_sys_sysctl)
+#define __NR_mlock 150
+__SYSCALL(__NR_mlock, sys_mlock)
+#define __NR_munlock 151
+__SYSCALL(__NR_munlock, sys_munlock)
+#define __NR_mlockall 152
+__SYSCALL(__NR_mlockall, sys_mlockall)
+#define __NR_munlockall 153
+__SYSCALL(__NR_munlockall, sys_munlockall)
+#define __NR_sched_setparam 154
+__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
+#define __NR_sched_getparam 155
+__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
+#define __NR_sched_setscheduler 156
+__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
+#define __NR_sched_getscheduler 157
+__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
+#define __NR_sched_yield 158
+__SYSCALL(__NR_sched_yield, sys_sched_yield)
+#define __NR_sched_get_priority_max 159
+__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
+#define __NR_sched_get_priority_min 160
+__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#define __NR_sched_rr_get_interval 161
+__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval)
+#define __NR_nanosleep 162
+__SYSCALL(__NR_nanosleep, compat_sys_nanosleep)
+#define __NR_mremap 163
+__SYSCALL(__NR_mremap, sys_mremap)
+#define __NR_setresuid 164
+__SYSCALL(__NR_setresuid, sys_setresuid16)
+#define __NR_getresuid 165
+__SYSCALL(__NR_getresuid, sys_getresuid16)
+			/* 166 was sys_vm86 */
+__SYSCALL(166, sys_ni_syscall)
+			/* 167 was sys_query_module */
+__SYSCALL(167, sys_ni_syscall)
+#define __NR_poll 168
+__SYSCALL(__NR_poll, sys_poll)
+#define __NR_nfsservctl 169
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
+#define __NR_setresgid 170
+__SYSCALL(__NR_setresgid, sys_setresgid16)
+#define __NR_getresgid 171
+__SYSCALL(__NR_getresgid, sys_getresgid16)
+#define __NR_prctl 172
+__SYSCALL(__NR_prctl, sys_prctl)
+#define __NR_rt_sigreturn 173
+__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn_wrapper)
+#define __NR_rt_sigaction 174
+__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction)
+#define __NR_rt_sigprocmask 175
+__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask)
+#define __NR_rt_sigpending 176
+__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending)
+#define __NR_rt_sigtimedwait 177
+__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait)
+#define __NR_rt_sigqueueinfo 178
+__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo)
+#define __NR_rt_sigsuspend 179
+__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend)
+#define __NR_pread64 180
+__SYSCALL(__NR_pread64, compat_sys_pread64_wrapper)
+#define __NR_pwrite64 181
+__SYSCALL(__NR_pwrite64, compat_sys_pwrite64_wrapper)
+#define __NR_chown 182
+__SYSCALL(__NR_chown, sys_chown16)
+#define __NR_getcwd 183
+__SYSCALL(__NR_getcwd, sys_getcwd)
+#define __NR_capget 184
+__SYSCALL(__NR_capget, sys_capget)
+#define __NR_capset 185
+__SYSCALL(__NR_capset, sys_capset)
+#define __NR_sigaltstack 186
+__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack)
+#define __NR_sendfile 187
+__SYSCALL(__NR_sendfile, compat_sys_sendfile)
+			/* 188 reserved */
+__SYSCALL(188, sys_ni_syscall)
+			/* 189 reserved */
+__SYSCALL(189, sys_ni_syscall)
+#define __NR_vfork 190
+__SYSCALL(__NR_vfork, sys_vfork)
+#define __NR_ugetrlimit 191	/* SuS compliant getrlimit */
+__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit)		/* SuS compliant getrlimit */
+#define __NR_mmap2 192
+__SYSCALL(__NR_mmap2, compat_sys_mmap2_wrapper)
+#define __NR_truncate64 193
+__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper)
+#define __NR_ftruncate64 194
+__SYSCALL(__NR_ftruncate64, compat_sys_ftruncate64_wrapper)
+#define __NR_stat64 195
+__SYSCALL(__NR_stat64, sys_stat64)
+#define __NR_lstat64 196
+__SYSCALL(__NR_lstat64, sys_lstat64)
+#define __NR_fstat64 197
+__SYSCALL(__NR_fstat64, sys_fstat64)
+#define __NR_lchown32 198
+__SYSCALL(__NR_lchown32, sys_lchown)
+#define __NR_getuid32 199
+__SYSCALL(__NR_getuid32, sys_getuid)
+#define __NR_getgid32 200
+__SYSCALL(__NR_getgid32, sys_getgid)
+#define __NR_geteuid32 201
+__SYSCALL(__NR_geteuid32, sys_geteuid)
+#define __NR_getegid32 202
+__SYSCALL(__NR_getegid32, sys_getegid)
+#define __NR_setreuid32 203
+__SYSCALL(__NR_setreuid32, sys_setreuid)
+#define __NR_setregid32 204
+__SYSCALL(__NR_setregid32, sys_setregid)
+#define __NR_getgroups32 205
+__SYSCALL(__NR_getgroups32, sys_getgroups)
+#define __NR_setgroups32 206
+__SYSCALL(__NR_setgroups32, sys_setgroups)
+#define __NR_fchown32 207
+__SYSCALL(__NR_fchown32, sys_fchown)
+#define __NR_setresuid32 208
+__SYSCALL(__NR_setresuid32, sys_setresuid)
+#define __NR_getresuid32 209
+__SYSCALL(__NR_getresuid32, sys_getresuid)
+#define __NR_setresgid32 210
+__SYSCALL(__NR_setresgid32, sys_setresgid)
+#define __NR_getresgid32 211
+__SYSCALL(__NR_getresgid32, sys_getresgid)
+#define __NR_chown32 212
+__SYSCALL(__NR_chown32, sys_chown)
+#define __NR_setuid32 213
+__SYSCALL(__NR_setuid32, sys_setuid)
+#define __NR_setgid32 214
+__SYSCALL(__NR_setgid32, sys_setgid)
+#define __NR_setfsuid32 215
+__SYSCALL(__NR_setfsuid32, sys_setfsuid)
+#define __NR_setfsgid32 216
+__SYSCALL(__NR_setfsgid32, sys_setfsgid)
+#define __NR_getdents64 217
+__SYSCALL(__NR_getdents64, compat_sys_getdents64)
+#define __NR_pivot_root 218
+__SYSCALL(__NR_pivot_root, sys_pivot_root)
+#define __NR_mincore 219
+__SYSCALL(__NR_mincore, sys_mincore)
+#define __NR_madvise 220
+__SYSCALL(__NR_madvise, sys_madvise)
+#define __NR_fcntl64 221
+__SYSCALL(__NR_fcntl64, compat_sys_fcntl64)
+			/* 222 for tux */
+__SYSCALL(222, sys_ni_syscall)
+			/* 223 is unused */
+__SYSCALL(223, sys_ni_syscall)
+#define __NR_gettid 224
+__SYSCALL(__NR_gettid, sys_gettid)
+#define __NR_readahead 225
+__SYSCALL(__NR_readahead, compat_sys_readahead_wrapper)
+#define __NR_setxattr 226
+__SYSCALL(__NR_setxattr, sys_setxattr)
+#define __NR_lsetxattr 227
+__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
+#define __NR_fsetxattr 228
+__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
+#define __NR_getxattr 229
+__SYSCALL(__NR_getxattr, sys_getxattr)
+#define __NR_lgetxattr 230
+__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
+#define __NR_fgetxattr 231
+__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
+#define __NR_listxattr 232
+__SYSCALL(__NR_listxattr, sys_listxattr)
+#define __NR_llistxattr 233
+__SYSCALL(__NR_llistxattr, sys_llistxattr)
+#define __NR_flistxattr 234
+__SYSCALL(__NR_flistxattr, sys_flistxattr)
+#define __NR_removexattr 235
+__SYSCALL(__NR_removexattr, sys_removexattr)
+#define __NR_lremovexattr 236
+__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
+#define __NR_fremovexattr 237
+__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
+#define __NR_tkill 238
+__SYSCALL(__NR_tkill, sys_tkill)
+#define __NR_sendfile64 239
+__SYSCALL(__NR_sendfile64, sys_sendfile64)
+#define __NR_futex 240
+__SYSCALL(__NR_futex, compat_sys_futex)
+#define __NR_sched_setaffinity 241
+__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity)
+#define __NR_sched_getaffinity 242
+__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity)
+#define __NR_io_setup 243
+__SYSCALL(__NR_io_setup, compat_sys_io_setup)
+#define __NR_io_destroy 244
+__SYSCALL(__NR_io_destroy, sys_io_destroy)
+#define __NR_io_getevents 245
+__SYSCALL(__NR_io_getevents, compat_sys_io_getevents)
+#define __NR_io_submit 246
+__SYSCALL(__NR_io_submit, compat_sys_io_submit)
+#define __NR_io_cancel 247
+__SYSCALL(__NR_io_cancel, sys_io_cancel)
+#define __NR_exit_group 248
+__SYSCALL(__NR_exit_group, sys_exit_group)
+#define __NR_lookup_dcookie 249
+__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie)
+#define __NR_epoll_create 250
+__SYSCALL(__NR_epoll_create, sys_epoll_create)
+#define __NR_epoll_ctl 251
+__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
+#define __NR_epoll_wait 252
+__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
+#define __NR_remap_file_pages 253
+__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
+			/* 254 for set_thread_area */
+__SYSCALL(254, sys_ni_syscall)
+			/* 255 for get_thread_area */
+__SYSCALL(255, sys_ni_syscall)
+#define __NR_set_tid_address 256
+__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
+#define __NR_timer_create 257
+__SYSCALL(__NR_timer_create, compat_sys_timer_create)
+#define __NR_timer_settime 258
+__SYSCALL(__NR_timer_settime, compat_sys_timer_settime)
+#define __NR_timer_gettime 259
+__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime)
+#define __NR_timer_getoverrun 260
+__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+#define __NR_timer_delete 261
+__SYSCALL(__NR_timer_delete, sys_timer_delete)
+#define __NR_clock_settime 262
+__SYSCALL(__NR_clock_settime, compat_sys_clock_settime)
+#define __NR_clock_gettime 263
+__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime)
+#define __NR_clock_getres 264
+__SYSCALL(__NR_clock_getres, compat_sys_clock_getres)
+#define __NR_clock_nanosleep 265
+__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep)
+#define __NR_statfs64 266
+__SYSCALL(__NR_statfs64, compat_sys_statfs64_wrapper)
+#define __NR_fstatfs64 267
+__SYSCALL(__NR_fstatfs64, compat_sys_fstatfs64_wrapper)
+#define __NR_tgkill 268
+__SYSCALL(__NR_tgkill, sys_tgkill)
+#define __NR_utimes 269
+__SYSCALL(__NR_utimes, compat_sys_utimes)
+#define __NR_arm_fadvise64_64 270
+__SYSCALL(__NR_arm_fadvise64_64, compat_sys_fadvise64_64_wrapper)
+#define __NR_pciconfig_iobase 271
+__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase)
+#define __NR_pciconfig_read 272
+__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read)
+#define __NR_pciconfig_write 273
+__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write)
+#define __NR_mq_open 274
+__SYSCALL(__NR_mq_open, compat_sys_mq_open)
+#define __NR_mq_unlink 275
+__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+#define __NR_mq_timedsend 276
+__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend)
+#define __NR_mq_timedreceive 277
+__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive)
+#define __NR_mq_notify 278
+__SYSCALL(__NR_mq_notify, compat_sys_mq_notify)
+#define __NR_mq_getsetattr 279
+__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr)
+#define __NR_waitid 280
+__SYSCALL(__NR_waitid, compat_sys_waitid)
+#define __NR_socket 281
+__SYSCALL(__NR_socket, sys_socket)
+#define __NR_bind 282
+__SYSCALL(__NR_bind, sys_bind)
+#define __NR_connect 283
+__SYSCALL(__NR_connect, sys_connect)
+#define __NR_listen 284
+__SYSCALL(__NR_listen, sys_listen)
+#define __NR_accept 285
+__SYSCALL(__NR_accept, sys_accept)
+#define __NR_getsockname 286
+__SYSCALL(__NR_getsockname, sys_getsockname)
+#define __NR_getpeername 287
+__SYSCALL(__NR_getpeername, sys_getpeername)
+#define __NR_socketpair 288
+__SYSCALL(__NR_socketpair, sys_socketpair)
+#define __NR_send 289
+__SYSCALL(__NR_send, sys_send)
+#define __NR_sendto 290
+__SYSCALL(__NR_sendto, sys_sendto)
+#define __NR_recv 291
+__SYSCALL(__NR_recv, compat_sys_recv)
+#define __NR_recvfrom 292
+__SYSCALL(__NR_recvfrom, compat_sys_recvfrom)
+#define __NR_shutdown 293
+__SYSCALL(__NR_shutdown, sys_shutdown)
+#define __NR_setsockopt 294
+__SYSCALL(__NR_setsockopt, compat_sys_setsockopt)
+#define __NR_getsockopt 295
+__SYSCALL(__NR_getsockopt, compat_sys_getsockopt)
+#define __NR_sendmsg 296
+__SYSCALL(__NR_sendmsg, compat_sys_sendmsg)
+#define __NR_recvmsg 297
+__SYSCALL(__NR_recvmsg, compat_sys_recvmsg)
+#define __NR_semop 298
+__SYSCALL(__NR_semop, sys_semop)
+#define __NR_semget 299
+__SYSCALL(__NR_semget, sys_semget)
+#define __NR_semctl 300
+__SYSCALL(__NR_semctl, compat_sys_semctl)
+#define __NR_msgsnd 301
+__SYSCALL(__NR_msgsnd, compat_sys_msgsnd)
+#define __NR_msgrcv 302
+__SYSCALL(__NR_msgrcv, compat_sys_msgrcv)
+#define __NR_msgget 303
+__SYSCALL(__NR_msgget, sys_msgget)
+#define __NR_msgctl 304
+__SYSCALL(__NR_msgctl, compat_sys_msgctl)
+#define __NR_shmat 305
+__SYSCALL(__NR_shmat, compat_sys_shmat)
+#define __NR_shmdt 306
+__SYSCALL(__NR_shmdt, sys_shmdt)
+#define __NR_shmget 307
+__SYSCALL(__NR_shmget, sys_shmget)
+#define __NR_shmctl 308
+__SYSCALL(__NR_shmctl, compat_sys_shmctl)
+#define __NR_add_key 309
+__SYSCALL(__NR_add_key, sys_add_key)
+#define __NR_request_key 310
+__SYSCALL(__NR_request_key, sys_request_key)
+#define __NR_keyctl 311
+__SYSCALL(__NR_keyctl, compat_sys_keyctl)
+#define __NR_semtimedop 312
+__SYSCALL(__NR_semtimedop, compat_sys_semtimedop)
+#define __NR_vserver 313
+__SYSCALL(__NR_vserver, sys_ni_syscall)
+#define __NR_ioprio_set 314
+__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
+#define __NR_ioprio_get 315
+__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
+#define __NR_inotify_init 316
+__SYSCALL(__NR_inotify_init, sys_inotify_init)
+#define __NR_inotify_add_watch 317
+__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
+#define __NR_inotify_rm_watch 318
+__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+#define __NR_mbind 319
+__SYSCALL(__NR_mbind, compat_sys_mbind)
+#define __NR_get_mempolicy 320
+__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy)
+#define __NR_set_mempolicy 321
+__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy)
+#define __NR_openat 322
+__SYSCALL(__NR_openat, compat_sys_openat)
+#define __NR_mkdirat 323
+__SYSCALL(__NR_mkdirat, sys_mkdirat)
+#define __NR_mknodat 324
+__SYSCALL(__NR_mknodat, sys_mknodat)
+#define __NR_fchownat 325
+__SYSCALL(__NR_fchownat, sys_fchownat)
+#define __NR_futimesat 326
+__SYSCALL(__NR_futimesat, compat_sys_futimesat)
+#define __NR_fstatat64 327
+__SYSCALL(__NR_fstatat64, sys_fstatat64)
+#define __NR_unlinkat 328
+__SYSCALL(__NR_unlinkat, sys_unlinkat)
+#define __NR_renameat 329
+__SYSCALL(__NR_renameat, sys_renameat)
+#define __NR_linkat 330
+__SYSCALL(__NR_linkat, sys_linkat)
+#define __NR_symlinkat 331
+__SYSCALL(__NR_symlinkat, sys_symlinkat)
+#define __NR_readlinkat 332
+__SYSCALL(__NR_readlinkat, sys_readlinkat)
+#define __NR_fchmodat 333
+__SYSCALL(__NR_fchmodat, sys_fchmodat)
+#define __NR_faccessat 334
+__SYSCALL(__NR_faccessat, sys_faccessat)
+#define __NR_pselect6 335
+__SYSCALL(__NR_pselect6, compat_sys_pselect6)
+#define __NR_ppoll 336
+__SYSCALL(__NR_ppoll, compat_sys_ppoll)
+#define __NR_unshare 337
+__SYSCALL(__NR_unshare, sys_unshare)
+#define __NR_set_robust_list 338
+__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list)
+#define __NR_get_robust_list 339
+__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list)
+#define __NR_splice 340
+__SYSCALL(__NR_splice, sys_splice)
+#define __NR_sync_file_range2 341
+__SYSCALL(__NR_sync_file_range2, compat_sys_sync_file_range2_wrapper)
+#define __NR_tee 342
+__SYSCALL(__NR_tee, sys_tee)
+#define __NR_vmsplice 343
+__SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
+#define __NR_move_pages 344
+__SYSCALL(__NR_move_pages, compat_sys_move_pages)
+#define __NR_getcpu 345
+__SYSCALL(__NR_getcpu, sys_getcpu)
+#define __NR_epoll_pwait 346
+__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait)
+#define __NR_kexec_load 347
+__SYSCALL(__NR_kexec_load, compat_sys_kexec_load)
+#define __NR_utimensat 348
+__SYSCALL(__NR_utimensat, compat_sys_utimensat)
+#define __NR_signalfd 349
+__SYSCALL(__NR_signalfd, compat_sys_signalfd)
+#define __NR_timerfd_create 350
+__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+#define __NR_eventfd 351
+__SYSCALL(__NR_eventfd, sys_eventfd)
+#define __NR_fallocate 352
+__SYSCALL(__NR_fallocate, compat_sys_fallocate_wrapper)
+#define __NR_timerfd_settime 353
+__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime)
+#define __NR_timerfd_gettime 354
+__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime)
+#define __NR_signalfd4 355
+__SYSCALL(__NR_signalfd4, compat_sys_signalfd4)
+#define __NR_eventfd2 356
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
+#define __NR_epoll_create1 357
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
+#define __NR_dup3 358
+__SYSCALL(__NR_dup3, sys_dup3)
+#define __NR_pipe2 359
+__SYSCALL(__NR_pipe2, sys_pipe2)
+#define __NR_inotify_init1 360
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_preadv 361
+__SYSCALL(__NR_preadv, compat_sys_preadv)
+#define __NR_pwritev 362
+__SYSCALL(__NR_pwritev, compat_sys_pwritev)
+#define __NR_rt_tgsigqueueinfo 363
+__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo)
+#define __NR_perf_event_open 364
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_recvmmsg 365
+__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg)
+#define __NR_accept4 366
+__SYSCALL(__NR_accept4, sys_accept4)
+#define __NR_fanotify_init 367
+__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
+#define __NR_fanotify_mark 368
+__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark)
+#define __NR_prlimit64 369
+__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_name_to_handle_at 370
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 371
+__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at)
+#define __NR_clock_adjtime 372
+__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime)
+#define __NR_syncfs 373
+__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_sendmmsg 374
+__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg)
+#define __NR_setns 375
+__SYSCALL(__NR_setns, sys_setns)
+#define __NR_process_vm_readv 376
+__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv)
+#define __NR_process_vm_writev 377
+__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
+#define __NR_kcmp 378
+__SYSCALL(__NR_kcmp, sys_kcmp)
+#define __NR_finit_module 379
+__SYSCALL(__NR_finit_module, sys_finit_module)
+#define __NR_sched_setattr 380
+__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
+#define __NR_sched_getattr 381
+__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
+#define __NR_renameat2 382
+__SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_seccomp 383
+__SYSCALL(__NR_seccomp, sys_seccomp)
+#define __NR_getrandom 384
+__SYSCALL(__NR_getrandom, sys_getrandom)
+#define __NR_memfd_create 385
+__SYSCALL(__NR_memfd_create, sys_memfd_create)
+#define __NR_bpf 386
+__SYSCALL(__NR_bpf, sys_bpf)
+#define __NR_execveat 387
+__SYSCALL(__NR_execveat, compat_sys_execveat)
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
new file mode 100644
index 000000000..839ce0031
--- /dev/null
+++ b/arch/arm64/include/asm/vdso.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_VDSO_H
+#define __ASM_VDSO_H
+
+#ifdef __KERNEL__
+
+/*
+ * Default link address for the vDSO.
+ * Since we randomise the VDSO mapping, there's little point in trying
+ * to prelink this.
+ */
+#define VDSO_LBASE	0x0
+
+#ifndef __ASSEMBLY__
+
+#include <generated/vdso-offsets.h>
+
+#define VDSO_SYMBOL(base, name)						   \
+({									   \
+	(void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \
+})
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_VDSO_H */
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
new file mode 100644
index 000000000..de6619967
--- /dev/null
+++ b/arch/arm64/include/asm/vdso_datapage.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_VDSO_DATAPAGE_H
+#define __ASM_VDSO_DATAPAGE_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+struct vdso_data {
+	__u64 cs_cycle_last;	/* Timebase at clocksource init */
+	__u64 xtime_clock_sec;	/* Kernel time */
+	__u64 xtime_clock_nsec;
+	__u64 xtime_coarse_sec;	/* Coarse time */
+	__u64 xtime_coarse_nsec;
+	__u64 wtm_clock_sec;	/* Wall to monotonic time */
+	__u64 wtm_clock_nsec;
+	__u32 tb_seq_count;	/* Timebase sequence counter */
+	__u32 cs_mult;		/* Clocksource multiplier */
+	__u32 cs_shift;		/* Clocksource shift */
+	__u32 tz_minuteswest;	/* Whacky timezone stuff */
+	__u32 tz_dsttime;
+	__u32 use_syscall;
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
new file mode 100644
index 000000000..7a5df5252
--- /dev/null
+++ b/arch/arm64/include/asm/virt.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM__VIRT_H
+#define __ASM__VIRT_H
+
+#define BOOT_CPU_MODE_EL1	(0xe11)
+#define BOOT_CPU_MODE_EL2	(0xe12)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * __boot_cpu_mode records what mode CPUs were booted in.
+ * A correctly-implemented bootloader must start all CPUs in the same mode:
+ * In this case, both 32bit halves of __boot_cpu_mode will contain the
+ * same value (either 0 if booted in EL1, BOOT_CPU_MODE_EL2 if booted in EL2).
+ *
+ * Should the bootloader fail to do this, the two values will be different.
+ * This allows the kernel to flag an error when the secondaries have come up.
+ */
+extern u32 __boot_cpu_mode[2];
+
+void __hyp_set_vectors(phys_addr_t phys_vector_base);
+phys_addr_t __hyp_get_vectors(void);
+
+/* Reports the availability of HYP mode */
+static inline bool is_hyp_mode_available(void)
+{
+	return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
+		__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
+}
+
+/* Check if the bootloader has booted CPUs in different modes */
+static inline bool is_hyp_mode_mismatched(void)
+{
+	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
+}
+
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
new file mode 100644
index 000000000..aab5bf09e
--- /dev/null
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_WORD_AT_A_TIME_H
+#define __ASM_WORD_AT_A_TIME_H
+
+#ifndef __AARCH64EB__
+
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
+				     const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+#define prep_zero_mask(a, bits, c) (bits)
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return fls64(mask) >> 3;
+}
+
+#define zero_bytemask(mask) (mask)
+
+#else	/* __AARCH64EB__ */
+#include <asm-generic/word-at-a-time.h>
+#endif
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret, offset;
+
+	/* Load word from unaligned pointer addr */
+	asm(
+	"1:	ldr	%0, %3\n"
+	"2:\n"
+	"	.pushsection .fixup,\"ax\"\n"
+	"	.align 2\n"
+	"3:	and	%1, %2, #0x7\n"
+	"	bic	%2, %2, #0x7\n"
+	"	ldr	%0, [%2]\n"
+	"	lsl	%1, %1, #0x3\n"
+#ifndef __AARCH64EB__
+	"	lsr	%0, %0, %1\n"
+#else
+	"	lsl	%0, %0, %1\n"
+#endif
+	"	b	2b\n"
+	"	.popsection\n"
+	"	.pushsection __ex_table,\"a\"\n"
+	"	.align	3\n"
+	"	.quad	1b, 3b\n"
+	"	.popsection"
+	: "=&r" (ret), "=&r" (offset)
+	: "r" (addr), "Q" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+#endif /* __ASM_WORD_AT_A_TIME_H */
diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h
new file mode 100644
index 000000000..86553213c
--- /dev/null
+++ b/arch/arm64/include/asm/xen/events.h
@@ -0,0 +1,21 @@
+#ifndef _ASM_ARM64_XEN_EVENTS_H
+#define _ASM_ARM64_XEN_EVENTS_H
+
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+
+enum ipi_vector {
+	XEN_PLACEHOLDER_VECTOR,
+
+	/* Xen IPIs go here */
+	XEN_NR_IPIS,
+};
+
+static inline int xen_irqs_disabled(struct pt_regs *regs)
+{
+	return raw_irqs_disabled_flags((unsigned long) regs->pstate);
+}
+
+#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
+
+#endif /* _ASM_ARM64_XEN_EVENTS_H */
diff --git a/arch/arm64/include/asm/xen/hypercall.h b/arch/arm64/include/asm/xen/hypercall.h
new file mode 100644
index 000000000..74b0c423f
--- /dev/null
+++ b/arch/arm64/include/asm/xen/hypercall.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/hypercall.h>
diff --git a/arch/arm64/include/asm/xen/hypervisor.h b/arch/arm64/include/asm/xen/hypervisor.h
new file mode 100644
index 000000000..f263da8e8
--- /dev/null
+++ b/arch/arm64/include/asm/xen/hypervisor.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/hypervisor.h>
diff --git a/arch/arm64/include/asm/xen/interface.h b/arch/arm64/include/asm/xen/interface.h
new file mode 100644
index 000000000..44457aebe
--- /dev/null
+++ b/arch/arm64/include/asm/xen/interface.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/interface.h>
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
new file mode 100644
index 000000000..2052102b4
--- /dev/null
+++ b/arch/arm64/include/asm/xen/page-coherent.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/page-coherent.h>
diff --git a/arch/arm64/include/asm/xen/page.h b/arch/arm64/include/asm/xen/page.h
new file mode 100644
index 000000000..bed87ec36
--- /dev/null
+++ b/arch/arm64/include/asm/xen/page.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/page.h>
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
new file mode 100644
index 000000000..825b0fe51
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -0,0 +1,22 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += kvm_para.h
+
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += fcntl.h
+header-y += hwcap.h
+header-y += kvm_para.h
+header-y += perf_regs.h
+header-y += param.h
+header-y += ptrace.h
+header-y += setup.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += stat.h
+header-y += statfs.h
+header-y += ucontext.h
+header-y += unistd.h
diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h
new file mode 100644
index 000000000..22d6d8885
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/auxvec.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_AUXVEC_H
+#define __ASM_AUXVEC_H
+
+/* vDSO location */
+#define AT_SYSINFO_EHDR	33
+
+#endif
diff --git a/arch/arm64/include/uapi/asm/bitsperlong.h b/arch/arm64/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..fce9c2924
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif	/* __ASM_BITSPERLONG_H */
diff --git a/arch/arm64/include/uapi/asm/byteorder.h b/arch/arm64/include/uapi/asm/byteorder.h
new file mode 100644
index 000000000..dc19e9537
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/byteorder.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BYTEORDER_H
+#define __ASM_BYTEORDER_H
+
+#ifdef __AARCH64EB__
+#include <linux/byteorder/big_endian.h>
+#else
+#include <linux/byteorder/little_endian.h>
+#endif
+
+#endif	/* __ASM_BYTEORDER_H */
diff --git a/arch/arm64/include/uapi/asm/fcntl.h b/arch/arm64/include/uapi/asm/fcntl.h
new file mode 100644
index 000000000..cd2e630c2
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/fcntl.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_FCNTL_H
+#define __ASM_FCNTL_H
+
+/*
+ * Using our own definitions for AArch32 (compat) support.
+ */
+#define O_DIRECTORY	 040000	/* must be a directory */
+#define O_NOFOLLOW	0100000	/* don't follow links */
+#define O_DIRECT	0200000	/* direct disk access hint - currently ignored */
+#define O_LARGEFILE	0400000
+
+#include <asm-generic/fcntl.h>
+
+#endif
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
new file mode 100644
index 000000000..73cf0f54d
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _UAPI__ASM_HWCAP_H
+#define _UAPI__ASM_HWCAP_H
+
+/*
+ * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
+ */
+#define HWCAP_FP		(1 << 0)
+#define HWCAP_ASIMD		(1 << 1)
+#define HWCAP_EVTSTRM		(1 << 2)
+#define HWCAP_AES		(1 << 3)
+#define HWCAP_PMULL		(1 << 4)
+#define HWCAP_SHA1		(1 << 5)
+#define HWCAP_SHA2		(1 << 6)
+#define HWCAP_CRC32		(1 << 7)
+
+#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..d26832022
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/uapi/asm/kvm.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#define KVM_SPSR_EL1	0
+#define KVM_SPSR_SVC	KVM_SPSR_EL1
+#define KVM_SPSR_ABT	1
+#define KVM_SPSR_UND	2
+#define KVM_SPSR_IRQ	3
+#define KVM_SPSR_FIQ	4
+#define KVM_NR_SPSR	5
+
+#ifndef __ASSEMBLY__
+#include <linux/psci.h>
+#include <asm/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
+
+#define KVM_REG_SIZE(id)						\
+	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+struct kvm_regs {
+	struct user_pt_regs regs;	/* sp = sp_el0 */
+
+	__u64	sp_el1;
+	__u64	elr_el1;
+
+	__u64	spsr[KVM_NR_SPSR];
+
+	struct user_fpsimd_state fp_regs;
+};
+
+/* Supported Processor Types */
+#define KVM_ARM_TARGET_AEM_V8		0
+#define KVM_ARM_TARGET_FOUNDATION_V8	1
+#define KVM_ARM_TARGET_CORTEX_A57	2
+#define KVM_ARM_TARGET_XGENE_POTENZA	3
+#define KVM_ARM_TARGET_CORTEX_A53	4
+
+#define KVM_ARM_NUM_TARGETS		5
+
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT	0
+#define KVM_ARM_DEVICE_TYPE_MASK	(0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT		16
+#define KVM_ARM_DEVICE_ID_MASK		(0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2		0
+
+/* Supported VGIC address types  */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST	0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU	1
+
+#define KVM_VGIC_V2_DIST_SIZE		0x1000
+#define KVM_VGIC_V2_CPU_SIZE		0x2000
+
+/* Supported VGICv3 address types  */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST	2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+
+#define KVM_VGIC_V3_DIST_SIZE		SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+
+#define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
+
+struct kvm_vcpu_init {
+	__u32 target;
+	__u32 features[7];
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
+#define KVM_REG_ARM_COPROC_SHIFT	16
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / sizeof(__u32))
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX		(0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK	0x000000000000FF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT	8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR	(0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK	0x00000000000000FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT	0
+
+/* AArch64 system registers */
+#define KVM_REG_ARM64_SYSREG		(0x0013 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM64_SYSREG_OP0_MASK	0x000000000000c000
+#define KVM_REG_ARM64_SYSREG_OP0_SHIFT	14
+#define KVM_REG_ARM64_SYSREG_OP1_MASK	0x0000000000003800
+#define KVM_REG_ARM64_SYSREG_OP1_SHIFT	11
+#define KVM_REG_ARM64_SYSREG_CRN_MASK	0x0000000000000780
+#define KVM_REG_ARM64_SYSREG_CRN_SHIFT	7
+#define KVM_REG_ARM64_SYSREG_CRM_MASK	0x0000000000000078
+#define KVM_REG_ARM64_SYSREG_CRM_SHIFT	3
+#define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
+#define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
+
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+	KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+	ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+	ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+	ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+	ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+	ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
+
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
+#define KVM_DEV_ARM_VGIC_GRP_CTRL	4
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT		24
+#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT		16
+#define KVM_ARM_IRQ_VCPU_MASK		0xff
+#define KVM_ARM_IRQ_NUM_SHIFT		0
+#define KVM_ARM_IRQ_NUM_MASK		0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU		0
+#define KVM_ARM_IRQ_TYPE_SPI		1
+#define KVM_ARM_IRQ_TYPE_PPI		2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ		0
+#define KVM_ARM_IRQ_CPU_FIQ		1
+
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
+#define KVM_ARM_IRQ_GIC_MAX		127
+#endif
+
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE		0x95c1ba5e
+#define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND		KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS		PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI			PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL		PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED		PSCI_RET_DENIED
+
+#endif
+
+#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/include/uapi/asm/param.h b/arch/arm64/include/uapi/asm/param.h
new file mode 100644
index 000000000..8e3a281d4
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/param.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PARAM_H
+#define __ASM_PARAM_H
+
+#define EXEC_PAGESIZE	65536
+
+#include <asm-generic/param.h>
+
+#endif
diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000..172b8317e
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_ARM64_PERF_REGS_H
+#define _ASM_ARM64_PERF_REGS_H
+
+enum perf_event_arm_regs {
+	PERF_REG_ARM64_X0,
+	PERF_REG_ARM64_X1,
+	PERF_REG_ARM64_X2,
+	PERF_REG_ARM64_X3,
+	PERF_REG_ARM64_X4,
+	PERF_REG_ARM64_X5,
+	PERF_REG_ARM64_X6,
+	PERF_REG_ARM64_X7,
+	PERF_REG_ARM64_X8,
+	PERF_REG_ARM64_X9,
+	PERF_REG_ARM64_X10,
+	PERF_REG_ARM64_X11,
+	PERF_REG_ARM64_X12,
+	PERF_REG_ARM64_X13,
+	PERF_REG_ARM64_X14,
+	PERF_REG_ARM64_X15,
+	PERF_REG_ARM64_X16,
+	PERF_REG_ARM64_X17,
+	PERF_REG_ARM64_X18,
+	PERF_REG_ARM64_X19,
+	PERF_REG_ARM64_X20,
+	PERF_REG_ARM64_X21,
+	PERF_REG_ARM64_X22,
+	PERF_REG_ARM64_X23,
+	PERF_REG_ARM64_X24,
+	PERF_REG_ARM64_X25,
+	PERF_REG_ARM64_X26,
+	PERF_REG_ARM64_X27,
+	PERF_REG_ARM64_X28,
+	PERF_REG_ARM64_X29,
+	PERF_REG_ARM64_LR,
+	PERF_REG_ARM64_SP,
+	PERF_REG_ARM64_PC,
+	PERF_REG_ARM64_MAX,
+};
+#endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/arch/arm64/include/uapi/asm/posix_types.h b/arch/arm64/include/uapi/asm/posix_types.h
new file mode 100644
index 000000000..7985ff60c
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/posix_types.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_POSIX_TYPES_H
+#define __ASM_POSIX_TYPES_H
+
+typedef unsigned short __kernel_old_uid_t;
+typedef unsigned short __kernel_old_gid_t;
+#define __kernel_old_uid_t __kernel_old_uid_t
+
+#include <asm-generic/posix_types.h>
+
+#endif /*  __ASM_POSIX_TYPES_H */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
new file mode 100644
index 000000000..6913643bb
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -0,0 +1,92 @@
+/*
+ * Based on arch/arm/include/asm/ptrace.h
+ *
+ * Copyright (C) 1996-2003 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _UAPI__ASM_PTRACE_H
+#define _UAPI__ASM_PTRACE_H
+
+#include <linux/types.h>
+
+#include <asm/hwcap.h>
+
+
+/*
+ * PSR bits
+ */
+#define PSR_MODE_EL0t	0x00000000
+#define PSR_MODE_EL1t	0x00000004
+#define PSR_MODE_EL1h	0x00000005
+#define PSR_MODE_EL2t	0x00000008
+#define PSR_MODE_EL2h	0x00000009
+#define PSR_MODE_EL3t	0x0000000c
+#define PSR_MODE_EL3h	0x0000000d
+#define PSR_MODE_MASK	0x0000000f
+
+/* AArch32 CPSR bits */
+#define PSR_MODE32_BIT		0x00000010
+
+/* AArch64 SPSR bits */
+#define PSR_F_BIT	0x00000040
+#define PSR_I_BIT	0x00000080
+#define PSR_A_BIT	0x00000100
+#define PSR_D_BIT	0x00000200
+#define PSR_Q_BIT	0x08000000
+#define PSR_V_BIT	0x10000000
+#define PSR_C_BIT	0x20000000
+#define PSR_Z_BIT	0x40000000
+#define PSR_N_BIT	0x80000000
+
+/*
+ * Groups of PSR bits
+ */
+#define PSR_f		0xff000000	/* Flags		*/
+#define PSR_s		0x00ff0000	/* Status		*/
+#define PSR_x		0x0000ff00	/* Extension		*/
+#define PSR_c		0x000000ff	/* Control		*/
+
+
+#ifndef __ASSEMBLY__
+
+/*
+ * User structures for general purpose, floating point and debug registers.
+ */
+struct user_pt_regs {
+	__u64		regs[31];
+	__u64		sp;
+	__u64		pc;
+	__u64		pstate;
+};
+
+struct user_fpsimd_state {
+	__uint128_t	vregs[32];
+	__u32		fpsr;
+	__u32		fpcr;
+};
+
+struct user_hwdebug_state {
+	__u32		dbg_info;
+	__u32		pad;
+	struct {
+		__u64	addr;
+		__u32	ctrl;
+		__u32	pad;
+	}		dbg_regs[16];
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/include/uapi/asm/setup.h b/arch/arm64/include/uapi/asm/setup.h
new file mode 100644
index 000000000..9cf2e46fb
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/setup.h
@@ -0,0 +1,26 @@
+/*
+ * Based on arch/arm/include/asm/setup.h
+ *
+ * Copyright (C) 1997-1999 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SETUP_H
+#define __ASM_SETUP_H
+
+#include <linux/types.h>
+
+#define COMMAND_LINE_SIZE	2048
+
+#endif
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
new file mode 100644
index 000000000..ee469be1a
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _UAPI__ASM_SIGCONTEXT_H
+#define _UAPI__ASM_SIGCONTEXT_H
+
+#include <linux/types.h>
+
+/*
+ * Signal context structure - contains all info to do with the state
+ * before the signal handler was invoked.
+ */
+struct sigcontext {
+	__u64 fault_address;
+	/* AArch64 registers */
+	__u64 regs[31];
+	__u64 sp;
+	__u64 pc;
+	__u64 pstate;
+	/* 4K reserved for FP/SIMD state and future expansion */
+	__u8 __reserved[4096] __attribute__((__aligned__(16)));
+};
+
+/*
+ * Header to be used at the beginning of structures extending the user
+ * context. Such structures must be placed after the rt_sigframe on the stack
+ * and be 16-byte aligned. The last structure must be a dummy one with the
+ * magic and size set to 0.
+ */
+struct _aarch64_ctx {
+	__u32 magic;
+	__u32 size;
+};
+
+#define FPSIMD_MAGIC	0x46508001
+
+struct fpsimd_context {
+	struct _aarch64_ctx head;
+	__u32 fpsr;
+	__u32 fpcr;
+	__uint128_t vregs[32];
+};
+
+/* ESR_EL1 context */
+#define ESR_MAGIC	0x45535201
+
+struct esr_context {
+	struct _aarch64_ctx head;
+	__u64 esr;
+};
+
+#endif /* _UAPI__ASM_SIGCONTEXT_H */
diff --git a/arch/arm64/include/uapi/asm/siginfo.h b/arch/arm64/include/uapi/asm/siginfo.h
new file mode 100644
index 000000000..5a74a0853
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/siginfo.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SIGINFO_H
+#define __ASM_SIGINFO_H
+
+#define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/arch/arm64/include/uapi/asm/signal.h b/arch/arm64/include/uapi/asm/signal.h
new file mode 100644
index 000000000..8d1e72364
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/signal.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SIGNAL_H
+#define __ASM_SIGNAL_H
+
+/* Required for AArch32 compatibility. */
+#define SA_RESTORER	0x04000000
+
+#include <asm-generic/signal.h>
+
+#endif
diff --git a/arch/arm64/include/uapi/asm/stat.h b/arch/arm64/include/uapi/asm/stat.h
new file mode 100644
index 000000000..eeb702e50
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/stat.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <asm-generic/stat.h>
diff --git a/arch/arm64/include/uapi/asm/statfs.h b/arch/arm64/include/uapi/asm/statfs.h
new file mode 100644
index 000000000..6f6219050
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/statfs.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_STATFS_H
+#define __ASM_STATFS_H
+
+#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4)))
+
+#include <asm-generic/statfs.h>
+
+#endif
diff --git a/arch/arm64/include/uapi/asm/ucontext.h b/arch/arm64/include/uapi/asm/ucontext.h
new file mode 100644
index 000000000..791de8e89
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/ucontext.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _UAPI__ASM_UCONTEXT_H
+#define _UAPI__ASM_UCONTEXT_H
+
+#include <linux/types.h>
+
+struct ucontext {
+	unsigned long	  uc_flags;
+	struct ucontext	 *uc_link;
+	stack_t		  uc_stack;
+	sigset_t	  uc_sigmask;
+	/* glibc uses a 1024-bit sigset_t */
+	__u8		  __unused[1024 / 8 - sizeof(sigset_t)];
+	/* last for future expansion */
+	struct sigcontext uc_mcontext;
+};
+
+#endif /* _UAPI__ASM_UCONTEXT_H */
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
new file mode 100644
index 000000000..1caadc24e
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <asm-generic/unistd.h>
diff --git a/arch/arm64/kernel/.gitignore b/arch/arm64/kernel/.gitignore
new file mode 100644
index 000000000..c5f676c3c
--- /dev/null
+++ b/arch/arm64/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
new file mode 100644
index 000000000..426d0763c
--- /dev/null
+++ b/arch/arm64/kernel/Makefile
@@ -0,0 +1,47 @@
+#
+# Makefile for the linux kernel.
+#
+
+CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
+AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
+CFLAGS_efi-stub.o 	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
+CFLAGS_armv8_deprecated.o := -I$(src)
+
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_insn.o = -pg
+CFLAGS_REMOVE_return_address.o = -pg
+
+# Object file lists.
+arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
+			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
+			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\
+			   hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o	\
+			   return_address.o cpuinfo.o cpu_errata.o		\
+			   cpufeature.o alternative.o cacheinfo.o
+
+arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
+					   sys_compat.o entry32.o		\
+					   ../../arm/kernel/opcodes.o
+arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
+arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
+arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o topology.o
+arm64-obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
+arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
+arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
+arm64-obj-$(CONFIG_CPU_PM)		+= sleep.o suspend.o
+arm64-obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
+arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
+arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
+arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
+arm64-obj-$(CONFIG_PCI)			+= pci.o
+arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
+arm64-obj-$(CONFIG_ACPI)		+= acpi.o
+
+obj-y					+= $(arm64-obj-y) vdso/
+obj-m					+= $(arm64-obj-m)
+head-y					:= head.o
+extra-y					:= $(head-y) vmlinux.lds
+
+# vDSO - this must be built first to generate the symbol offsets
+$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
+$(obj)/vdso/vdso-offsets.h: $(obj)/vdso
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
new file mode 100644
index 000000000..8b8395588
--- /dev/null
+++ b/arch/arm64/kernel/acpi.c
@@ -0,0 +1,345 @@
+/*
+ *  ARM64 Specific Low-Level ACPI Boot Support
+ *
+ *  Copyright (C) 2013-2014, Linaro Ltd.
+ *	Author: Al Stone <al.stone@linaro.org>
+ *	Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *	Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *	Author: Tomasz Nowicki <tomasz.nowicki@linaro.org>
+ *	Author: Naresh Bhat <naresh.bhat@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "ACPI: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/smp.h>
+
+#include <asm/cputype.h>
+#include <asm/cpu_ops.h>
+#include <asm/smp_plat.h>
+
+int acpi_noirq = 1;		/* skip ACPI IRQ initialization */
+int acpi_disabled = 1;
+EXPORT_SYMBOL(acpi_disabled);
+
+int acpi_pci_disabled = 1;	/* skip ACPI PCI scan and IRQ initialization */
+EXPORT_SYMBOL(acpi_pci_disabled);
+
+/* Processors with enabled flag and sane MPIDR */
+static int enabled_cpus;
+
+/* Boot CPU is valid or not in MADT */
+static bool bootcpu_valid  __initdata;
+
+static bool param_acpi_off __initdata;
+static bool param_acpi_force __initdata;
+
+static int __init parse_acpi(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	/* "acpi=off" disables both ACPI table parsing and interpreter */
+	if (strcmp(arg, "off") == 0)
+		param_acpi_off = true;
+	else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
+		param_acpi_force = true;
+	else
+		return -EINVAL;	/* Core will print when we return error */
+
+	return 0;
+}
+early_param("acpi", parse_acpi);
+
+static int __init dt_scan_depth1_nodes(unsigned long node,
+				       const char *uname, int depth,
+				       void *data)
+{
+	/*
+	 * Return 1 as soon as we encounter a node at depth 1 that is
+	 * not the /chosen node.
+	 */
+	if (depth == 1 && (strcmp(uname, "chosen") != 0))
+		return 1;
+	return 0;
+}
+
+/*
+ * __acpi_map_table() will be called before page_init(), so early_ioremap()
+ * or early_memremap() should be called here to for ACPI table mapping.
+ */
+char *__init __acpi_map_table(unsigned long phys, unsigned long size)
+{
+	if (!size)
+		return NULL;
+
+	return early_memremap(phys, size);
+}
+
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+	if (!map || !size)
+		return;
+
+	early_memunmap(map, size);
+}
+
+/**
+ * acpi_map_gic_cpu_interface - generates a logical cpu number
+ * and map to MPIDR represented by GICC structure
+ */
+static void __init
+acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
+{
+	int i;
+	u64 mpidr = processor->arm_mpidr & MPIDR_HWID_BITMASK;
+	bool enabled = !!(processor->flags & ACPI_MADT_ENABLED);
+
+	if (mpidr == INVALID_HWID) {
+		pr_info("Skip MADT cpu entry with invalid MPIDR\n");
+		return;
+	}
+
+	total_cpus++;
+	if (!enabled)
+		return;
+
+	if (enabled_cpus >=  NR_CPUS) {
+		pr_warn("NR_CPUS limit of %d reached, Processor %d/0x%llx ignored.\n",
+			NR_CPUS, total_cpus, mpidr);
+		return;
+	}
+
+	/* Check if GICC structure of boot CPU is available in the MADT */
+	if (cpu_logical_map(0) == mpidr) {
+		if (bootcpu_valid) {
+			pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n",
+			       mpidr);
+			return;
+		}
+
+		bootcpu_valid = true;
+	}
+
+	/*
+	 * Duplicate MPIDRs are a recipe for disaster. Scan
+	 * all initialized entries and check for
+	 * duplicates. If any is found just ignore the CPU.
+	 */
+	for (i = 1; i < enabled_cpus; i++) {
+		if (cpu_logical_map(i) == mpidr) {
+			pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n",
+			       mpidr);
+			return;
+		}
+	}
+
+	if (!acpi_psci_present())
+		return;
+
+	cpu_ops[enabled_cpus] = cpu_get_ops("psci");
+	/* CPU 0 was already initialized */
+	if (enabled_cpus) {
+		if (!cpu_ops[enabled_cpus])
+			return;
+
+		if (cpu_ops[enabled_cpus]->cpu_init(NULL, enabled_cpus))
+			return;
+
+		/* map the logical cpu id to cpu MPIDR */
+		cpu_logical_map(enabled_cpus) = mpidr;
+	}
+
+	enabled_cpus++;
+}
+
+static int __init
+acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
+				const unsigned long end)
+{
+	struct acpi_madt_generic_interrupt *processor;
+
+	processor = (struct acpi_madt_generic_interrupt *)header;
+
+	if (BAD_MADT_ENTRY(processor, end))
+		return -EINVAL;
+
+	acpi_table_print_madt_entry(header);
+	acpi_map_gic_cpu_interface(processor);
+	return 0;
+}
+
+/* Parse GIC cpu interface entries in MADT for SMP init */
+void __init acpi_init_cpus(void)
+{
+	int count, i;
+
+	/*
+	 * do a partial walk of MADT to determine how many CPUs
+	 * we have including disabled CPUs, and get information
+	 * we need for SMP init
+	 */
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+			acpi_parse_gic_cpu_interface, 0);
+
+	if (!count) {
+		pr_err("No GIC CPU interface entries present\n");
+		return;
+	} else if (count < 0) {
+		pr_err("Error parsing GIC CPU interface entry\n");
+		return;
+	}
+
+	if (!bootcpu_valid) {
+		pr_err("MADT missing boot CPU MPIDR, not enabling secondaries\n");
+		return;
+	}
+
+	for (i = 0; i < enabled_cpus; i++)
+		set_cpu_possible(i, true);
+
+	/* Make boot-up look pretty */
+	pr_info("%d CPUs enabled, %d CPUs total\n", enabled_cpus, total_cpus);
+}
+
+/*
+ * acpi_fadt_sanity_check() - Check FADT presence and carry out sanity
+ *			      checks on it
+ *
+ * Return 0 on success,  <0 on failure
+ */
+static int __init acpi_fadt_sanity_check(void)
+{
+	struct acpi_table_header *table;
+	struct acpi_table_fadt *fadt;
+	acpi_status status;
+	acpi_size tbl_size;
+	int ret = 0;
+
+	/*
+	 * FADT is required on arm64; retrieve it to check its presence
+	 * and carry out revision and ACPI HW reduced compliancy tests
+	 */
+	status = acpi_get_table_with_size(ACPI_SIG_FADT, 0, &table, &tbl_size);
+	if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+
+		pr_err("Failed to get FADT table, %s\n", msg);
+		return -ENODEV;
+	}
+
+	fadt = (struct acpi_table_fadt *)table;
+
+	/*
+	 * Revision in table header is the FADT Major revision, and there
+	 * is a minor revision of FADT which was introduced by ACPI 5.1,
+	 * we only deal with ACPI 5.1 or newer revision to get GIC and SMP
+	 * boot protocol configuration data.
+	 */
+	if (table->revision < 5 ||
+	   (table->revision == 5 && fadt->minor_revision < 1)) {
+		pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n",
+		       table->revision, fadt->minor_revision);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
+		pr_err("FADT not ACPI hardware reduced compliant\n");
+		ret = -EINVAL;
+	}
+
+out:
+	/*
+	 * acpi_get_table_with_size() creates FADT table mapping that
+	 * should be released after parsing and before resuming boot
+	 */
+	early_acpi_os_unmap_memory(table, tbl_size);
+	return ret;
+}
+
+/*
+ * acpi_boot_table_init() called from setup_arch(), always.
+ *	1. find RSDP and get its address, and then find XSDT
+ *	2. extract all tables and checksums them all
+ *	3. check ACPI FADT revision
+ *	4. check ACPI FADT HW reduced flag
+ *
+ * We can parse ACPI boot-time tables such as MADT after
+ * this function is called.
+ *
+ * On return ACPI is enabled if either:
+ *
+ * - ACPI tables are initialized and sanity checks passed
+ * - acpi=force was passed in the command line and ACPI was not disabled
+ *   explicitly through acpi=off command line parameter
+ *
+ * ACPI is disabled on function return otherwise
+ */
+void __init acpi_boot_table_init(void)
+{
+	/*
+	 * Enable ACPI instead of device tree unless
+	 * - ACPI has been disabled explicitly (acpi=off), or
+	 * - the device tree is not empty (it has more than just a /chosen node)
+	 *   and ACPI has not been force enabled (acpi=force)
+	 */
+	if (param_acpi_off ||
+	    (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
+		return;
+
+	/*
+	 * ACPI is disabled at this point. Enable it in order to parse
+	 * the ACPI tables and carry out sanity checks
+	 */
+	enable_acpi();
+
+	/*
+	 * If ACPI tables are initialized and FADT sanity checks passed,
+	 * leave ACPI enabled and carry on booting; otherwise disable ACPI
+	 * on initialization error.
+	 * If acpi=force was passed on the command line it forces ACPI
+	 * to be enabled even if its initialization failed.
+	 */
+	if (acpi_table_init() || acpi_fadt_sanity_check()) {
+		pr_err("Failed to init ACPI tables\n");
+		if (!param_acpi_force)
+			disable_acpi();
+	}
+}
+
+void __init acpi_gic_init(void)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+	acpi_size tbl_size;
+	int err;
+
+	if (acpi_disabled)
+		return;
+
+	status = acpi_get_table_with_size(ACPI_SIG_MADT, 0, &table, &tbl_size);
+	if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+
+		pr_err("Failed to get MADT table, %s\n", msg);
+		return;
+	}
+
+	err = gic_v2_acpi_init(table);
+	if (err)
+		pr_err("Failed to initialize GIC IRQ controller");
+
+	early_acpi_os_unmap_memory((char *)table, tbl_size);
+}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
new file mode 100644
index 000000000..28f8365ed
--- /dev/null
+++ b/arch/arm64/kernel/alternative.c
@@ -0,0 +1,85 @@
+/*
+ * alternative runtime patching
+ * inspired by the x86 version
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "alternatives: " fmt
+
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <asm/cacheflush.h>
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+#include <linux/stop_machine.h>
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+struct alt_region {
+	struct alt_instr *begin;
+	struct alt_instr *end;
+};
+
+static int __apply_alternatives(void *alt_region)
+{
+	struct alt_instr *alt;
+	struct alt_region *region = alt_region;
+	u8 *origptr, *replptr;
+
+	for (alt = region->begin; alt < region->end; alt++) {
+		if (!cpus_have_cap(alt->cpufeature))
+			continue;
+
+		BUG_ON(alt->alt_len != alt->orig_len);
+
+		pr_info_once("patching kernel code\n");
+
+		origptr = (u8 *)&alt->orig_offset + alt->orig_offset;
+		replptr = (u8 *)&alt->alt_offset + alt->alt_offset;
+		memcpy(origptr, replptr, alt->alt_len);
+		flush_icache_range((uintptr_t)origptr,
+				   (uintptr_t)(origptr + alt->alt_len));
+	}
+
+	return 0;
+}
+
+void apply_alternatives_all(void)
+{
+	struct alt_region region = {
+		.begin	= __alt_instructions,
+		.end	= __alt_instructions_end,
+	};
+
+	/* better not try code patching on a live SMP system */
+	stop_machine(__apply_alternatives, &region, NULL);
+}
+
+void apply_alternatives(void *start, size_t length)
+{
+	struct alt_region region = {
+		.begin	= start,
+		.end	= start + length,
+	};
+
+	__apply_alternatives(&region);
+}
+
+void free_alternatives_memory(void)
+{
+	free_reserved_area(__alt_instructions, __alt_instructions_end,
+			   0, "alternatives");
+}
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
new file mode 100644
index 000000000..a85843ddb
--- /dev/null
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -0,0 +1,67 @@
+/*
+ * Based on arch/arm/kernel/armksyms.c
+ *
+ * Copyright (C) 2000 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/cryptohash.h>
+#include <linux/delay.h>
+#include <linux/in6.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+
+#include <asm/checksum.h>
+
+EXPORT_SYMBOL(copy_page);
+EXPORT_SYMBOL(clear_page);
+
+	/* user mem (segment) */
+EXPORT_SYMBOL(__copy_from_user);
+EXPORT_SYMBOL(__copy_to_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__copy_in_user);
+
+	/* physical memory */
+EXPORT_SYMBOL(memstart_addr);
+
+	/* string / mem functions */
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(memchr);
+EXPORT_SYMBOL(memcmp);
+
+	/* atomic bitops */
+EXPORT_SYMBOL(set_bit);
+EXPORT_SYMBOL(test_and_set_bit);
+EXPORT_SYMBOL(clear_bit);
+EXPORT_SYMBOL(test_and_clear_bit);
+EXPORT_SYMBOL(change_bit);
+EXPORT_SYMBOL(test_and_change_bit);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
new file mode 100644
index 000000000..7922c2e71
--- /dev/null
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -0,0 +1,662 @@
+/*
+ *  Copyright (C) 2014 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+
+#include <asm/insn.h>
+#include <asm/opcodes.h>
+#include <asm/system_misc.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/cpufeature.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace-events-emulation.h"
+
+/*
+ * The runtime support for deprecated instruction support can be in one of
+ * following three states -
+ *
+ * 0 = undef
+ * 1 = emulate (software emulation)
+ * 2 = hw (supported in hardware)
+ */
+enum insn_emulation_mode {
+	INSN_UNDEF,
+	INSN_EMULATE,
+	INSN_HW,
+};
+
+enum legacy_insn_status {
+	INSN_DEPRECATED,
+	INSN_OBSOLETE,
+};
+
+struct insn_emulation_ops {
+	const char		*name;
+	enum legacy_insn_status	status;
+	struct undef_hook	*hooks;
+	int			(*set_hw_mode)(bool enable);
+};
+
+struct insn_emulation {
+	struct list_head node;
+	struct insn_emulation_ops *ops;
+	int current_mode;
+	int min;
+	int max;
+};
+
+static LIST_HEAD(insn_emulation);
+static int nr_insn_emulated;
+static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
+
+static void register_emulation_hooks(struct insn_emulation_ops *ops)
+{
+	struct undef_hook *hook;
+
+	BUG_ON(!ops->hooks);
+
+	for (hook = ops->hooks; hook->instr_mask; hook++)
+		register_undef_hook(hook);
+
+	pr_notice("Registered %s emulation handler\n", ops->name);
+}
+
+static void remove_emulation_hooks(struct insn_emulation_ops *ops)
+{
+	struct undef_hook *hook;
+
+	BUG_ON(!ops->hooks);
+
+	for (hook = ops->hooks; hook->instr_mask; hook++)
+		unregister_undef_hook(hook);
+
+	pr_notice("Removed %s emulation handler\n", ops->name);
+}
+
+static void enable_insn_hw_mode(void *data)
+{
+	struct insn_emulation *insn = (struct insn_emulation *)data;
+	if (insn->ops->set_hw_mode)
+		insn->ops->set_hw_mode(true);
+}
+
+static void disable_insn_hw_mode(void *data)
+{
+	struct insn_emulation *insn = (struct insn_emulation *)data;
+	if (insn->ops->set_hw_mode)
+		insn->ops->set_hw_mode(false);
+}
+
+/* Run set_hw_mode(mode) on all active CPUs */
+static int run_all_cpu_set_hw_mode(struct insn_emulation *insn, bool enable)
+{
+	if (!insn->ops->set_hw_mode)
+		return -EINVAL;
+	if (enable)
+		on_each_cpu(enable_insn_hw_mode, (void *)insn, true);
+	else
+		on_each_cpu(disable_insn_hw_mode, (void *)insn, true);
+	return 0;
+}
+
+/*
+ * Run set_hw_mode for all insns on a starting CPU.
+ * Returns:
+ *  0 		- If all the hooks ran successfully.
+ * -EINVAL	- At least one hook is not supported by the CPU.
+ */
+static int run_all_insn_set_hw_mode(unsigned long cpu)
+{
+	int rc = 0;
+	unsigned long flags;
+	struct insn_emulation *insn;
+
+	raw_spin_lock_irqsave(&insn_emulation_lock, flags);
+	list_for_each_entry(insn, &insn_emulation, node) {
+		bool enable = (insn->current_mode == INSN_HW);
+		if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) {
+			pr_warn("CPU[%ld] cannot support the emulation of %s",
+				cpu, insn->ops->name);
+			rc = -EINVAL;
+		}
+	}
+	raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
+	return rc;
+}
+
+static int update_insn_emulation_mode(struct insn_emulation *insn,
+				       enum insn_emulation_mode prev)
+{
+	int ret = 0;
+
+	switch (prev) {
+	case INSN_UNDEF: /* Nothing to be done */
+		break;
+	case INSN_EMULATE:
+		remove_emulation_hooks(insn->ops);
+		break;
+	case INSN_HW:
+		if (!run_all_cpu_set_hw_mode(insn, false))
+			pr_notice("Disabled %s support\n", insn->ops->name);
+		break;
+	}
+
+	switch (insn->current_mode) {
+	case INSN_UNDEF:
+		break;
+	case INSN_EMULATE:
+		register_emulation_hooks(insn->ops);
+		break;
+	case INSN_HW:
+		ret = run_all_cpu_set_hw_mode(insn, true);
+		if (!ret)
+			pr_notice("Enabled %s support\n", insn->ops->name);
+		break;
+	}
+
+	return ret;
+}
+
+static void register_insn_emulation(struct insn_emulation_ops *ops)
+{
+	unsigned long flags;
+	struct insn_emulation *insn;
+
+	insn = kzalloc(sizeof(*insn), GFP_KERNEL);
+	insn->ops = ops;
+	insn->min = INSN_UNDEF;
+
+	switch (ops->status) {
+	case INSN_DEPRECATED:
+		insn->current_mode = INSN_EMULATE;
+		/* Disable the HW mode if it was turned on at early boot time */
+		run_all_cpu_set_hw_mode(insn, false);
+		insn->max = INSN_HW;
+		break;
+	case INSN_OBSOLETE:
+		insn->current_mode = INSN_UNDEF;
+		insn->max = INSN_EMULATE;
+		break;
+	}
+
+	raw_spin_lock_irqsave(&insn_emulation_lock, flags);
+	list_add(&insn->node, &insn_emulation);
+	nr_insn_emulated++;
+	raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
+
+	/* Register any handlers if required */
+	update_insn_emulation_mode(insn, INSN_UNDEF);
+}
+
+static int emulation_proc_handler(struct ctl_table *table, int write,
+				  void __user *buffer, size_t *lenp,
+				  loff_t *ppos)
+{
+	int ret = 0;
+	struct insn_emulation *insn = (struct insn_emulation *) table->data;
+	enum insn_emulation_mode prev_mode = insn->current_mode;
+
+	table->data = &insn->current_mode;
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+	if (ret || !write || prev_mode == insn->current_mode)
+		goto ret;
+
+	ret = update_insn_emulation_mode(insn, prev_mode);
+	if (ret) {
+		/* Mode change failed, revert to previous mode. */
+		insn->current_mode = prev_mode;
+		update_insn_emulation_mode(insn, INSN_UNDEF);
+	}
+ret:
+	table->data = insn;
+	return ret;
+}
+
+static struct ctl_table ctl_abi[] = {
+	{
+		.procname = "abi",
+		.mode = 0555,
+	},
+	{ }
+};
+
+static void register_insn_emulation_sysctl(struct ctl_table *table)
+{
+	unsigned long flags;
+	int i = 0;
+	struct insn_emulation *insn;
+	struct ctl_table *insns_sysctl, *sysctl;
+
+	insns_sysctl = kzalloc(sizeof(*sysctl) * (nr_insn_emulated + 1),
+			      GFP_KERNEL);
+
+	raw_spin_lock_irqsave(&insn_emulation_lock, flags);
+	list_for_each_entry(insn, &insn_emulation, node) {
+		sysctl = &insns_sysctl[i];
+
+		sysctl->mode = 0644;
+		sysctl->maxlen = sizeof(int);
+
+		sysctl->procname = insn->ops->name;
+		sysctl->data = insn;
+		sysctl->extra1 = &insn->min;
+		sysctl->extra2 = &insn->max;
+		sysctl->proc_handler = emulation_proc_handler;
+		i++;
+	}
+	raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
+
+	table->child = insns_sysctl;
+	register_sysctl_table(table);
+}
+
+/*
+ *  Implement emulation of the SWP/SWPB instructions using load-exclusive and
+ *  store-exclusive.
+ *
+ *  Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ *  Where: Rt  = destination
+ *	   Rt2 = source
+ *	   Rn  = address
+ */
+
+/*
+ * Error-checking SWP macros implemented using ldxr{b}/stxr{b}
+ */
+#define __user_swpX_asm(data, addr, res, temp, B)		\
+	__asm__ __volatile__(					\
+	"	mov		%w2, %w1\n"			\
+	"0:	ldxr"B"		%w1, [%3]\n"			\
+	"1:	stxr"B"		%w0, %w2, [%3]\n"		\
+	"	cbz		%w0, 2f\n"			\
+	"	mov		%w0, %w4\n"			\
+	"2:\n"							\
+	"	.pushsection	 .fixup,\"ax\"\n"		\
+	"	.align		2\n"				\
+	"3:	mov		%w0, %w5\n"			\
+	"	b		2b\n"				\
+	"	.popsection"					\
+	"	.pushsection	 __ex_table,\"a\"\n"		\
+	"	.align		3\n"				\
+	"	.quad		0b, 3b\n"			\
+	"	.quad		1b, 3b\n"			\
+	"	.popsection"					\
+	: "=&r" (res), "+r" (data), "=&r" (temp)		\
+	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)		\
+	: "memory")
+
+#define __user_swp_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "")
+#define __user_swpb_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "b")
+
+/*
+ * Bit 22 of the instruction encoding distinguishes between
+ * the SWP and SWPB variants (bit set means SWPB).
+ */
+#define TYPE_SWPB (1 << 22)
+
+/*
+ * Set up process info to signal segmentation fault - called on access error.
+ */
+static void set_segfault(struct pt_regs *regs, unsigned long addr)
+{
+	siginfo_t info;
+
+	down_read(&current->mm->mmap_sem);
+	if (find_vma(current->mm, addr) == NULL)
+		info.si_code = SEGV_MAPERR;
+	else
+		info.si_code = SEGV_ACCERR;
+	up_read(&current->mm->mmap_sem);
+
+	info.si_signo = SIGSEGV;
+	info.si_errno = 0;
+	info.si_addr  = (void *) instruction_pointer(regs);
+
+	pr_debug("SWP{B} emulation: access caused memory abort!\n");
+	arm64_notify_die("Illegal memory access", regs, &info, 0);
+}
+
+static int emulate_swpX(unsigned int address, unsigned int *data,
+			unsigned int type)
+{
+	unsigned int res = 0;
+
+	if ((type != TYPE_SWPB) && (address & 0x3)) {
+		/* SWP to unaligned address not permitted */
+		pr_debug("SWP instruction on unaligned pointer!\n");
+		return -EFAULT;
+	}
+
+	while (1) {
+		unsigned long temp;
+
+		if (type == TYPE_SWPB)
+			__user_swpb_asm(*data, address, res, temp);
+		else
+			__user_swp_asm(*data, address, res, temp);
+
+		if (likely(res != -EAGAIN) || signal_pending(current))
+			break;
+
+		cond_resched();
+	}
+
+	return res;
+}
+
+/*
+ * swp_handler logs the id of calling process, dissects the instruction, sanity
+ * checks the memory location, calls emulate_swpX for the actual operation and
+ * deals with fixup/error handling before returning
+ */
+static int swp_handler(struct pt_regs *regs, u32 instr)
+{
+	u32 destreg, data, type, address = 0;
+	int rn, rt2, res = 0;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
+
+	type = instr & TYPE_SWPB;
+
+	switch (arm_check_condition(instr, regs->pstate)) {
+	case ARM_OPCODE_CONDTEST_PASS:
+		break;
+	case ARM_OPCODE_CONDTEST_FAIL:
+		/* Condition failed - return to next instruction */
+		goto ret;
+	case ARM_OPCODE_CONDTEST_UNCOND:
+		/* If unconditional encoding - not a SWP, undef */
+		return -EFAULT;
+	default:
+		return -EINVAL;
+	}
+
+	rn = aarch32_insn_extract_reg_num(instr, A32_RN_OFFSET);
+	rt2 = aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET);
+
+	address = (u32)regs->user_regs.regs[rn];
+	data	= (u32)regs->user_regs.regs[rt2];
+	destreg = aarch32_insn_extract_reg_num(instr, A32_RT_OFFSET);
+
+	pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n",
+		rn, address, destreg,
+		aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data);
+
+	/* Check access in reasonable access range for both SWP and SWPB */
+	if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+		pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n",
+			address);
+		goto fault;
+	}
+
+	res = emulate_swpX(address, &data, type);
+	if (res == -EFAULT)
+		goto fault;
+	else if (res == 0)
+		regs->user_regs.regs[destreg] = data;
+
+ret:
+	if (type == TYPE_SWPB)
+		trace_instruction_emulation("swpb", regs->pc);
+	else
+		trace_instruction_emulation("swp", regs->pc);
+
+	pr_warn_ratelimited("\"%s\" (%ld) uses obsolete SWP{B} instruction at 0x%llx\n",
+			current->comm, (unsigned long)current->pid, regs->pc);
+
+	regs->pc += 4;
+	return 0;
+
+fault:
+	set_segfault(regs, address);
+
+	return 0;
+}
+
+/*
+ * Only emulate SWP/SWPB executed in ARM state/User mode.
+ * The kernel must be SWP free and SWP{B} does not exist in Thumb.
+ */
+static struct undef_hook swp_hooks[] = {
+	{
+		.instr_mask	= 0x0fb00ff0,
+		.instr_val	= 0x01000090,
+		.pstate_mask	= COMPAT_PSR_MODE_MASK,
+		.pstate_val	= COMPAT_PSR_MODE_USR,
+		.fn		= swp_handler
+	},
+	{ }
+};
+
+static struct insn_emulation_ops swp_ops = {
+	.name = "swp",
+	.status = INSN_OBSOLETE,
+	.hooks = swp_hooks,
+	.set_hw_mode = NULL,
+};
+
+static int cp15barrier_handler(struct pt_regs *regs, u32 instr)
+{
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
+
+	switch (arm_check_condition(instr, regs->pstate)) {
+	case ARM_OPCODE_CONDTEST_PASS:
+		break;
+	case ARM_OPCODE_CONDTEST_FAIL:
+		/* Condition failed - return to next instruction */
+		goto ret;
+	case ARM_OPCODE_CONDTEST_UNCOND:
+		/* If unconditional encoding - not a barrier instruction */
+		return -EFAULT;
+	default:
+		return -EINVAL;
+	}
+
+	switch (aarch32_insn_mcr_extract_crm(instr)) {
+	case 10:
+		/*
+		 * dmb - mcr p15, 0, Rt, c7, c10, 5
+		 * dsb - mcr p15, 0, Rt, c7, c10, 4
+		 */
+		if (aarch32_insn_mcr_extract_opc2(instr) == 5) {
+			dmb(sy);
+			trace_instruction_emulation(
+				"mcr p15, 0, Rt, c7, c10, 5 ; dmb", regs->pc);
+		} else {
+			dsb(sy);
+			trace_instruction_emulation(
+				"mcr p15, 0, Rt, c7, c10, 4 ; dsb", regs->pc);
+		}
+		break;
+	case 5:
+		/*
+		 * isb - mcr p15, 0, Rt, c7, c5, 4
+		 *
+		 * Taking an exception or returning from one acts as an
+		 * instruction barrier. So no explicit barrier needed here.
+		 */
+		trace_instruction_emulation(
+			"mcr p15, 0, Rt, c7, c5, 4 ; isb", regs->pc);
+		break;
+	}
+
+ret:
+	pr_warn_ratelimited("\"%s\" (%ld) uses deprecated CP15 Barrier instruction at 0x%llx\n",
+			current->comm, (unsigned long)current->pid, regs->pc);
+
+	regs->pc += 4;
+	return 0;
+}
+
+static inline void config_sctlr_el1(u32 clear, u32 set)
+{
+	u32 val;
+
+	asm volatile("mrs %0, sctlr_el1" : "=r" (val));
+	val &= ~clear;
+	val |= set;
+	asm volatile("msr sctlr_el1, %0" : : "r" (val));
+}
+
+static int cp15_barrier_set_hw_mode(bool enable)
+{
+	if (enable)
+		config_sctlr_el1(0, SCTLR_EL1_CP15BEN);
+	else
+		config_sctlr_el1(SCTLR_EL1_CP15BEN, 0);
+	return 0;
+}
+
+static struct undef_hook cp15_barrier_hooks[] = {
+	{
+		.instr_mask	= 0x0fff0fdf,
+		.instr_val	= 0x0e070f9a,
+		.pstate_mask	= COMPAT_PSR_MODE_MASK,
+		.pstate_val	= COMPAT_PSR_MODE_USR,
+		.fn		= cp15barrier_handler,
+	},
+	{
+		.instr_mask	= 0x0fff0fff,
+		.instr_val	= 0x0e070f95,
+		.pstate_mask	= COMPAT_PSR_MODE_MASK,
+		.pstate_val	= COMPAT_PSR_MODE_USR,
+		.fn		= cp15barrier_handler,
+	},
+	{ }
+};
+
+static struct insn_emulation_ops cp15_barrier_ops = {
+	.name = "cp15_barrier",
+	.status = INSN_DEPRECATED,
+	.hooks = cp15_barrier_hooks,
+	.set_hw_mode = cp15_barrier_set_hw_mode,
+};
+
+static int setend_set_hw_mode(bool enable)
+{
+	if (!cpu_supports_mixed_endian_el0())
+		return -EINVAL;
+
+	if (enable)
+		config_sctlr_el1(SCTLR_EL1_SED, 0);
+	else
+		config_sctlr_el1(0, SCTLR_EL1_SED);
+	return 0;
+}
+
+static int compat_setend_handler(struct pt_regs *regs, u32 big_endian)
+{
+	char *insn;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
+
+	if (big_endian) {
+		insn = "setend be";
+		regs->pstate |= COMPAT_PSR_E_BIT;
+	} else {
+		insn = "setend le";
+		regs->pstate &= ~COMPAT_PSR_E_BIT;
+	}
+
+	trace_instruction_emulation(insn, regs->pc);
+	pr_warn_ratelimited("\"%s\" (%ld) uses deprecated setend instruction at 0x%llx\n",
+			current->comm, (unsigned long)current->pid, regs->pc);
+
+	return 0;
+}
+
+static int a32_setend_handler(struct pt_regs *regs, u32 instr)
+{
+	int rc = compat_setend_handler(regs, (instr >> 9) & 1);
+	regs->pc += 4;
+	return rc;
+}
+
+static int t16_setend_handler(struct pt_regs *regs, u32 instr)
+{
+	int rc = compat_setend_handler(regs, (instr >> 3) & 1);
+	regs->pc += 2;
+	return rc;
+}
+
+static struct undef_hook setend_hooks[] = {
+	{
+		.instr_mask	= 0xfffffdff,
+		.instr_val	= 0xf1010000,
+		.pstate_mask	= COMPAT_PSR_MODE_MASK,
+		.pstate_val	= COMPAT_PSR_MODE_USR,
+		.fn		= a32_setend_handler,
+	},
+	{
+		/* Thumb mode */
+		.instr_mask	= 0x0000fff7,
+		.instr_val	= 0x0000b650,
+		.pstate_mask	= (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_MASK),
+		.pstate_val	= (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_USR),
+		.fn		= t16_setend_handler,
+	},
+	{}
+};
+
+static struct insn_emulation_ops setend_ops = {
+	.name = "setend",
+	.status = INSN_DEPRECATED,
+	.hooks = setend_hooks,
+	.set_hw_mode = setend_set_hw_mode,
+};
+
+static int insn_cpu_hotplug_notify(struct notifier_block *b,
+			      unsigned long action, void *hcpu)
+{
+	int rc = 0;
+	if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING)
+		rc = run_all_insn_set_hw_mode((unsigned long)hcpu);
+
+	return notifier_from_errno(rc);
+}
+
+static struct notifier_block insn_cpu_hotplug_notifier = {
+	.notifier_call = insn_cpu_hotplug_notify,
+};
+
+/*
+ * Invoked as late_initcall, since not needed before init spawned.
+ */
+static int __init armv8_deprecated_init(void)
+{
+	if (IS_ENABLED(CONFIG_SWP_EMULATION))
+		register_insn_emulation(&swp_ops);
+
+	if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION))
+		register_insn_emulation(&cp15_barrier_ops);
+
+	if (IS_ENABLED(CONFIG_SETEND_EMULATION)) {
+		if(system_supports_mixed_endian_el0())
+			register_insn_emulation(&setend_ops);
+		else
+			pr_info("setend instruction emulation is not supported on the system");
+	}
+
+	register_cpu_notifier(&insn_cpu_hotplug_notifier);
+	register_insn_emulation_sysctl(ctl_abi);
+
+	return 0;
+}
+
+late_initcall(armv8_deprecated_init);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
new file mode 100644
index 000000000..da675cc5d
--- /dev/null
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -0,0 +1,161 @@
+/*
+ * Based on arch/arm/kernel/asm-offsets.c
+ *
+ * Copyright (C) 1995-2003 Russell King
+ *               2001-2002 Keith Owens
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/kvm_host.h>
+#include <asm/thread_info.h>
+#include <asm/memory.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+#include <asm/vdso_datapage.h>
+#include <linux/kbuild.h>
+
+int main(void)
+{
+  DEFINE(TSK_ACTIVE_MM,		offsetof(struct task_struct, active_mm));
+  BLANK();
+  DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
+  DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
+  DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
+  DEFINE(TI_TASK,		offsetof(struct thread_info, task));
+  DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
+  BLANK();
+  DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context));
+  BLANK();
+  DEFINE(S_X0,			offsetof(struct pt_regs, regs[0]));
+  DEFINE(S_X1,			offsetof(struct pt_regs, regs[1]));
+  DEFINE(S_X2,			offsetof(struct pt_regs, regs[2]));
+  DEFINE(S_X3,			offsetof(struct pt_regs, regs[3]));
+  DEFINE(S_X4,			offsetof(struct pt_regs, regs[4]));
+  DEFINE(S_X5,			offsetof(struct pt_regs, regs[5]));
+  DEFINE(S_X6,			offsetof(struct pt_regs, regs[6]));
+  DEFINE(S_X7,			offsetof(struct pt_regs, regs[7]));
+  DEFINE(S_LR,			offsetof(struct pt_regs, regs[30]));
+  DEFINE(S_SP,			offsetof(struct pt_regs, sp));
+#ifdef CONFIG_COMPAT
+  DEFINE(S_COMPAT_SP,		offsetof(struct pt_regs, compat_sp));
+#endif
+  DEFINE(S_PSTATE,		offsetof(struct pt_regs, pstate));
+  DEFINE(S_PC,			offsetof(struct pt_regs, pc));
+  DEFINE(S_ORIG_X0,		offsetof(struct pt_regs, orig_x0));
+  DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
+  DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
+  BLANK();
+  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id));
+  BLANK();
+  DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));
+  DEFINE(VMA_VM_FLAGS,		offsetof(struct vm_area_struct, vm_flags));
+  BLANK();
+  DEFINE(VM_EXEC,	       	VM_EXEC);
+  BLANK();
+  DEFINE(PAGE_SZ,	       	PAGE_SIZE);
+  BLANK();
+  DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
+  DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
+  DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
+  BLANK();
+  DEFINE(CLOCK_REALTIME,	CLOCK_REALTIME);
+  DEFINE(CLOCK_MONOTONIC,	CLOCK_MONOTONIC);
+  DEFINE(CLOCK_REALTIME_RES,	MONOTONIC_RES_NSEC);
+  DEFINE(CLOCK_REALTIME_COARSE,	CLOCK_REALTIME_COARSE);
+  DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
+  DEFINE(CLOCK_COARSE_RES,	LOW_RES_NSEC);
+  DEFINE(NSEC_PER_SEC,		NSEC_PER_SEC);
+  BLANK();
+  DEFINE(VDSO_CS_CYCLE_LAST,	offsetof(struct vdso_data, cs_cycle_last));
+  DEFINE(VDSO_XTIME_CLK_SEC,	offsetof(struct vdso_data, xtime_clock_sec));
+  DEFINE(VDSO_XTIME_CLK_NSEC,	offsetof(struct vdso_data, xtime_clock_nsec));
+  DEFINE(VDSO_XTIME_CRS_SEC,	offsetof(struct vdso_data, xtime_coarse_sec));
+  DEFINE(VDSO_XTIME_CRS_NSEC,	offsetof(struct vdso_data, xtime_coarse_nsec));
+  DEFINE(VDSO_WTM_CLK_SEC,	offsetof(struct vdso_data, wtm_clock_sec));
+  DEFINE(VDSO_WTM_CLK_NSEC,	offsetof(struct vdso_data, wtm_clock_nsec));
+  DEFINE(VDSO_TB_SEQ_COUNT,	offsetof(struct vdso_data, tb_seq_count));
+  DEFINE(VDSO_CS_MULT,		offsetof(struct vdso_data, cs_mult));
+  DEFINE(VDSO_CS_SHIFT,		offsetof(struct vdso_data, cs_shift));
+  DEFINE(VDSO_TZ_MINWEST,	offsetof(struct vdso_data, tz_minuteswest));
+  DEFINE(VDSO_TZ_DSTTIME,	offsetof(struct vdso_data, tz_dsttime));
+  DEFINE(VDSO_USE_SYSCALL,	offsetof(struct vdso_data, use_syscall));
+  BLANK();
+  DEFINE(TVAL_TV_SEC,		offsetof(struct timeval, tv_sec));
+  DEFINE(TVAL_TV_USEC,		offsetof(struct timeval, tv_usec));
+  DEFINE(TSPEC_TV_SEC,		offsetof(struct timespec, tv_sec));
+  DEFINE(TSPEC_TV_NSEC,		offsetof(struct timespec, tv_nsec));
+  BLANK();
+  DEFINE(TZ_MINWEST,		offsetof(struct timezone, tz_minuteswest));
+  DEFINE(TZ_DSTTIME,		offsetof(struct timezone, tz_dsttime));
+  BLANK();
+#ifdef CONFIG_KVM_ARM_HOST
+  DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt));
+  DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
+  DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
+  DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
+  DEFINE(CPU_SP_EL1,		offsetof(struct kvm_regs, sp_el1));
+  DEFINE(CPU_ELR_EL1,		offsetof(struct kvm_regs, elr_el1));
+  DEFINE(CPU_SPSR,		offsetof(struct kvm_regs, spsr));
+  DEFINE(CPU_SYSREGS,		offsetof(struct kvm_cpu_context, sys_regs));
+  DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
+  DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
+  DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+  DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
+  DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
+  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
+  DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
+  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
+  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
+  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
+  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
+  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
+  DEFINE(VGIC_SAVE_FN,		offsetof(struct vgic_sr_vectors, save_vgic));
+  DEFINE(VGIC_RESTORE_FN,	offsetof(struct vgic_sr_vectors, restore_vgic));
+  DEFINE(VGIC_SR_VECTOR_SZ,	sizeof(struct vgic_sr_vectors));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+  DEFINE(VGIC_V3_CPU_SRE,	offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
+  DEFINE(VGIC_V3_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
+  DEFINE(VGIC_V3_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
+  DEFINE(VGIC_V3_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
+  DEFINE(VGIC_V3_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
+  DEFINE(VGIC_V3_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
+  DEFINE(VGIC_V3_CPU_AP0R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
+  DEFINE(VGIC_V3_CPU_AP1R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
+  DEFINE(VGIC_V3_CPU_LR,	offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
+  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
+  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
+  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
+#endif
+#ifdef CONFIG_CPU_PM
+  DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
+  DEFINE(CPU_CTX_SP,		offsetof(struct cpu_suspend_ctx, sp));
+  DEFINE(MPIDR_HASH_MASK,	offsetof(struct mpidr_hash, mask));
+  DEFINE(MPIDR_HASH_SHIFTS,	offsetof(struct mpidr_hash, shift_aff));
+  DEFINE(SLEEP_SAVE_SP_SZ,	sizeof(struct sleep_save_sp));
+  DEFINE(SLEEP_SAVE_SP_PHYS,	offsetof(struct sleep_save_sp, save_ptr_stash_phys));
+  DEFINE(SLEEP_SAVE_SP_VIRT,	offsetof(struct sleep_save_sp, save_ptr_stash));
+#endif
+  return 0;
+}
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
new file mode 100644
index 000000000..b8629d52f
--- /dev/null
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -0,0 +1,128 @@
+/*
+ *  ARM64 cacheinfo support
+ *
+ *  Copyright (C) 2015 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bitops.h>
+#include <linux/cacheinfo.h>
+#include <linux/cpu.h>
+#include <linux/compiler.h>
+#include <linux/of.h>
+
+#include <asm/cachetype.h>
+#include <asm/processor.h>
+
+#define MAX_CACHE_LEVEL			7	/* Max 7 level supported */
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level)	(3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level)		(7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level)	\
+	(((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+static inline enum cache_type get_cache_type(int level)
+{
+	u64 clidr;
+
+	if (level > MAX_CACHE_LEVEL)
+		return CACHE_TYPE_NOCACHE;
+	asm volatile ("mrs     %x0, clidr_el1" : "=r" (clidr));
+	return CLIDR_CTYPE(clidr, level);
+}
+
+/*
+ * Cache Size Selection Register(CSSELR) selects which Cache Size ID
+ * Register(CCSIDR) is accessible by specifying the required cache
+ * level and the cache type. We need to ensure that no one else changes
+ * CSSELR by calling this in non-preemtible context
+ */
+u64 __attribute_const__ cache_get_ccsidr(u64 csselr)
+{
+	u64 ccsidr;
+
+	WARN_ON(preemptible());
+
+	/* Put value into CSSELR */
+	asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+	isb();
+	/* Read result out of CCSIDR */
+	asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr));
+
+	return ccsidr;
+}
+
+static void ci_leaf_init(struct cacheinfo *this_leaf,
+			 enum cache_type type, unsigned int level)
+{
+	bool is_icache = type & CACHE_TYPE_INST;
+	u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache);
+
+	this_leaf->level = level;
+	this_leaf->type = type;
+	this_leaf->coherency_line_size = CACHE_LINESIZE(tmp);
+	this_leaf->number_of_sets = CACHE_NUMSETS(tmp);
+	this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp);
+	this_leaf->size = this_leaf->number_of_sets *
+	    this_leaf->coherency_line_size * this_leaf->ways_of_associativity;
+	this_leaf->attributes =
+		((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) |
+		((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) |
+		((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) |
+		((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? CACHE_WRITE_ALLOCATE : 0);
+}
+
+static int __init_cache_level(unsigned int cpu)
+{
+	unsigned int ctype, level, leaves;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+
+	for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
+		ctype = get_cache_type(level);
+		if (ctype == CACHE_TYPE_NOCACHE) {
+			level--;
+			break;
+		}
+		/* Separate instruction and data caches */
+		leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
+	}
+
+	this_cpu_ci->num_levels = level;
+	this_cpu_ci->num_leaves = leaves;
+	return 0;
+}
+
+static int __populate_cache_leaves(unsigned int cpu)
+{
+	unsigned int level, idx;
+	enum cache_type type;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+
+	for (idx = 0, level = 1; level <= this_cpu_ci->num_levels &&
+	     idx < this_cpu_ci->num_leaves; idx++, level++) {
+		type = get_cache_type(level);
+		if (type == CACHE_TYPE_SEPARATE) {
+			ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
+			ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+		} else {
+			ci_leaf_init(this_leaf++, type, level);
+		}
+	}
+	return 0;
+}
+
+DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
+DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
new file mode 100644
index 000000000..6ffd91438
--- /dev/null
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -0,0 +1,92 @@
+/*
+ * Contains CPU specific errata definitions
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/types.h>
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/cpufeature.h>
+
+#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+
+#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
+			MIDR_ARCHITECTURE_MASK)
+
+static bool __maybe_unused
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
+{
+	u32 midr = read_cpuid_id();
+
+	if ((midr & CPU_MODEL_MASK) != entry->midr_model)
+		return false;
+
+	midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
+
+	return (midr >= entry->midr_range_min && midr <= entry->midr_range_max);
+}
+
+#define MIDR_RANGE(model, min, max) \
+	.matches = is_affected_midr_range, \
+	.midr_model = model, \
+	.midr_range_min = min, \
+	.midr_range_max = max
+
+const struct arm64_cpu_capabilities arm64_errata[] = {
+#if	defined(CONFIG_ARM64_ERRATUM_826319) || \
+	defined(CONFIG_ARM64_ERRATUM_827319) || \
+	defined(CONFIG_ARM64_ERRATUM_824069)
+	{
+	/* Cortex-A53 r0p[012] */
+		.desc = "ARM errata 826319, 827319, 824069",
+		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
+		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02),
+	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_819472
+	{
+	/* Cortex-A53 r0p[01] */
+		.desc = "ARM errata 819472",
+		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
+		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01),
+	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_832075
+	{
+	/* Cortex-A57 r0p0 - r1p2 */
+		.desc = "ARM erratum 832075",
+		.capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
+		MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
+			   (1 << MIDR_VARIANT_SHIFT) | 2),
+	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_845719
+	{
+	/* Cortex-A53 r0p[01234] */
+		.desc = "ARM erratum 845719",
+		.capability = ARM64_WORKAROUND_845719,
+		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04),
+	},
+#endif
+	{
+	}
+};
+
+void check_local_cpu_errata(void)
+{
+	check_cpu_capabilities(arm64_errata, "enabling workaround for");
+}
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
new file mode 100644
index 000000000..fb8ff9ba4
--- /dev/null
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -0,0 +1,87 @@
+/*
+ * CPU kernel entry/exit control
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/cpu_ops.h>
+#include <asm/smp_plat.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/string.h>
+
+extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations cpu_psci_ops;
+
+const struct cpu_operations *cpu_ops[NR_CPUS];
+
+static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_SMP
+	&smp_spin_table_ops,
+#endif
+	&cpu_psci_ops,
+	NULL,
+};
+
+const struct cpu_operations * __init cpu_get_ops(const char *name)
+{
+	const struct cpu_operations **ops = supported_cpu_ops;
+
+	while (*ops) {
+		if (!strcmp(name, (*ops)->name))
+			return *ops;
+
+		ops++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Read a cpu's enable method from the device tree and record it in cpu_ops.
+ */
+int __init cpu_read_ops(struct device_node *dn, int cpu)
+{
+	const char *enable_method = of_get_property(dn, "enable-method", NULL);
+	if (!enable_method) {
+		/*
+		 * The boot CPU may not have an enable method (e.g. when
+		 * spin-table is used for secondaries). Don't warn spuriously.
+		 */
+		if (cpu != 0)
+			pr_err("%s: missing enable-method property\n",
+				dn->full_name);
+		return -ENOENT;
+	}
+
+	cpu_ops[cpu] = cpu_get_ops(enable_method);
+	if (!cpu_ops[cpu]) {
+		pr_warn("%s: unsupported enable-method property: %s\n",
+			dn->full_name, enable_method);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+void __init cpu_read_bootcpu_ops(void)
+{
+	struct device_node *dn = of_get_cpu_node(0, NULL);
+	if (!dn) {
+		pr_err("Failed to find device node for boot cpu\n");
+		return;
+	}
+	cpu_read_ops(dn, 0);
+}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
new file mode 100644
index 000000000..3d9967e43
--- /dev/null
+++ b/arch/arm64/kernel/cpufeature.c
@@ -0,0 +1,47 @@
+/*
+ * Contains CPU feature definitions
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "alternatives: " fmt
+
+#include <linux/types.h>
+#include <asm/cpu.h>
+#include <asm/cpufeature.h>
+
+static const struct arm64_cpu_capabilities arm64_features[] = {
+	{},
+};
+
+void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+			    const char *info)
+{
+	int i;
+
+	for (i = 0; caps[i].desc; i++) {
+		if (!caps[i].matches(&caps[i]))
+			continue;
+
+		if (!cpus_have_cap(caps[i].capability))
+			pr_info("%s %s\n", info, caps[i].desc);
+		cpus_set_cap(caps[i].capability);
+	}
+}
+
+void check_local_cpu_features(void)
+{
+	check_cpu_capabilities(arm64_features, "detected feature");
+}
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
new file mode 100644
index 000000000..a78143a5c
--- /dev/null
+++ b/arch/arm64/kernel/cpuidle.c
@@ -0,0 +1,51 @@
+/*
+ * ARM64 CPU idle arch support
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/cpuidle.h>
+#include <asm/cpu_ops.h>
+
+int arm_cpuidle_init(unsigned int cpu)
+{
+	int ret = -EOPNOTSUPP;
+	struct device_node *cpu_node = of_cpu_device_node_get(cpu);
+
+	if (!cpu_node)
+		return -ENODEV;
+
+	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle)
+		ret = cpu_ops[cpu]->cpu_init_idle(cpu_node, cpu);
+
+	of_node_put(cpu_node);
+	return ret;
+}
+
+/**
+ * cpu_suspend() - function to enter a low-power idle state
+ * @arg: argument to pass to CPU suspend operations
+ *
+ * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
+ * operations back-end error code otherwise.
+ */
+int cpu_suspend(unsigned long arg)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * If cpu_ops have not been registered or suspend
+	 * has not been initialized, cpu_suspend call fails early.
+	 */
+	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
+		return -EOPNOTSUPP;
+	return cpu_ops[cpu]->cpu_suspend(arg);
+}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
new file mode 100644
index 000000000..75d5a867e
--- /dev/null
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -0,0 +1,256 @@
+/*
+ * Record and handle CPU attributes.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <asm/arch_timer.h>
+#include <asm/cachetype.h>
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/cpufeature.h>
+
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/preempt.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+/*
+ * In case the boot CPU is hotpluggable, we record its initial state and
+ * current state separately. Certain system registers may contain different
+ * values depending on configuration at or after reset.
+ */
+DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
+static struct cpuinfo_arm64 boot_cpu_data;
+static bool mixed_endian_el0 = true;
+
+static char *icache_policy_str[] = {
+	[ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
+	[ICACHE_POLICY_AIVIVT] = "AIVIVT",
+	[ICACHE_POLICY_VIPT] = "VIPT",
+	[ICACHE_POLICY_PIPT] = "PIPT",
+};
+
+unsigned long __icache_flags;
+
+static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
+{
+	unsigned int cpu = smp_processor_id();
+	u32 l1ip = CTR_L1IP(info->reg_ctr);
+
+	if (l1ip != ICACHE_POLICY_PIPT) {
+		/*
+		 * VIPT caches are non-aliasing if the VA always equals the PA
+		 * in all bit positions that are covered by the index. This is
+		 * the case if the size of a way (# of sets * line size) does
+		 * not exceed PAGE_SIZE.
+		 */
+		u32 waysize = icache_get_numsets() * icache_get_linesize();
+
+		if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
+			set_bit(ICACHEF_ALIASING, &__icache_flags);
+	}
+	if (l1ip == ICACHE_POLICY_AIVIVT)
+		set_bit(ICACHEF_AIVIVT, &__icache_flags);
+
+	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
+}
+
+bool cpu_supports_mixed_endian_el0(void)
+{
+	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+}
+
+bool system_supports_mixed_endian_el0(void)
+{
+	return mixed_endian_el0;
+}
+
+static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
+{
+	mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
+}
+
+static void update_cpu_features(struct cpuinfo_arm64 *info)
+{
+	update_mixed_endian_el0_support(info);
+}
+
+static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
+{
+	if ((boot & mask) == (cur & mask))
+		return 0;
+
+	pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n",
+		name, (unsigned long)boot, cpu, (unsigned long)cur);
+
+	return 1;
+}
+
+#define CHECK_MASK(field, mask, boot, cur, cpu) \
+	check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
+
+#define CHECK(field, boot, cur, cpu) \
+	CHECK_MASK(field, ~0ULL, boot, cur, cpu)
+
+/*
+ * Verify that CPUs don't have unexpected differences that will cause problems.
+ */
+static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
+{
+	unsigned int cpu = smp_processor_id();
+	struct cpuinfo_arm64 *boot = &boot_cpu_data;
+	unsigned int diff = 0;
+
+	/*
+	 * The kernel can handle differing I-cache policies, but otherwise
+	 * caches should look identical. Userspace JITs will make use of
+	 * *minLine.
+	 */
+	diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
+
+	/*
+	 * Userspace may perform DC ZVA instructions. Mismatched block sizes
+	 * could result in too much or too little memory being zeroed if a
+	 * process is preempted and migrated between CPUs.
+	 */
+	diff |= CHECK(dczid, boot, cur, cpu);
+
+	/* If different, timekeeping will be broken (especially with KVM) */
+	diff |= CHECK(cntfrq, boot, cur, cpu);
+
+	/*
+	 * The kernel uses self-hosted debug features and expects CPUs to
+	 * support identical debug features. We presently need CTX_CMPs, WRPs,
+	 * and BRPs to be identical.
+	 * ID_AA64DFR1 is currently RES0.
+	 */
+	diff |= CHECK(id_aa64dfr0, boot, cur, cpu);
+	diff |= CHECK(id_aa64dfr1, boot, cur, cpu);
+
+	/*
+	 * Even in big.LITTLE, processors should be identical instruction-set
+	 * wise.
+	 */
+	diff |= CHECK(id_aa64isar0, boot, cur, cpu);
+	diff |= CHECK(id_aa64isar1, boot, cur, cpu);
+
+	/*
+	 * Differing PARange support is fine as long as all peripherals and
+	 * memory are mapped within the minimum PARange of all CPUs.
+	 * Linux should not care about secure memory.
+	 * ID_AA64MMFR1 is currently RES0.
+	 */
+	diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu);
+	diff |= CHECK(id_aa64mmfr1, boot, cur, cpu);
+
+	/*
+	 * EL3 is not our concern.
+	 * ID_AA64PFR1 is currently RES0.
+	 */
+	diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu);
+	diff |= CHECK(id_aa64pfr1, boot, cur, cpu);
+
+	/*
+	 * If we have AArch32, we care about 32-bit features for compat. These
+	 * registers should be RES0 otherwise.
+	 */
+	diff |= CHECK(id_dfr0, boot, cur, cpu);
+	diff |= CHECK(id_isar0, boot, cur, cpu);
+	diff |= CHECK(id_isar1, boot, cur, cpu);
+	diff |= CHECK(id_isar2, boot, cur, cpu);
+	diff |= CHECK(id_isar3, boot, cur, cpu);
+	diff |= CHECK(id_isar4, boot, cur, cpu);
+	diff |= CHECK(id_isar5, boot, cur, cpu);
+	/*
+	 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+	 * ACTLR formats could differ across CPUs and therefore would have to
+	 * be trapped for virtualization anyway.
+	 */
+	diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu);
+	diff |= CHECK(id_mmfr1, boot, cur, cpu);
+	diff |= CHECK(id_mmfr2, boot, cur, cpu);
+	diff |= CHECK(id_mmfr3, boot, cur, cpu);
+	diff |= CHECK(id_pfr0, boot, cur, cpu);
+	diff |= CHECK(id_pfr1, boot, cur, cpu);
+
+	diff |= CHECK(mvfr0, boot, cur, cpu);
+	diff |= CHECK(mvfr1, boot, cur, cpu);
+	diff |= CHECK(mvfr2, boot, cur, cpu);
+
+	/*
+	 * Mismatched CPU features are a recipe for disaster. Don't even
+	 * pretend to support them.
+	 */
+	WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC,
+			"Unsupported CPU feature variation.\n");
+}
+
+static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
+{
+	info->reg_cntfrq = arch_timer_get_cntfrq();
+	info->reg_ctr = read_cpuid_cachetype();
+	info->reg_dczid = read_cpuid(DCZID_EL0);
+	info->reg_midr = read_cpuid_id();
+
+	info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
+	info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
+	info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
+	info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
+	info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+	info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+	info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+	info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
+
+	info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+	info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+	info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+	info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+	info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+	info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+	info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+	info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+	info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+	info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+	info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+	info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+	info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+
+	info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
+	info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
+	info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+
+	cpuinfo_detect_icache_policy(info);
+
+	check_local_cpu_errata();
+	check_local_cpu_features();
+	update_cpu_features(info);
+}
+
+void cpuinfo_store_cpu(void)
+{
+	struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
+	__cpuinfo_store_cpu(info);
+	cpuinfo_sanity_check(info);
+}
+
+void __init cpuinfo_store_boot_cpu(void)
+{
+	struct cpuinfo_arm64 *info = &per_cpu(cpu_data, 0);
+	__cpuinfo_store_cpu(info);
+
+	boot_cpu_data = *info;
+}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
new file mode 100644
index 000000000..b056369fd
--- /dev/null
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -0,0 +1,429 @@
+/*
+ * ARMv8 single-step debug support and mdscr context switching.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/hardirq.h>
+#include <linux/init.h>
+#include <linux/ptrace.h>
+#include <linux/stat.h>
+#include <linux/uaccess.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/cputype.h>
+#include <asm/system_misc.h>
+
+/* Determine debug architecture. */
+u8 debug_monitors_arch(void)
+{
+	return read_cpuid(ID_AA64DFR0_EL1) & 0xf;
+}
+
+/*
+ * MDSCR access routines.
+ */
+static void mdscr_write(u32 mdscr)
+{
+	unsigned long flags;
+	local_dbg_save(flags);
+	asm volatile("msr mdscr_el1, %0" :: "r" (mdscr));
+	local_dbg_restore(flags);
+}
+
+static u32 mdscr_read(void)
+{
+	u32 mdscr;
+	asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr));
+	return mdscr;
+}
+
+/*
+ * Allow root to disable self-hosted debug from userspace.
+ * This is useful if you want to connect an external JTAG debugger.
+ */
+static u32 debug_enabled = 1;
+
+static int create_debug_debugfs_entry(void)
+{
+	debugfs_create_bool("debug_enabled", 0644, NULL, &debug_enabled);
+	return 0;
+}
+fs_initcall(create_debug_debugfs_entry);
+
+static int __init early_debug_disable(char *buf)
+{
+	debug_enabled = 0;
+	return 0;
+}
+
+early_param("nodebugmon", early_debug_disable);
+
+/*
+ * Keep track of debug users on each core.
+ * The ref counts are per-cpu so we use a local_t type.
+ */
+static DEFINE_PER_CPU(int, mde_ref_count);
+static DEFINE_PER_CPU(int, kde_ref_count);
+
+void enable_debug_monitors(enum debug_el el)
+{
+	u32 mdscr, enable = 0;
+
+	WARN_ON(preemptible());
+
+	if (this_cpu_inc_return(mde_ref_count) == 1)
+		enable = DBG_MDSCR_MDE;
+
+	if (el == DBG_ACTIVE_EL1 &&
+	    this_cpu_inc_return(kde_ref_count) == 1)
+		enable |= DBG_MDSCR_KDE;
+
+	if (enable && debug_enabled) {
+		mdscr = mdscr_read();
+		mdscr |= enable;
+		mdscr_write(mdscr);
+	}
+}
+
+void disable_debug_monitors(enum debug_el el)
+{
+	u32 mdscr, disable = 0;
+
+	WARN_ON(preemptible());
+
+	if (this_cpu_dec_return(mde_ref_count) == 0)
+		disable = ~DBG_MDSCR_MDE;
+
+	if (el == DBG_ACTIVE_EL1 &&
+	    this_cpu_dec_return(kde_ref_count) == 0)
+		disable &= ~DBG_MDSCR_KDE;
+
+	if (disable) {
+		mdscr = mdscr_read();
+		mdscr &= disable;
+		mdscr_write(mdscr);
+	}
+}
+
+/*
+ * OS lock clearing.
+ */
+static void clear_os_lock(void *unused)
+{
+	asm volatile("msr oslar_el1, %0" : : "r" (0));
+}
+
+static int os_lock_notify(struct notifier_block *self,
+				    unsigned long action, void *data)
+{
+	int cpu = (unsigned long)data;
+	if (action == CPU_ONLINE)
+		smp_call_function_single(cpu, clear_os_lock, NULL, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block os_lock_nb = {
+	.notifier_call = os_lock_notify,
+};
+
+static int debug_monitors_init(void)
+{
+	cpu_notifier_register_begin();
+
+	/* Clear the OS lock. */
+	on_each_cpu(clear_os_lock, NULL, 1);
+	isb();
+	local_dbg_enable();
+
+	/* Register hotplug handler. */
+	__register_cpu_notifier(&os_lock_nb);
+
+	cpu_notifier_register_done();
+	return 0;
+}
+postcore_initcall(debug_monitors_init);
+
+/*
+ * Single step API and exception handling.
+ */
+static void set_regs_spsr_ss(struct pt_regs *regs)
+{
+	unsigned long spsr;
+
+	spsr = regs->pstate;
+	spsr &= ~DBG_SPSR_SS;
+	spsr |= DBG_SPSR_SS;
+	regs->pstate = spsr;
+}
+
+static void clear_regs_spsr_ss(struct pt_regs *regs)
+{
+	unsigned long spsr;
+
+	spsr = regs->pstate;
+	spsr &= ~DBG_SPSR_SS;
+	regs->pstate = spsr;
+}
+
+/* EL1 Single Step Handler hooks */
+static LIST_HEAD(step_hook);
+static DEFINE_RWLOCK(step_hook_lock);
+
+void register_step_hook(struct step_hook *hook)
+{
+	write_lock(&step_hook_lock);
+	list_add(&hook->node, &step_hook);
+	write_unlock(&step_hook_lock);
+}
+
+void unregister_step_hook(struct step_hook *hook)
+{
+	write_lock(&step_hook_lock);
+	list_del(&hook->node);
+	write_unlock(&step_hook_lock);
+}
+
+/*
+ * Call registered single step handers
+ * There is no Syndrome info to check for determining the handler.
+ * So we call all the registered handlers, until the right handler is
+ * found which returns zero.
+ */
+static int call_step_hook(struct pt_regs *regs, unsigned int esr)
+{
+	struct step_hook *hook;
+	int retval = DBG_HOOK_ERROR;
+
+	read_lock(&step_hook_lock);
+
+	list_for_each_entry(hook, &step_hook, node)	{
+		retval = hook->fn(regs, esr);
+		if (retval == DBG_HOOK_HANDLED)
+			break;
+	}
+
+	read_unlock(&step_hook_lock);
+
+	return retval;
+}
+
+static int single_step_handler(unsigned long addr, unsigned int esr,
+			       struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	/*
+	 * If we are stepping a pending breakpoint, call the hw_breakpoint
+	 * handler first.
+	 */
+	if (!reinstall_suspended_bps(regs))
+		return 0;
+
+	if (user_mode(regs)) {
+		info.si_signo = SIGTRAP;
+		info.si_errno = 0;
+		info.si_code  = TRAP_HWBKPT;
+		info.si_addr  = (void __user *)instruction_pointer(regs);
+		force_sig_info(SIGTRAP, &info, current);
+
+		/*
+		 * ptrace will disable single step unless explicitly
+		 * asked to re-enable it. For other clients, it makes
+		 * sense to leave it enabled (i.e. rewind the controls
+		 * to the active-not-pending state).
+		 */
+		user_rewind_single_step(current);
+	} else {
+		if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
+			return 0;
+
+		pr_warning("Unexpected kernel single-step exception at EL1\n");
+		/*
+		 * Re-enable stepping since we know that we will be
+		 * returning to regs.
+		 */
+		set_regs_spsr_ss(regs);
+	}
+
+	return 0;
+}
+
+/*
+ * Breakpoint handler is re-entrant as another breakpoint can
+ * hit within breakpoint handler, especically in kprobes.
+ * Use reader/writer locks instead of plain spinlock.
+ */
+static LIST_HEAD(break_hook);
+static DEFINE_RWLOCK(break_hook_lock);
+
+void register_break_hook(struct break_hook *hook)
+{
+	write_lock(&break_hook_lock);
+	list_add(&hook->node, &break_hook);
+	write_unlock(&break_hook_lock);
+}
+
+void unregister_break_hook(struct break_hook *hook)
+{
+	write_lock(&break_hook_lock);
+	list_del(&hook->node);
+	write_unlock(&break_hook_lock);
+}
+
+static int call_break_hook(struct pt_regs *regs, unsigned int esr)
+{
+	struct break_hook *hook;
+	int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;
+
+	read_lock(&break_hook_lock);
+	list_for_each_entry(hook, &break_hook, node)
+		if ((esr & hook->esr_mask) == hook->esr_val)
+			fn = hook->fn;
+	read_unlock(&break_hook_lock);
+
+	return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
+}
+
+static int brk_handler(unsigned long addr, unsigned int esr,
+		       struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	if (user_mode(regs)) {
+		info = (siginfo_t) {
+			.si_signo = SIGTRAP,
+			.si_errno = 0,
+			.si_code  = TRAP_BRKPT,
+			.si_addr  = (void __user *)instruction_pointer(regs),
+		};
+
+		force_sig_info(SIGTRAP, &info, current);
+	} else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
+		pr_warning("Unexpected kernel BRK exception at EL1\n");
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int aarch32_break_handler(struct pt_regs *regs)
+{
+	siginfo_t info;
+	u32 arm_instr;
+	u16 thumb_instr;
+	bool bp = false;
+	void __user *pc = (void __user *)instruction_pointer(regs);
+
+	if (!compat_user_mode(regs))
+		return -EFAULT;
+
+	if (compat_thumb_mode(regs)) {
+		/* get 16-bit Thumb instruction */
+		get_user(thumb_instr, (u16 __user *)pc);
+		thumb_instr = le16_to_cpu(thumb_instr);
+		if (thumb_instr == AARCH32_BREAK_THUMB2_LO) {
+			/* get second half of 32-bit Thumb-2 instruction */
+			get_user(thumb_instr, (u16 __user *)(pc + 2));
+			thumb_instr = le16_to_cpu(thumb_instr);
+			bp = thumb_instr == AARCH32_BREAK_THUMB2_HI;
+		} else {
+			bp = thumb_instr == AARCH32_BREAK_THUMB;
+		}
+	} else {
+		/* 32-bit ARM instruction */
+		get_user(arm_instr, (u32 __user *)pc);
+		arm_instr = le32_to_cpu(arm_instr);
+		bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM;
+	}
+
+	if (!bp)
+		return -EFAULT;
+
+	info = (siginfo_t) {
+		.si_signo = SIGTRAP,
+		.si_errno = 0,
+		.si_code  = TRAP_BRKPT,
+		.si_addr  = pc,
+	};
+
+	force_sig_info(SIGTRAP, &info, current);
+	return 0;
+}
+
+static int __init debug_traps_init(void)
+{
+	hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
+			      TRAP_HWBKPT, "single-step handler");
+	hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
+			      TRAP_BRKPT, "ptrace BRK handler");
+	return 0;
+}
+arch_initcall(debug_traps_init);
+
+/* Re-enable single step for syscall restarting. */
+void user_rewind_single_step(struct task_struct *task)
+{
+	/*
+	 * If single step is active for this thread, then set SPSR.SS
+	 * to 1 to avoid returning to the active-pending state.
+	 */
+	if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP))
+		set_regs_spsr_ss(task_pt_regs(task));
+}
+
+void user_fastforward_single_step(struct task_struct *task)
+{
+	if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP))
+		clear_regs_spsr_ss(task_pt_regs(task));
+}
+
+/* Kernel API */
+void kernel_enable_single_step(struct pt_regs *regs)
+{
+	WARN_ON(!irqs_disabled());
+	set_regs_spsr_ss(regs);
+	mdscr_write(mdscr_read() | DBG_MDSCR_SS);
+	enable_debug_monitors(DBG_ACTIVE_EL1);
+}
+
+void kernel_disable_single_step(void)
+{
+	WARN_ON(!irqs_disabled());
+	mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
+	disable_debug_monitors(DBG_ACTIVE_EL1);
+}
+
+int kernel_active_single_step(void)
+{
+	WARN_ON(!irqs_disabled());
+	return mdscr_read() & DBG_MDSCR_SS;
+}
+
+/* ptrace API */
+void user_enable_single_step(struct task_struct *task)
+{
+	set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
+	set_regs_spsr_ss(task_pt_regs(task));
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+	clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
+}
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
new file mode 100644
index 000000000..8ce9b0577
--- /dev/null
+++ b/arch/arm64/kernel/efi-entry.S
@@ -0,0 +1,124 @@
+/*
+ * EFI entry point.
+ *
+ * Copyright (C) 2013, 2014 Red Hat, Inc.
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+#include <asm/assembler.h>
+
+#define EFI_LOAD_ERROR 0x8000000000000001
+
+	__INIT
+
+	/*
+	 * We arrive here from the EFI boot manager with:
+	 *
+	 *    * CPU in little-endian mode
+	 *    * MMU on with identity-mapped RAM
+	 *    * Icache and Dcache on
+	 *
+	 * We will most likely be running from some place other than where
+	 * we want to be. The kernel image wants to be placed at TEXT_OFFSET
+	 * from start of RAM.
+	 */
+ENTRY(efi_stub_entry)
+	/*
+	 * Create a stack frame to save FP/LR with extra space
+	 * for image_addr variable passed to efi_entry().
+	 */
+	stp	x29, x30, [sp, #-32]!
+
+	/*
+	 * Call efi_entry to do the real work.
+	 * x0 and x1 are already set up by firmware. Current runtime
+	 * address of image is calculated and passed via *image_addr.
+	 *
+	 * unsigned long efi_entry(void *handle,
+	 *                         efi_system_table_t *sys_table,
+	 *                         unsigned long *image_addr) ;
+	 */
+	adrp	x8, _text
+	add	x8, x8, #:lo12:_text
+	add	x2, sp, 16
+	str	x8, [x2]
+	bl	efi_entry
+	cmn	x0, #1
+	b.eq	efi_load_fail
+
+	/*
+	 * efi_entry() will have copied the kernel image if necessary and we
+	 * return here with device tree address in x0 and the kernel entry
+	 * point stored at *image_addr. Save those values in registers which
+	 * are callee preserved.
+	 */
+	mov	x20, x0		// DTB address
+	ldr	x0, [sp, #16]	// relocated _text address
+	ldr	x21, =stext_offset
+	add	x21, x0, x21
+
+	/*
+	 * Calculate size of the kernel Image (same for original and copy).
+	 */
+	adrp	x1, _text
+	add	x1, x1, #:lo12:_text
+	adrp	x2, _edata
+	add	x2, x2, #:lo12:_edata
+	sub	x1, x2, x1
+
+	/*
+	 * Flush the copied Image to the PoC, and ensure it is not shadowed by
+	 * stale icache entries from before relocation.
+	 */
+	bl	__flush_dcache_area
+	ic	ialluis
+
+	/*
+	 * Ensure that the rest of this function (in the original Image) is
+	 * visible when the caches are disabled. The I-cache can't have stale
+	 * entries for the VA range of the current image, so no maintenance is
+	 * necessary.
+	 */
+	adr	x0, efi_stub_entry
+	adr	x1, efi_stub_entry_end
+	sub	x1, x1, x0
+	bl	__flush_dcache_area
+
+	/* Turn off Dcache and MMU */
+	mrs	x0, CurrentEL
+	cmp	x0, #CurrentEL_EL2
+	b.ne	1f
+	mrs	x0, sctlr_el2
+	bic	x0, x0, #1 << 0	// clear SCTLR.M
+	bic	x0, x0, #1 << 2	// clear SCTLR.C
+	msr	sctlr_el2, x0
+	isb
+	b	2f
+1:
+	mrs	x0, sctlr_el1
+	bic	x0, x0, #1 << 0	// clear SCTLR.M
+	bic	x0, x0, #1 << 2	// clear SCTLR.C
+	msr	sctlr_el1, x0
+	isb
+2:
+	/* Jump to kernel entry point */
+	mov	x0, x20
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x21
+
+efi_load_fail:
+	mov	x0, #EFI_LOAD_ERROR
+	ldp	x29, x30, [sp], #32
+	ret
+
+efi_stub_entry_end:
+ENDPROC(efi_stub_entry)
diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c
new file mode 100644
index 000000000..f5374065a
--- /dev/null
+++ b/arch/arm64/kernel/efi-stub.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2013, 2014 Linaro Ltd;  <roy.franz@linaro.org>
+ *
+ * This file implements the EFI boot stub for the arm64 kernel.
+ * Adapted from ARM version by Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <asm/sections.h>
+
+efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table,
+					unsigned long *image_addr,
+					unsigned long *image_size,
+					unsigned long *reserve_addr,
+					unsigned long *reserve_size,
+					unsigned long dram_base,
+					efi_loaded_image_t *image)
+{
+	efi_status_t status;
+	unsigned long kernel_size, kernel_memsize = 0;
+
+	/* Relocate the image, if required. */
+	kernel_size = _edata - _text;
+	if (*image_addr != (dram_base + TEXT_OFFSET)) {
+		kernel_memsize = kernel_size + (_end - _edata);
+		status = efi_low_alloc(sys_table, kernel_memsize + TEXT_OFFSET,
+				       SZ_2M, reserve_addr);
+		if (status != EFI_SUCCESS) {
+			pr_efi_err(sys_table, "Failed to relocate kernel\n");
+			return status;
+		}
+		memcpy((void *)*reserve_addr + TEXT_OFFSET, (void *)*image_addr,
+		       kernel_size);
+		*image_addr = *reserve_addr + TEXT_OFFSET;
+		*reserve_size = kernel_memsize + TEXT_OFFSET;
+	}
+
+
+	return EFI_SUCCESS;
+}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
new file mode 100644
index 000000000..ab21e0d58
--- /dev/null
+++ b/arch/arm64/kernel/efi.c
@@ -0,0 +1,369 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 2.4
+ *
+ * Copyright (C) 2013, 2014 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/dmi.h>
+#include <linux/efi.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/mm_types.h>
+#include <linux/bootmem.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/preempt.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/cacheflush.h>
+#include <asm/efi.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+
+struct efi_memory_map memmap;
+
+static u64 efi_system_table;
+
+static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss;
+
+static struct mm_struct efi_mm = {
+	.mm_rb			= RB_ROOT,
+	.pgd			= efi_pgd,
+	.mm_users		= ATOMIC_INIT(2),
+	.mm_count		= ATOMIC_INIT(1),
+	.mmap_sem		= __RWSEM_INITIALIZER(efi_mm.mmap_sem),
+	.page_table_lock	= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
+	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
+	INIT_MM_CONTEXT(efi_mm)
+};
+
+static int uefi_debug __initdata;
+static int __init uefi_debug_setup(char *str)
+{
+	uefi_debug = 1;
+
+	return 0;
+}
+early_param("uefi_debug", uefi_debug_setup);
+
+static int __init is_normal_ram(efi_memory_desc_t *md)
+{
+	if (md->attribute & EFI_MEMORY_WB)
+		return 1;
+	return 0;
+}
+
+/*
+ * Translate a EFI virtual address into a physical address: this is necessary,
+ * as some data members of the EFI system table are virtually remapped after
+ * SetVirtualAddressMap() has been called.
+ */
+static phys_addr_t efi_to_phys(unsigned long addr)
+{
+	efi_memory_desc_t *md;
+
+	for_each_efi_memory_desc(&memmap, md) {
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
+		if (md->virt_addr == 0)
+			/* no virtual mapping has been installed by the stub */
+			break;
+		if (md->virt_addr <= addr &&
+		    (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
+			return md->phys_addr + addr - md->virt_addr;
+	}
+	return addr;
+}
+
+static int __init uefi_init(void)
+{
+	efi_char16_t *c16;
+	void *config_tables;
+	u64 table_size;
+	char vendor[100] = "unknown";
+	int i, retval;
+
+	efi.systab = early_memremap(efi_system_table,
+				    sizeof(efi_system_table_t));
+	if (efi.systab == NULL) {
+		pr_warn("Unable to map EFI system table.\n");
+		return -ENOMEM;
+	}
+
+	set_bit(EFI_BOOT, &efi.flags);
+	set_bit(EFI_64BIT, &efi.flags);
+
+	/*
+	 * Verify the EFI Table
+	 */
+	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
+		pr_err("System table signature incorrect\n");
+		retval = -EINVAL;
+		goto out;
+	}
+	if ((efi.systab->hdr.revision >> 16) < 2)
+		pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n",
+			efi.systab->hdr.revision >> 16,
+			efi.systab->hdr.revision & 0xffff);
+
+	/* Show what we know for posterity */
+	c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
+			     sizeof(vendor));
+	if (c16) {
+		for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
+			vendor[i] = c16[i];
+		vendor[i] = '\0';
+		early_memunmap(c16, sizeof(vendor));
+	}
+
+	pr_info("EFI v%u.%.02u by %s\n",
+		efi.systab->hdr.revision >> 16,
+		efi.systab->hdr.revision & 0xffff, vendor);
+
+	table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
+	config_tables = early_memremap(efi_to_phys(efi.systab->tables),
+				       table_size);
+
+	retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
+					 sizeof(efi_config_table_64_t), NULL);
+
+	early_memunmap(config_tables, table_size);
+out:
+	early_memunmap(efi.systab,  sizeof(efi_system_table_t));
+	return retval;
+}
+
+/*
+ * Return true for RAM regions we want to permanently reserve.
+ */
+static __init int is_reserve_region(efi_memory_desc_t *md)
+{
+	switch (md->type) {
+	case EFI_LOADER_CODE:
+	case EFI_LOADER_DATA:
+	case EFI_BOOT_SERVICES_CODE:
+	case EFI_BOOT_SERVICES_DATA:
+	case EFI_CONVENTIONAL_MEMORY:
+		return 0;
+	default:
+		break;
+	}
+	return is_normal_ram(md);
+}
+
+static __init void reserve_regions(void)
+{
+	efi_memory_desc_t *md;
+	u64 paddr, npages, size;
+
+	if (uefi_debug)
+		pr_info("Processing EFI memory map:\n");
+
+	for_each_efi_memory_desc(&memmap, md) {
+		paddr = md->phys_addr;
+		npages = md->num_pages;
+
+		if (uefi_debug) {
+			char buf[64];
+
+			pr_info("  0x%012llx-0x%012llx %s",
+				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
+				efi_md_typeattr_format(buf, sizeof(buf), md));
+		}
+
+		memrange_efi_to_native(&paddr, &npages);
+		size = npages << PAGE_SHIFT;
+
+		if (is_normal_ram(md))
+			early_init_dt_add_memory_arch(paddr, size);
+
+		if (is_reserve_region(md)) {
+			memblock_reserve(paddr, size);
+			if (uefi_debug)
+				pr_cont("*");
+		}
+
+		if (uefi_debug)
+			pr_cont("\n");
+	}
+
+	set_bit(EFI_MEMMAP, &efi.flags);
+}
+
+void __init efi_init(void)
+{
+	struct efi_fdt_params params;
+
+	/* Grab UEFI information placed in FDT by stub */
+	if (!efi_get_fdt_params(&params, uefi_debug))
+		return;
+
+	efi_system_table = params.system_table;
+
+	memblock_reserve(params.mmap & PAGE_MASK,
+			 PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK)));
+	memmap.phys_map = (void *)params.mmap;
+	memmap.map = early_memremap(params.mmap, params.mmap_size);
+	memmap.map_end = memmap.map + params.mmap_size;
+	memmap.desc_size = params.desc_size;
+	memmap.desc_version = params.desc_ver;
+
+	if (uefi_init() < 0)
+		return;
+
+	reserve_regions();
+	early_memunmap(memmap.map, params.mmap_size);
+}
+
+static bool __init efi_virtmap_init(void)
+{
+	efi_memory_desc_t *md;
+
+	for_each_efi_memory_desc(&memmap, md) {
+		u64 paddr, npages, size;
+		pgprot_t prot;
+
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
+		if (md->virt_addr == 0)
+			return false;
+
+		paddr = md->phys_addr;
+		npages = md->num_pages;
+		memrange_efi_to_native(&paddr, &npages);
+		size = npages << PAGE_SHIFT;
+
+		pr_info("  EFI remap 0x%016llx => %p\n",
+			md->phys_addr, (void *)md->virt_addr);
+
+		/*
+		 * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+		 * executable, everything else can be mapped with the XN bits
+		 * set.
+		 */
+		if (!is_normal_ram(md))
+			prot = __pgprot(PROT_DEVICE_nGnRE);
+		else if (md->type == EFI_RUNTIME_SERVICES_CODE)
+			prot = PAGE_KERNEL_EXEC;
+		else
+			prot = PAGE_KERNEL;
+
+		create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot);
+	}
+	return true;
+}
+
+/*
+ * Enable the UEFI Runtime Services if all prerequisites are in place, i.e.,
+ * non-early mapping of the UEFI system table and virtual mappings for all
+ * EFI_MEMORY_RUNTIME regions.
+ */
+static int __init arm64_enable_runtime_services(void)
+{
+	u64 mapsize;
+
+	if (!efi_enabled(EFI_BOOT)) {
+		pr_info("EFI services will not be available.\n");
+		return -1;
+	}
+
+	if (efi_runtime_disabled()) {
+		pr_info("EFI runtime services will be disabled.\n");
+		return -1;
+	}
+
+	pr_info("Remapping and enabling EFI services.\n");
+
+	mapsize = memmap.map_end - memmap.map;
+	memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map,
+						   mapsize);
+	if (!memmap.map) {
+		pr_err("Failed to remap EFI memory map\n");
+		return -1;
+	}
+	memmap.map_end = memmap.map + mapsize;
+	efi.memmap = &memmap;
+
+	efi.systab = (__force void *)ioremap_cache(efi_system_table,
+						   sizeof(efi_system_table_t));
+	if (!efi.systab) {
+		pr_err("Failed to remap EFI System Table\n");
+		return -1;
+	}
+	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
+	if (!efi_virtmap_init()) {
+		pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
+		return -1;
+	}
+
+	/* Set up runtime services function pointers */
+	efi_native_runtime_setup();
+	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
+	efi.runtime_version = efi.systab->hdr.revision;
+
+	return 0;
+}
+early_initcall(arm64_enable_runtime_services);
+
+static int __init arm64_dmi_init(void)
+{
+	/*
+	 * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to
+	 * be called early because dmi_id_init(), which is an arch_initcall
+	 * itself, depends on dmi_scan_machine() having been called already.
+	 */
+	dmi_scan_machine();
+	if (dmi_available)
+		dmi_set_dump_stack_arch_desc();
+	return 0;
+}
+core_initcall(arm64_dmi_init);
+
+static void efi_set_pgd(struct mm_struct *mm)
+{
+	if (mm == &init_mm)
+		cpu_set_reserved_ttbr0();
+	else
+		cpu_switch_mm(mm->pgd, mm);
+
+	flush_tlb_all();
+	if (icache_is_aivivt())
+		__flush_icache_all();
+}
+
+void efi_virtmap_load(void)
+{
+	preempt_disable();
+	efi_set_pgd(&efi_mm);
+}
+
+void efi_virtmap_unload(void)
+{
+	efi_set_pgd(current->active_mm);
+	preempt_enable();
+}
+
+/*
+ * UpdateCapsule() depends on the system being shutdown via
+ * ResetSystem().
+ */
+bool efi_poweroff_required(void)
+{
+	return efi_enabled(EFI_RUNTIME_SERVICES);
+}
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
new file mode 100644
index 000000000..c44a82f14
--- /dev/null
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -0,0 +1,67 @@
+/*
+ * FP/SIMD state saving and restoring
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/fpsimdmacros.h>
+
+/*
+ * Save the FP registers.
+ *
+ * x0 - pointer to struct fpsimd_state
+ */
+ENTRY(fpsimd_save_state)
+	fpsimd_save x0, 8
+	ret
+ENDPROC(fpsimd_save_state)
+
+/*
+ * Load the FP registers.
+ *
+ * x0 - pointer to struct fpsimd_state
+ */
+ENTRY(fpsimd_load_state)
+	fpsimd_restore x0, 8
+	ret
+ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_save_partial_state)
+	fpsimd_save_partial x0, 1, 8, 9
+	ret
+ENDPROC(fpsimd_save_partial_state)
+
+/*
+ * Load the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+	fpsimd_restore_partial x0, 8, 9
+	ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
new file mode 100644
index 000000000..08cafc518
--- /dev/null
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -0,0 +1,214 @@
+/*
+ * arch/arm64/kernel/entry-ftrace.S
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+/*
+ * Gcc with -pg will put the following code in the beginning of each function:
+ *      mov x0, x30
+ *      bl _mcount
+ *	[function's body ...]
+ * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
+ * ftrace is enabled.
+ *
+ * Please note that x0 as an argument will not be used here because we can
+ * get lr(x30) of instrumented function at any time by winding up call stack
+ * as long as the kernel is compiled without -fomit-frame-pointer.
+ * (or CONFIG_FRAME_POINTER, this is forced on arm64)
+ *
+ * stack layout after mcount_enter in _mcount():
+ *
+ * current sp/fp =>  0:+-----+
+ * in _mcount()        | x29 | -> instrumented function's fp
+ *                     +-----+
+ *                     | x30 | -> _mcount()'s lr (= instrumented function's pc)
+ * old sp       => +16:+-----+
+ * when instrumented   |     |
+ * function calls      | ... |
+ * _mcount()           |     |
+ *                     |     |
+ * instrumented => +xx:+-----+
+ * function's fp       | x29 | -> parent's fp
+ *                     +-----+
+ *                     | x30 | -> instrumented function's lr (= parent's pc)
+ *                     +-----+
+ *                     | ... |
+ */
+
+	.macro mcount_enter
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	.endm
+
+	.macro mcount_exit
+	ldp	x29, x30, [sp], #16
+	ret
+	.endm
+
+	.macro mcount_adjust_addr rd, rn
+	sub	\rd, \rn, #AARCH64_INSN_SIZE
+	.endm
+
+	/* for instrumented function's parent */
+	.macro mcount_get_parent_fp reg
+	ldr	\reg, [x29]
+	ldr	\reg, [\reg]
+	.endm
+
+	/* for instrumented function */
+	.macro mcount_get_pc0 reg
+	mcount_adjust_addr	\reg, x30
+	.endm
+
+	.macro mcount_get_pc reg
+	ldr	\reg, [x29, #8]
+	mcount_adjust_addr	\reg, \reg
+	.endm
+
+	.macro mcount_get_lr reg
+	ldr	\reg, [x29]
+	ldr	\reg, [\reg, #8]
+	mcount_adjust_addr	\reg, \reg
+	.endm
+
+	.macro mcount_get_lr_addr reg
+	ldr	\reg, [x29]
+	add	\reg, \reg, #8
+	.endm
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+/*
+ * void _mcount(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function makes calls, if enabled, to:
+ *     - tracer function to probe instrumented function's entry,
+ *     - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(_mcount)
+	mcount_enter
+
+	adrp	x0, ftrace_trace_function
+	ldr	x2, [x0, #:lo12:ftrace_trace_function]
+	adr	x0, ftrace_stub
+	cmp	x0, x2			// if (ftrace_trace_function
+	b.eq	skip_ftrace_call	//     != ftrace_stub) {
+
+	mcount_get_pc	x0		//       function's pc
+	mcount_get_lr	x1		//       function's lr (= parent's pc)
+	blr	x2			//   (*ftrace_trace_function)(pc, lr);
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
+skip_ftrace_call:			//   return;
+	mcount_exit			// }
+#else
+	mcount_exit			//   return;
+					// }
+skip_ftrace_call:
+	adrp	x1, ftrace_graph_return
+	ldr	x2, [x1, #:lo12:ftrace_graph_return]
+	cmp	x0, x2			//   if ((ftrace_graph_return
+	b.ne	ftrace_graph_caller	//        != ftrace_stub)
+
+	adrp	x1, ftrace_graph_entry	//     || (ftrace_graph_entry
+	adrp	x0, ftrace_graph_entry_stub //     != ftrace_graph_entry_stub))
+	ldr	x2, [x1, #:lo12:ftrace_graph_entry]
+	add	x0, x0, #:lo12:ftrace_graph_entry_stub
+	cmp	x0, x2
+	b.ne	ftrace_graph_caller	//     ftrace_graph_caller();
+
+	mcount_exit
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ENDPROC(_mcount)
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+/*
+ * _mcount() is used to build the kernel with -pg option, but all the branch
+ * instructions to _mcount() are replaced to NOP initially at kernel start up,
+ * and later on, NOP to branch to ftrace_caller() when enabled or branch to
+ * NOP when disabled per-function base.
+ */
+ENTRY(_mcount)
+	ret
+ENDPROC(_mcount)
+
+/*
+ * void ftrace_caller(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function is a counterpart of _mcount() in 'static' ftrace, and
+ * makes calls to:
+ *     - tracer function to probe instrumented function's entry,
+ *     - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(ftrace_caller)
+	mcount_enter
+
+	mcount_get_pc0	x0		//     function's pc
+	mcount_get_lr	x1		//     function's lr
+
+	.global ftrace_call
+ftrace_call:				// tracer(pc, lr);
+	nop				// This will be replaced with "bl xxx"
+					// where xxx can be any kind of tracer.
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.global ftrace_graph_call
+ftrace_graph_call:			// ftrace_graph_caller();
+	nop				// If enabled, this will be replaced
+					// "b ftrace_graph_caller"
+#endif
+
+	mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(ftrace_stub)
+	ret
+ENDPROC(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * void ftrace_graph_caller(void)
+ *
+ * Called from _mcount() or ftrace_caller() when function_graph tracer is
+ * selected.
+ * This function w/ prepare_ftrace_return() fakes link register's value on
+ * the call stack in order to intercept instrumented function's return path
+ * and run return_to_handler() later on its exit.
+ */
+ENTRY(ftrace_graph_caller)
+	mcount_get_lr_addr	  x0	//     pointer to function's saved lr
+	mcount_get_pc		  x1	//     function's pc
+	mcount_get_parent_fp	  x2	//     parent's fp
+	bl	prepare_ftrace_return	// prepare_ftrace_return(&lr, pc, fp)
+
+	mcount_exit
+ENDPROC(ftrace_graph_caller)
+
+/*
+ * void return_to_handler(void)
+ *
+ * Run ftrace_return_to_handler() before going back to parent.
+ * @fp is checked against the value passed by ftrace_graph_caller()
+ * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ */
+ENTRY(return_to_handler)
+	str	x0, [sp, #-16]!
+	mov	x0, x29			//     parent's fp
+	bl	ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
+	mov	x30, x0			// restore the original return address
+	ldr	x0, [sp], #16
+	ret
+END(return_to_handler)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
new file mode 100644
index 000000000..bddd04d03
--- /dev/null
+++ b/arch/arm64/kernel/entry.S
@@ -0,0 +1,731 @@
+/*
+ * Low-level exception handling code
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Authors:	Catalin Marinas <catalin.marinas@arm.com>
+ *		Will Deacon <will.deacon@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+#include <asm/alternative-asm.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
+#include <asm/errno.h>
+#include <asm/esr.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+/*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+	.macro ct_user_exit, syscall = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+	bl	context_tracking_user_exit
+	.if \syscall == 1
+	/*
+	 * Save/restore needed during syscalls.  Restore syscall arguments from
+	 * the values already saved on stack during kernel_entry.
+	 */
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, #S_X2]
+	ldp	x4, x5, [sp, #S_X4]
+	ldp	x6, x7, [sp, #S_X6]
+	.endif
+#endif
+	.endm
+
+	.macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+	bl	context_tracking_user_enter
+#endif
+	.endm
+
+/*
+ * Bad Abort numbers
+ *-----------------
+ */
+#define BAD_SYNC	0
+#define BAD_IRQ		1
+#define BAD_FIQ		2
+#define BAD_ERROR	3
+
+	.macro	kernel_entry, el, regsize = 64
+	sub	sp, sp, #S_FRAME_SIZE
+	.if	\regsize == 32
+	mov	w0, w0				// zero upper 32 bits of x0
+	.endif
+	stp	x0, x1, [sp, #16 * 0]
+	stp	x2, x3, [sp, #16 * 1]
+	stp	x4, x5, [sp, #16 * 2]
+	stp	x6, x7, [sp, #16 * 3]
+	stp	x8, x9, [sp, #16 * 4]
+	stp	x10, x11, [sp, #16 * 5]
+	stp	x12, x13, [sp, #16 * 6]
+	stp	x14, x15, [sp, #16 * 7]
+	stp	x16, x17, [sp, #16 * 8]
+	stp	x18, x19, [sp, #16 * 9]
+	stp	x20, x21, [sp, #16 * 10]
+	stp	x22, x23, [sp, #16 * 11]
+	stp	x24, x25, [sp, #16 * 12]
+	stp	x26, x27, [sp, #16 * 13]
+	stp	x28, x29, [sp, #16 * 14]
+
+	.if	\el == 0
+	mrs	x21, sp_el0
+	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
+	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
+	disable_step_tsk x19, x20		// exceptions when scheduling.
+	.else
+	add	x21, sp, #S_FRAME_SIZE
+	.endif
+	mrs	x22, elr_el1
+	mrs	x23, spsr_el1
+	stp	lr, x21, [sp, #S_LR]
+	stp	x22, x23, [sp, #S_PC]
+
+	/*
+	 * Set syscallno to -1 by default (overridden later if real syscall).
+	 */
+	.if	\el == 0
+	mvn	x21, xzr
+	str	x21, [sp, #S_SYSCALLNO]
+	.endif
+
+	/*
+	 * Registers that may be useful after this macro is invoked:
+	 *
+	 * x21 - aborted SP
+	 * x22 - aborted PC
+	 * x23 - aborted PSTATE
+	*/
+	.endm
+
+	.macro	kernel_exit, el, ret = 0
+	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
+	.if	\el == 0
+	ct_user_enter
+	ldr	x23, [sp, #S_SP]		// load return stack pointer
+	msr	sp_el0, x23
+
+#ifdef CONFIG_ARM64_ERRATUM_845719
+	alternative_insn						\
+	"nop",								\
+	"tbz x22, #4, 1f",						\
+	ARM64_WORKAROUND_845719
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+	alternative_insn						\
+	"nop; nop",							\
+	"mrs x29, contextidr_el1; msr contextidr_el1, x29; 1:",		\
+	ARM64_WORKAROUND_845719
+#else
+	alternative_insn						\
+	"nop",								\
+	"msr contextidr_el1, xzr; 1:",					\
+	ARM64_WORKAROUND_845719
+#endif
+#endif
+	.endif
+	msr	elr_el1, x21			// set up the return data
+	msr	spsr_el1, x22
+	.if	\ret
+	ldr	x1, [sp, #S_X1]			// preserve x0 (syscall return)
+	.else
+	ldp	x0, x1, [sp, #16 * 0]
+	.endif
+	ldp	x2, x3, [sp, #16 * 1]
+	ldp	x4, x5, [sp, #16 * 2]
+	ldp	x6, x7, [sp, #16 * 3]
+	ldp	x8, x9, [sp, #16 * 4]
+	ldp	x10, x11, [sp, #16 * 5]
+	ldp	x12, x13, [sp, #16 * 6]
+	ldp	x14, x15, [sp, #16 * 7]
+	ldp	x16, x17, [sp, #16 * 8]
+	ldp	x18, x19, [sp, #16 * 9]
+	ldp	x20, x21, [sp, #16 * 10]
+	ldp	x22, x23, [sp, #16 * 11]
+	ldp	x24, x25, [sp, #16 * 12]
+	ldp	x26, x27, [sp, #16 * 13]
+	ldp	x28, x29, [sp, #16 * 14]
+	ldr	lr, [sp, #S_LR]
+	add	sp, sp, #S_FRAME_SIZE		// restore sp
+	eret					// return to kernel
+	.endm
+
+	.macro	get_thread_info, rd
+	mov	\rd, sp
+	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
+	.endm
+
+/*
+ * These are the registers used in the syscall handler, and allow us to
+ * have in theory up to 7 arguments to a function - x0 to x6.
+ *
+ * x7 is reserved for the system call number in 32-bit mode.
+ */
+sc_nr	.req	x25		// number of system calls
+scno	.req	x26		// syscall number
+stbl	.req	x27		// syscall table pointer
+tsk	.req	x28		// current thread_info
+
+/*
+ * Interrupt handling.
+ */
+	.macro	irq_handler
+	adrp	x1, handle_arch_irq
+	ldr	x1, [x1, #:lo12:handle_arch_irq]
+	mov	x0, sp
+	blr	x1
+	.endm
+
+	.text
+
+/*
+ * Exception vectors.
+ */
+
+	.align	11
+ENTRY(vectors)
+	ventry	el1_sync_invalid		// Synchronous EL1t
+	ventry	el1_irq_invalid			// IRQ EL1t
+	ventry	el1_fiq_invalid			// FIQ EL1t
+	ventry	el1_error_invalid		// Error EL1t
+
+	ventry	el1_sync			// Synchronous EL1h
+	ventry	el1_irq				// IRQ EL1h
+	ventry	el1_fiq_invalid			// FIQ EL1h
+	ventry	el1_error_invalid		// Error EL1h
+
+	ventry	el0_sync			// Synchronous 64-bit EL0
+	ventry	el0_irq				// IRQ 64-bit EL0
+	ventry	el0_fiq_invalid			// FIQ 64-bit EL0
+	ventry	el0_error_invalid		// Error 64-bit EL0
+
+#ifdef CONFIG_COMPAT
+	ventry	el0_sync_compat			// Synchronous 32-bit EL0
+	ventry	el0_irq_compat			// IRQ 32-bit EL0
+	ventry	el0_fiq_invalid_compat		// FIQ 32-bit EL0
+	ventry	el0_error_invalid_compat	// Error 32-bit EL0
+#else
+	ventry	el0_sync_invalid		// Synchronous 32-bit EL0
+	ventry	el0_irq_invalid			// IRQ 32-bit EL0
+	ventry	el0_fiq_invalid			// FIQ 32-bit EL0
+	ventry	el0_error_invalid		// Error 32-bit EL0
+#endif
+END(vectors)
+
+/*
+ * Invalid mode handlers
+ */
+	.macro	inv_entry, el, reason, regsize = 64
+	kernel_entry el, \regsize
+	mov	x0, sp
+	mov	x1, #\reason
+	mrs	x2, esr_el1
+	b	bad_mode
+	.endm
+
+el0_sync_invalid:
+	inv_entry 0, BAD_SYNC
+ENDPROC(el0_sync_invalid)
+
+el0_irq_invalid:
+	inv_entry 0, BAD_IRQ
+ENDPROC(el0_irq_invalid)
+
+el0_fiq_invalid:
+	inv_entry 0, BAD_FIQ
+ENDPROC(el0_fiq_invalid)
+
+el0_error_invalid:
+	inv_entry 0, BAD_ERROR
+ENDPROC(el0_error_invalid)
+
+#ifdef CONFIG_COMPAT
+el0_fiq_invalid_compat:
+	inv_entry 0, BAD_FIQ, 32
+ENDPROC(el0_fiq_invalid_compat)
+
+el0_error_invalid_compat:
+	inv_entry 0, BAD_ERROR, 32
+ENDPROC(el0_error_invalid_compat)
+#endif
+
+el1_sync_invalid:
+	inv_entry 1, BAD_SYNC
+ENDPROC(el1_sync_invalid)
+
+el1_irq_invalid:
+	inv_entry 1, BAD_IRQ
+ENDPROC(el1_irq_invalid)
+
+el1_fiq_invalid:
+	inv_entry 1, BAD_FIQ
+ENDPROC(el1_fiq_invalid)
+
+el1_error_invalid:
+	inv_entry 1, BAD_ERROR
+ENDPROC(el1_error_invalid)
+
+/*
+ * EL1 mode handlers.
+ */
+	.align	6
+el1_sync:
+	kernel_entry 1
+	mrs	x1, esr_el1			// read the syndrome register
+	lsr	x24, x1, #ESR_ELx_EC_SHIFT	// exception class
+	cmp	x24, #ESR_ELx_EC_DABT_CUR	// data abort in EL1
+	b.eq	el1_da
+	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
+	b.eq	el1_undef
+	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
+	b.eq	el1_sp_pc
+	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
+	b.eq	el1_sp_pc
+	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL1
+	b.eq	el1_undef
+	cmp	x24, #ESR_ELx_EC_BREAKPT_CUR	// debug exception in EL1
+	b.ge	el1_dbg
+	b	el1_inv
+el1_da:
+	/*
+	 * Data abort handling
+	 */
+	mrs	x0, far_el1
+	enable_dbg
+	// re-enable interrupts if they were enabled in the aborted context
+	tbnz	x23, #7, 1f			// PSR_I_BIT
+	enable_irq
+1:
+	mov	x2, sp				// struct pt_regs
+	bl	do_mem_abort
+
+	// disable interrupts before pulling preserved data off the stack
+	disable_irq
+	kernel_exit 1
+el1_sp_pc:
+	/*
+	 * Stack or PC alignment exception handling
+	 */
+	mrs	x0, far_el1
+	enable_dbg
+	mov	x2, sp
+	b	do_sp_pc_abort
+el1_undef:
+	/*
+	 * Undefined instruction
+	 */
+	enable_dbg
+	mov	x0, sp
+	b	do_undefinstr
+el1_dbg:
+	/*
+	 * Debug exception handling
+	 */
+	cmp	x24, #ESR_ELx_EC_BRK64		// if BRK64
+	cinc	x24, x24, eq			// set bit '0'
+	tbz	x24, #0, el1_inv		// EL1 only
+	mrs	x0, far_el1
+	mov	x2, sp				// struct pt_regs
+	bl	do_debug_exception
+	kernel_exit 1
+el1_inv:
+	// TODO: add support for undefined instructions in kernel mode
+	enable_dbg
+	mov	x0, sp
+	mov	x1, #BAD_SYNC
+	mrs	x2, esr_el1
+	b	bad_mode
+ENDPROC(el1_sync)
+
+	.align	6
+el1_irq:
+	kernel_entry 1
+	enable_dbg
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off
+#endif
+
+	irq_handler
+
+#ifdef CONFIG_PREEMPT
+	get_thread_info tsk
+	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
+	cbnz	w24, 1f				// preempt count != 0
+	ldr	x0, [tsk, #TI_FLAGS]		// get flags
+	tbz	x0, #TIF_NEED_RESCHED, 1f	// needs rescheduling?
+	bl	el1_preempt
+1:
+#endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_on
+#endif
+	kernel_exit 1
+ENDPROC(el1_irq)
+
+#ifdef CONFIG_PREEMPT
+el1_preempt:
+	mov	x24, lr
+1:	bl	preempt_schedule_irq		// irq en/disable is done inside
+	ldr	x0, [tsk, #TI_FLAGS]		// get new tasks TI_FLAGS
+	tbnz	x0, #TIF_NEED_RESCHED, 1b	// needs rescheduling?
+	ret	x24
+#endif
+
+/*
+ * EL0 mode handlers.
+ */
+	.align	6
+el0_sync:
+	kernel_entry 0
+	mrs	x25, esr_el1			// read the syndrome register
+	lsr	x24, x25, #ESR_ELx_EC_SHIFT	// exception class
+	cmp	x24, #ESR_ELx_EC_SVC64		// SVC in 64-bit state
+	b.eq	el0_svc
+	cmp	x24, #ESR_ELx_EC_DABT_LOW	// data abort in EL0
+	b.eq	el0_da
+	cmp	x24, #ESR_ELx_EC_IABT_LOW	// instruction abort in EL0
+	b.eq	el0_ia
+	cmp	x24, #ESR_ELx_EC_FP_ASIMD	// FP/ASIMD access
+	b.eq	el0_fpsimd_acc
+	cmp	x24, #ESR_ELx_EC_FP_EXC64	// FP/ASIMD exception
+	b.eq	el0_fpsimd_exc
+	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
+	b.eq	el0_sp_pc
+	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
+	b.eq	el0_sp_pc
+	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_BREAKPT_LOW	// debug exception in EL0
+	b.ge	el0_dbg
+	b	el0_inv
+
+#ifdef CONFIG_COMPAT
+	.align	6
+el0_sync_compat:
+	kernel_entry 0, 32
+	mrs	x25, esr_el1			// read the syndrome register
+	lsr	x24, x25, #ESR_ELx_EC_SHIFT	// exception class
+	cmp	x24, #ESR_ELx_EC_SVC32		// SVC in 32-bit state
+	b.eq	el0_svc_compat
+	cmp	x24, #ESR_ELx_EC_DABT_LOW	// data abort in EL0
+	b.eq	el0_da
+	cmp	x24, #ESR_ELx_EC_IABT_LOW	// instruction abort in EL0
+	b.eq	el0_ia
+	cmp	x24, #ESR_ELx_EC_FP_ASIMD	// FP/ASIMD access
+	b.eq	el0_fpsimd_acc
+	cmp	x24, #ESR_ELx_EC_FP_EXC32	// FP/ASIMD exception
+	b.eq	el0_fpsimd_exc
+	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_CP15_32	// CP15 MRC/MCR trap
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_CP15_64	// CP15 MRRC/MCRR trap
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_CP14_MR	// CP14 MRC/MCR trap
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_CP14_LS	// CP14 LDC/STC trap
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_CP14_64	// CP14 MRRC/MCRR trap
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_BREAKPT_LOW	// debug exception in EL0
+	b.ge	el0_dbg
+	b	el0_inv
+el0_svc_compat:
+	/*
+	 * AArch32 syscall handling
+	 */
+	adrp	stbl, compat_sys_call_table	// load compat syscall table pointer
+	uxtw	scno, w7			// syscall number in w7 (r7)
+	mov     sc_nr, #__NR_compat_syscalls
+	b	el0_svc_naked
+
+	.align	6
+el0_irq_compat:
+	kernel_entry 0, 32
+	b	el0_irq_naked
+#endif
+
+el0_da:
+	/*
+	 * Data abort handling
+	 */
+	mrs	x26, far_el1
+	// enable interrupts before calling the main handler
+	enable_dbg_and_irq
+	ct_user_exit
+	bic	x0, x26, #(0xff << 56)
+	mov	x1, x25
+	mov	x2, sp
+	bl	do_mem_abort
+	b	ret_to_user
+el0_ia:
+	/*
+	 * Instruction abort handling
+	 */
+	mrs	x26, far_el1
+	// enable interrupts before calling the main handler
+	enable_dbg_and_irq
+	ct_user_exit
+	mov	x0, x26
+	orr	x1, x25, #1 << 24		// use reserved ISS bit for instruction aborts
+	mov	x2, sp
+	bl	do_mem_abort
+	b	ret_to_user
+el0_fpsimd_acc:
+	/*
+	 * Floating Point or Advanced SIMD access
+	 */
+	enable_dbg
+	ct_user_exit
+	mov	x0, x25
+	mov	x1, sp
+	bl	do_fpsimd_acc
+	b	ret_to_user
+el0_fpsimd_exc:
+	/*
+	 * Floating Point or Advanced SIMD exception
+	 */
+	enable_dbg
+	ct_user_exit
+	mov	x0, x25
+	mov	x1, sp
+	bl	do_fpsimd_exc
+	b	ret_to_user
+el0_sp_pc:
+	/*
+	 * Stack or PC alignment exception handling
+	 */
+	mrs	x26, far_el1
+	// enable interrupts before calling the main handler
+	enable_dbg_and_irq
+	ct_user_exit
+	mov	x0, x26
+	mov	x1, x25
+	mov	x2, sp
+	bl	do_sp_pc_abort
+	b	ret_to_user
+el0_undef:
+	/*
+	 * Undefined instruction
+	 */
+	// enable interrupts before calling the main handler
+	enable_dbg_and_irq
+	ct_user_exit
+	mov	x0, sp
+	bl	do_undefinstr
+	b	ret_to_user
+el0_dbg:
+	/*
+	 * Debug exception handling
+	 */
+	tbnz	x24, #0, el0_inv		// EL0 only
+	mrs	x0, far_el1
+	mov	x1, x25
+	mov	x2, sp
+	bl	do_debug_exception
+	enable_dbg
+	ct_user_exit
+	b	ret_to_user
+el0_inv:
+	enable_dbg
+	ct_user_exit
+	mov	x0, sp
+	mov	x1, #BAD_SYNC
+	mrs	x2, esr_el1
+	bl	bad_mode
+	b	ret_to_user
+ENDPROC(el0_sync)
+
+	.align	6
+el0_irq:
+	kernel_entry 0
+el0_irq_naked:
+	enable_dbg
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off
+#endif
+
+	ct_user_exit
+	irq_handler
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_on
+#endif
+	b	ret_to_user
+ENDPROC(el0_irq)
+
+/*
+ * Register switch for AArch64. The callee-saved registers need to be saved
+ * and restored. On entry:
+ *   x0 = previous task_struct (must be preserved across the switch)
+ *   x1 = next task_struct
+ * Previous and next are guaranteed not to be the same.
+ *
+ */
+ENTRY(cpu_switch_to)
+	add	x8, x0, #THREAD_CPU_CONTEXT
+	mov	x9, sp
+	stp	x19, x20, [x8], #16		// store callee-saved registers
+	stp	x21, x22, [x8], #16
+	stp	x23, x24, [x8], #16
+	stp	x25, x26, [x8], #16
+	stp	x27, x28, [x8], #16
+	stp	x29, x9, [x8], #16
+	str	lr, [x8]
+	add	x8, x1, #THREAD_CPU_CONTEXT
+	ldp	x19, x20, [x8], #16		// restore callee-saved registers
+	ldp	x21, x22, [x8], #16
+	ldp	x23, x24, [x8], #16
+	ldp	x25, x26, [x8], #16
+	ldp	x27, x28, [x8], #16
+	ldp	x29, x9, [x8], #16
+	ldr	lr, [x8]
+	mov	sp, x9
+	ret
+ENDPROC(cpu_switch_to)
+
+/*
+ * This is the fast syscall return path.  We do as little as possible here,
+ * and this includes saving x0 back into the kernel stack.
+ */
+ret_fast_syscall:
+	disable_irq				// disable interrupts
+	ldr	x1, [tsk, #TI_FLAGS]
+	and	x2, x1, #_TIF_WORK_MASK
+	cbnz	x2, fast_work_pending
+	enable_step_tsk x1, x2
+	kernel_exit 0, ret = 1
+
+/*
+ * Ok, we need to do extra processing, enter the slow path.
+ */
+fast_work_pending:
+	str	x0, [sp, #S_X0]			// returned x0
+work_pending:
+	tbnz	x1, #TIF_NEED_RESCHED, work_resched
+	/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
+	ldr	x2, [sp, #S_PSTATE]
+	mov	x0, sp				// 'regs'
+	tst	x2, #PSR_MODE_MASK		// user mode regs?
+	b.ne	no_work_pending			// returning to kernel
+	enable_irq				// enable interrupts for do_notify_resume()
+	bl	do_notify_resume
+	b	ret_to_user
+work_resched:
+	bl	schedule
+
+/*
+ * "slow" syscall return path.
+ */
+ret_to_user:
+	disable_irq				// disable interrupts
+	ldr	x1, [tsk, #TI_FLAGS]
+	and	x2, x1, #_TIF_WORK_MASK
+	cbnz	x2, work_pending
+	enable_step_tsk x1, x2
+no_work_pending:
+	kernel_exit 0, ret = 0
+ENDPROC(ret_to_user)
+
+/*
+ * This is how we return from a fork.
+ */
+ENTRY(ret_from_fork)
+	bl	schedule_tail
+	cbz	x19, 1f				// not a kernel thread
+	mov	x0, x20
+	blr	x19
+1:	get_thread_info tsk
+	b	ret_to_user
+ENDPROC(ret_from_fork)
+
+/*
+ * SVC handler.
+ */
+	.align	6
+el0_svc:
+	adrp	stbl, sys_call_table		// load syscall table pointer
+	uxtw	scno, w8			// syscall number in w8
+	mov	sc_nr, #__NR_syscalls
+el0_svc_naked:					// compat entry point
+	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
+	enable_dbg_and_irq
+	ct_user_exit 1
+
+	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks
+	tst	x16, #_TIF_SYSCALL_WORK
+	b.ne	__sys_trace
+	cmp     scno, sc_nr                     // check upper syscall limit
+	b.hs	ni_sys
+	ldr	x16, [stbl, scno, lsl #3]	// address in the syscall table
+	blr	x16				// call sys_* routine
+	b	ret_fast_syscall
+ni_sys:
+	mov	x0, sp
+	bl	do_ni_syscall
+	b	ret_fast_syscall
+ENDPROC(el0_svc)
+
+	/*
+	 * This is the really slow path.  We're going to be doing context
+	 * switches, and waiting for our parent to respond.
+	 */
+__sys_trace:
+	mov	w0, #-1				// set default errno for
+	cmp     scno, x0			// user-issued syscall(-1)
+	b.ne	1f
+	mov	x0, #-ENOSYS
+	str	x0, [sp, #S_X0]
+1:	mov	x0, sp
+	bl	syscall_trace_enter
+	cmp	w0, #-1				// skip the syscall?
+	b.eq	__sys_trace_return_skipped
+	uxtw	scno, w0			// syscall number (possibly new)
+	mov	x1, sp				// pointer to regs
+	cmp	scno, sc_nr			// check upper syscall limit
+	b.hs	__ni_sys_trace
+	ldp	x0, x1, [sp]			// restore the syscall args
+	ldp	x2, x3, [sp, #S_X2]
+	ldp	x4, x5, [sp, #S_X4]
+	ldp	x6, x7, [sp, #S_X6]
+	ldr	x16, [stbl, scno, lsl #3]	// address in the syscall table
+	blr	x16				// call sys_* routine
+
+__sys_trace_return:
+	str	x0, [sp, #S_X0]			// save returned x0
+__sys_trace_return_skipped:
+	mov	x0, sp
+	bl	syscall_trace_exit
+	b	ret_to_user
+
+__ni_sys_trace:
+	mov	x0, sp
+	bl	do_ni_syscall
+	b	__sys_trace_return
+
+/*
+ * Special system call wrappers.
+ */
+ENTRY(sys_rt_sigreturn_wrapper)
+	mov	x0, sp
+	b	sys_rt_sigreturn
+ENDPROC(sys_rt_sigreturn_wrapper)
diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S
new file mode 100644
index 000000000..bd9bfaa92
--- /dev/null
+++ b/arch/arm64/kernel/entry32.S
@@ -0,0 +1,123 @@
+/*
+ * Compat system call wrappers
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Authors: Will Deacon <will.deacon@arm.com>
+ *	    Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/const.h>
+
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/page.h>
+
+/*
+ * System call wrappers for the AArch32 compatibility layer.
+ */
+
+ENTRY(compat_sys_sigreturn_wrapper)
+	mov	x0, sp
+	mov	x27, #0		// prevent syscall restart handling (why)
+	b	compat_sys_sigreturn
+ENDPROC(compat_sys_sigreturn_wrapper)
+
+ENTRY(compat_sys_rt_sigreturn_wrapper)
+	mov	x0, sp
+	mov	x27, #0		// prevent syscall restart handling (why)
+	b	compat_sys_rt_sigreturn
+ENDPROC(compat_sys_rt_sigreturn_wrapper)
+
+ENTRY(compat_sys_statfs64_wrapper)
+	mov	w3, #84
+	cmp	w1, #88
+	csel	w1, w3, w1, eq
+	b	compat_sys_statfs64
+ENDPROC(compat_sys_statfs64_wrapper)
+
+ENTRY(compat_sys_fstatfs64_wrapper)
+	mov	w3, #84
+	cmp	w1, #88
+	csel	w1, w3, w1, eq
+	b	compat_sys_fstatfs64
+ENDPROC(compat_sys_fstatfs64_wrapper)
+
+/*
+ * Note: off_4k (w5) is always in units of 4K. If we can't do the
+ * requested offset because it is not page-aligned, we return -EINVAL.
+ */
+ENTRY(compat_sys_mmap2_wrapper)
+#if PAGE_SHIFT > 12
+	tst	w5, #~PAGE_MASK >> 12
+	b.ne	1f
+	lsr	w5, w5, #PAGE_SHIFT - 12
+#endif
+	b	sys_mmap_pgoff
+1:	mov	x0, #-EINVAL
+	ret
+ENDPROC(compat_sys_mmap2_wrapper)
+
+/*
+ * Wrappers for AArch32 syscalls that either take 64-bit parameters
+ * in registers or that take 32-bit parameters which require sign
+ * extension.
+ */
+ENTRY(compat_sys_pread64_wrapper)
+	regs_to_64	x3, x4, x5
+	b	sys_pread64
+ENDPROC(compat_sys_pread64_wrapper)
+
+ENTRY(compat_sys_pwrite64_wrapper)
+	regs_to_64	x3, x4, x5
+	b	sys_pwrite64
+ENDPROC(compat_sys_pwrite64_wrapper)
+
+ENTRY(compat_sys_truncate64_wrapper)
+	regs_to_64	x1, x2, x3
+	b	sys_truncate
+ENDPROC(compat_sys_truncate64_wrapper)
+
+ENTRY(compat_sys_ftruncate64_wrapper)
+	regs_to_64	x1, x2, x3
+	b	sys_ftruncate
+ENDPROC(compat_sys_ftruncate64_wrapper)
+
+ENTRY(compat_sys_readahead_wrapper)
+	regs_to_64	x1, x2, x3
+	mov	w2, w4
+	b	sys_readahead
+ENDPROC(compat_sys_readahead_wrapper)
+
+ENTRY(compat_sys_fadvise64_64_wrapper)
+	mov	w6, w1
+	regs_to_64	x1, x2, x3
+	regs_to_64	x2, x4, x5
+	mov	w3, w6
+	b	sys_fadvise64_64
+ENDPROC(compat_sys_fadvise64_64_wrapper)
+
+ENTRY(compat_sys_sync_file_range2_wrapper)
+	regs_to_64	x2, x2, x3
+	regs_to_64	x3, x4, x5
+	b	sys_sync_file_range2
+ENDPROC(compat_sys_sync_file_range2_wrapper)
+
+ENTRY(compat_sys_fallocate_wrapper)
+	regs_to_64	x2, x2, x3
+	regs_to_64	x3, x4, x5
+	b	sys_fallocate
+ENDPROC(compat_sys_fallocate_wrapper)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
new file mode 100644
index 000000000..3dca15634
--- /dev/null
+++ b/arch/arm64/kernel/fpsimd.c
@@ -0,0 +1,321 @@
+/*
+ * FP/SIMD context switching and fault handling
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu_pm.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/hardirq.h>
+
+#include <asm/fpsimd.h>
+#include <asm/cputype.h>
+
+#define FPEXC_IOF	(1 << 0)
+#define FPEXC_DZF	(1 << 1)
+#define FPEXC_OFF	(1 << 2)
+#define FPEXC_UFF	(1 << 3)
+#define FPEXC_IXF	(1 << 4)
+#define FPEXC_IDF	(1 << 7)
+
+/*
+ * In order to reduce the number of times the FPSIMD state is needlessly saved
+ * and restored, we need to keep track of two things:
+ * (a) for each task, we need to remember which CPU was the last one to have
+ *     the task's FPSIMD state loaded into its FPSIMD registers;
+ * (b) for each CPU, we need to remember which task's userland FPSIMD state has
+ *     been loaded into its FPSIMD registers most recently, or whether it has
+ *     been used to perform kernel mode NEON in the meantime.
+ *
+ * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
+ * the id of the current CPU everytime the state is loaded onto a CPU. For (b),
+ * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
+ * address of the userland FPSIMD state of the task that was loaded onto the CPU
+ * the most recently, or NULL if kernel mode NEON has been performed after that.
+ *
+ * With this in place, we no longer have to restore the next FPSIMD state right
+ * when switching between tasks. Instead, we can defer this check to userland
+ * resume, at which time we verify whether the CPU's fpsimd_last_state and the
+ * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
+ * can omit the FPSIMD restore.
+ *
+ * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
+ * indicate whether or not the userland FPSIMD state of the current task is
+ * present in the registers. The flag is set unless the FPSIMD registers of this
+ * CPU currently contain the most recent userland FPSIMD state of the current
+ * task.
+ *
+ * For a certain task, the sequence may look something like this:
+ * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
+ *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
+ *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
+ *   cleared, otherwise it is set;
+ *
+ * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
+ *   userland FPSIMD state is copied from memory to the registers, the task's
+ *   fpsimd_state.cpu field is set to the id of the current CPU, the current
+ *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
+ *   TIF_FOREIGN_FPSTATE flag is cleared;
+ *
+ * - the task executes an ordinary syscall; upon return to userland, the
+ *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
+ *   restored;
+ *
+ * - the task executes a syscall which executes some NEON instructions; this is
+ *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
+ *   register contents to memory, clears the fpsimd_last_state per-cpu variable
+ *   and sets the TIF_FOREIGN_FPSTATE flag;
+ *
+ * - the task gets preempted after kernel_neon_end() is called; as we have not
+ *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
+ *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
+ */
+static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+
+/*
+ * Trapped FP/ASIMD access.
+ */
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+{
+	/* TODO: implement lazy context saving/restoring */
+	WARN_ON(1);
+}
+
+/*
+ * Raise a SIGFPE for the current process.
+ */
+void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+{
+	siginfo_t info;
+	unsigned int si_code = 0;
+
+	if (esr & FPEXC_IOF)
+		si_code = FPE_FLTINV;
+	else if (esr & FPEXC_DZF)
+		si_code = FPE_FLTDIV;
+	else if (esr & FPEXC_OFF)
+		si_code = FPE_FLTOVF;
+	else if (esr & FPEXC_UFF)
+		si_code = FPE_FLTUND;
+	else if (esr & FPEXC_IXF)
+		si_code = FPE_FLTRES;
+
+	memset(&info, 0, sizeof(info));
+	info.si_signo = SIGFPE;
+	info.si_code = si_code;
+	info.si_addr = (void __user *)instruction_pointer(regs);
+
+	send_sig_info(SIGFPE, &info, current);
+}
+
+void fpsimd_thread_switch(struct task_struct *next)
+{
+	/*
+	 * Save the current FPSIMD state to memory, but only if whatever is in
+	 * the registers is in fact the most recent userland FPSIMD state of
+	 * 'current'.
+	 */
+	if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
+		fpsimd_save_state(&current->thread.fpsimd_state);
+
+	if (next->mm) {
+		/*
+		 * If we are switching to a task whose most recent userland
+		 * FPSIMD state is already in the registers of *this* cpu,
+		 * we can skip loading the state from memory. Otherwise, set
+		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
+		 * upon the next return to userland.
+		 */
+		struct fpsimd_state *st = &next->thread.fpsimd_state;
+
+		if (__this_cpu_read(fpsimd_last_state) == st
+		    && st->cpu == smp_processor_id())
+			clear_ti_thread_flag(task_thread_info(next),
+					     TIF_FOREIGN_FPSTATE);
+		else
+			set_ti_thread_flag(task_thread_info(next),
+					   TIF_FOREIGN_FPSTATE);
+	}
+}
+
+void fpsimd_flush_thread(void)
+{
+	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
+	set_thread_flag(TIF_FOREIGN_FPSTATE);
+}
+
+/*
+ * Save the userland FPSIMD state of 'current' to memory, but only if the state
+ * currently held in the registers does in fact belong to 'current'
+ */
+void fpsimd_preserve_current_state(void)
+{
+	preempt_disable();
+	if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
+		fpsimd_save_state(&current->thread.fpsimd_state);
+	preempt_enable();
+}
+
+/*
+ * Load the userland FPSIMD state of 'current' from memory, but only if the
+ * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
+ * state of 'current'
+ */
+void fpsimd_restore_current_state(void)
+{
+	preempt_disable();
+	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+		fpsimd_load_state(st);
+		this_cpu_write(fpsimd_last_state, st);
+		st->cpu = smp_processor_id();
+	}
+	preempt_enable();
+}
+
+/*
+ * Load an updated userland FPSIMD state for 'current' from memory and set the
+ * flag that indicates that the FPSIMD register contents are the most recent
+ * FPSIMD state of 'current'
+ */
+void fpsimd_update_current_state(struct fpsimd_state *state)
+{
+	preempt_disable();
+	fpsimd_load_state(state);
+	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+		this_cpu_write(fpsimd_last_state, st);
+		st->cpu = smp_processor_id();
+	}
+	preempt_enable();
+}
+
+/*
+ * Invalidate live CPU copies of task t's FPSIMD state
+ */
+void fpsimd_flush_task_state(struct task_struct *t)
+{
+	t->thread.fpsimd_state.cpu = NR_CPUS;
+}
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
+
+/*
+ * Kernel-side NEON support functions
+ */
+void kernel_neon_begin_partial(u32 num_regs)
+{
+	if (in_interrupt()) {
+		struct fpsimd_partial_state *s = this_cpu_ptr(
+			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+
+		BUG_ON(num_regs > 32);
+		fpsimd_save_partial_state(s, roundup(num_regs, 2));
+	} else {
+		/*
+		 * Save the userland FPSIMD state if we have one and if we
+		 * haven't done so already. Clear fpsimd_last_state to indicate
+		 * that there is no longer userland FPSIMD state in the
+		 * registers.
+		 */
+		preempt_disable();
+		if (current->mm &&
+		    !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+			fpsimd_save_state(&current->thread.fpsimd_state);
+		this_cpu_write(fpsimd_last_state, NULL);
+	}
+}
+EXPORT_SYMBOL(kernel_neon_begin_partial);
+
+void kernel_neon_end(void)
+{
+	if (in_interrupt()) {
+		struct fpsimd_partial_state *s = this_cpu_ptr(
+			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+		fpsimd_load_partial_state(s);
+	} else {
+		preempt_enable();
+	}
+}
+EXPORT_SYMBOL(kernel_neon_end);
+
+#endif /* CONFIG_KERNEL_MODE_NEON */
+
+#ifdef CONFIG_CPU_PM
+static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
+				  unsigned long cmd, void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
+			fpsimd_save_state(&current->thread.fpsimd_state);
+		this_cpu_write(fpsimd_last_state, NULL);
+		break;
+	case CPU_PM_EXIT:
+		if (current->mm)
+			set_thread_flag(TIF_FOREIGN_FPSTATE);
+		break;
+	case CPU_PM_ENTER_FAILED:
+	default:
+		return NOTIFY_DONE;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block fpsimd_cpu_pm_notifier_block = {
+	.notifier_call = fpsimd_cpu_pm_notifier,
+};
+
+static void fpsimd_pm_init(void)
+{
+	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
+}
+
+#else
+static inline void fpsimd_pm_init(void) { }
+#endif /* CONFIG_CPU_PM */
+
+/*
+ * FP/SIMD support code initialisation.
+ */
+static int __init fpsimd_init(void)
+{
+	u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
+
+	if (pfr & (0xf << 16)) {
+		pr_notice("Floating-point is not implemented\n");
+		return 0;
+	}
+	elf_hwcap |= HWCAP_FP;
+
+	if (pfr & (0xf << 20))
+		pr_notice("Advanced SIMD is not implemented\n");
+	else
+		elf_hwcap |= HWCAP_ASIMD;
+
+	fpsimd_pm_init();
+
+	return 0;
+}
+late_initcall(fpsimd_init);
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
new file mode 100644
index 000000000..c851be795
--- /dev/null
+++ b/arch/arm64/kernel/ftrace.c
@@ -0,0 +1,178 @@
+/*
+ * arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/swab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Replace a single instruction, which may be a branch or NOP.
+ * If @validate == true, a replaced instruction is checked against 'old'.
+ */
+static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
+			      bool validate)
+{
+	u32 replaced;
+
+	/*
+	 * Note:
+	 * Due to modules and __init, code can disappear and change,
+	 * we need to protect against faulting as well as code changing.
+	 * We do this by aarch64_insn_*() which use the probe_kernel_*().
+	 *
+	 * No lock is held here because all the modifications are run
+	 * through stop_machine().
+	 */
+	if (validate) {
+		if (aarch64_insn_read((void *)pc, &replaced))
+			return -EFAULT;
+
+		if (replaced != old)
+			return -EINVAL;
+	}
+	if (aarch64_insn_patch_text_nosync((void *)pc, new))
+		return -EPERM;
+
+	return 0;
+}
+
+/*
+ * Replace tracer function in ftrace_caller()
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long pc;
+	u32 new;
+
+	pc = (unsigned long)&ftrace_call;
+	new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func,
+					  AARCH64_INSN_BRANCH_LINK);
+
+	return ftrace_modify_code(pc, 0, new, false);
+}
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long pc = rec->ip;
+	u32 old, new;
+
+	old = aarch64_insn_gen_nop();
+	new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * Turn off the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	unsigned long pc = rec->ip;
+	u32 old, new;
+
+	old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+	new = aarch64_insn_gen_nop();
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * function_graph tracer expects ftrace_return_to_handler() to be called
+ * on the way back to parent. For this purpose, this function is called
+ * in _mcount() or ftrace_caller() to replace return address (*parent) on
+ * the call stack to return_to_handler.
+ *
+ * Note that @frame_pointer is used only for sanity check later.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+	unsigned long old;
+	struct ftrace_graph_ent trace;
+	int err;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	/*
+	 * Note:
+	 * No protection against faulting at *parent, which may be seen
+	 * on other archs. It's unlikely on AArch64.
+	 */
+	old = *parent;
+	*parent = return_hooker;
+
+	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		*parent = old;
+		return;
+	}
+
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+				       frame_pointer);
+	if (err == -EBUSY) {
+		*parent = old;
+		return;
+	}
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
+ * depending on @enable.
+ */
+static int ftrace_modify_graph_caller(bool enable)
+{
+	unsigned long pc = (unsigned long)&ftrace_graph_call;
+	u32 branch, nop;
+
+	branch = aarch64_insn_gen_branch_imm(pc,
+					     (unsigned long)ftrace_graph_caller,
+					     AARCH64_INSN_BRANCH_NOLINK);
+	nop = aarch64_insn_gen_nop();
+
+	if (enable)
+		return ftrace_modify_code(pc, nop, branch, true);
+	else
+		return ftrace_modify_code(pc, branch, nop, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
new file mode 100644
index 000000000..19f915e8f
--- /dev/null
+++ b/arch/arm64/kernel/head.S
@@ -0,0 +1,681 @@
+/*
+ * Low-level CPU initialisation
+ * Based on arch/arm/kernel/head.S
+ *
+ * Copyright (C) 1994-2002 Russell King
+ * Copyright (C) 2003-2012 ARM Ltd.
+ * Authors:	Catalin Marinas <catalin.marinas@arm.com>
+ *		Will Deacon <will.deacon@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/assembler.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/cputype.h>
+#include <asm/memory.h>
+#include <asm/thread_info.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/virt.h>
+
+#define __PHYS_OFFSET	(KERNEL_START - TEXT_OFFSET)
+
+#if (TEXT_OFFSET & 0xfff) != 0
+#error TEXT_OFFSET must be at least 4KB aligned
+#elif (PAGE_OFFSET & 0x1fffff) != 0
+#error PAGE_OFFSET must be at least 2MB aligned
+#elif TEXT_OFFSET > 0x1fffff
+#error TEXT_OFFSET must be less than 2MB
+#endif
+
+#ifdef CONFIG_ARM64_64K_PAGES
+#define BLOCK_SHIFT	PAGE_SHIFT
+#define BLOCK_SIZE	PAGE_SIZE
+#define TABLE_SHIFT	PMD_SHIFT
+#else
+#define BLOCK_SHIFT	SECTION_SHIFT
+#define BLOCK_SIZE	SECTION_SIZE
+#define TABLE_SHIFT	PUD_SHIFT
+#endif
+
+#define KERNEL_START	_text
+#define KERNEL_END	_end
+
+/*
+ * Initial memory map attributes.
+ */
+#ifndef CONFIG_SMP
+#define PTE_FLAGS	PTE_TYPE_PAGE | PTE_AF
+#define PMD_FLAGS	PMD_TYPE_SECT | PMD_SECT_AF
+#else
+#define PTE_FLAGS	PTE_TYPE_PAGE | PTE_AF | PTE_SHARED
+#define PMD_FLAGS	PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
+#endif
+
+#ifdef CONFIG_ARM64_64K_PAGES
+#define MM_MMUFLAGS	PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS
+#else
+#define MM_MMUFLAGS	PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS
+#endif
+
+/*
+ * Kernel startup entry point.
+ * ---------------------------
+ *
+ * The requirements are:
+ *   MMU = off, D-cache = off, I-cache = on or off,
+ *   x0 = physical address to the FDT blob.
+ *
+ * This code is mostly position independent so you call this at
+ * __pa(PAGE_OFFSET + TEXT_OFFSET).
+ *
+ * Note that the callee-saved registers are used for storing variables
+ * that are useful before the MMU is enabled. The allocations are described
+ * in the entry routines.
+ */
+	__HEAD
+
+	/*
+	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.
+	 */
+#ifdef CONFIG_EFI
+efi_head:
+	/*
+	 * This add instruction has no meaningful effect except that
+	 * its opcode forms the magic "MZ" signature required by UEFI.
+	 */
+	add	x13, x18, #0x16
+	b	stext
+#else
+	b	stext				// branch to kernel start, magic
+	.long	0				// reserved
+#endif
+	.quad	_kernel_offset_le		// Image load offset from start of RAM, little-endian
+	.quad	_kernel_size_le			// Effective size of kernel image, little-endian
+	.quad	_kernel_flags_le		// Informative flags, little-endian
+	.quad	0				// reserved
+	.quad	0				// reserved
+	.quad	0				// reserved
+	.byte	0x41				// Magic number, "ARM\x64"
+	.byte	0x52
+	.byte	0x4d
+	.byte	0x64
+#ifdef CONFIG_EFI
+	.long	pe_header - efi_head		// Offset to the PE header.
+#else
+	.word	0				// reserved
+#endif
+
+#ifdef CONFIG_EFI
+	.globl	stext_offset
+	.set	stext_offset, stext - efi_head
+	.align 3
+pe_header:
+	.ascii	"PE"
+	.short 	0
+coff_header:
+	.short	0xaa64				// AArch64
+	.short	2				// nr_sections
+	.long	0 				// TimeDateStamp
+	.long	0				// PointerToSymbolTable
+	.long	1				// NumberOfSymbols
+	.short	section_table - optional_header	// SizeOfOptionalHeader
+	.short	0x206				// Characteristics.
+						// IMAGE_FILE_DEBUG_STRIPPED |
+						// IMAGE_FILE_EXECUTABLE_IMAGE |
+						// IMAGE_FILE_LINE_NUMS_STRIPPED
+optional_header:
+	.short	0x20b				// PE32+ format
+	.byte	0x02				// MajorLinkerVersion
+	.byte	0x14				// MinorLinkerVersion
+	.long	_end - stext			// SizeOfCode
+	.long	0				// SizeOfInitializedData
+	.long	0				// SizeOfUninitializedData
+	.long	efi_stub_entry - efi_head	// AddressOfEntryPoint
+	.long	stext_offset			// BaseOfCode
+
+extra_header_fields:
+	.quad	0				// ImageBase
+	.long	0x1000				// SectionAlignment
+	.long	PECOFF_FILE_ALIGNMENT		// FileAlignment
+	.short	0				// MajorOperatingSystemVersion
+	.short	0				// MinorOperatingSystemVersion
+	.short	0				// MajorImageVersion
+	.short	0				// MinorImageVersion
+	.short	0				// MajorSubsystemVersion
+	.short	0				// MinorSubsystemVersion
+	.long	0				// Win32VersionValue
+
+	.long	_end - efi_head			// SizeOfImage
+
+	// Everything before the kernel image is considered part of the header
+	.long	stext_offset			// SizeOfHeaders
+	.long	0				// CheckSum
+	.short	0xa				// Subsystem (EFI application)
+	.short	0				// DllCharacteristics
+	.quad	0				// SizeOfStackReserve
+	.quad	0				// SizeOfStackCommit
+	.quad	0				// SizeOfHeapReserve
+	.quad	0				// SizeOfHeapCommit
+	.long	0				// LoaderFlags
+	.long	0x6				// NumberOfRvaAndSizes
+
+	.quad	0				// ExportTable
+	.quad	0				// ImportTable
+	.quad	0				// ResourceTable
+	.quad	0				// ExceptionTable
+	.quad	0				// CertificationTable
+	.quad	0				// BaseRelocationTable
+
+	// Section table
+section_table:
+
+	/*
+	 * The EFI application loader requires a relocation section
+	 * because EFI applications must be relocatable.  This is a
+	 * dummy section as far as we are concerned.
+	 */
+	.ascii	".reloc"
+	.byte	0
+	.byte	0			// end of 0 padding of section name
+	.long	0
+	.long	0
+	.long	0			// SizeOfRawData
+	.long	0			// PointerToRawData
+	.long	0			// PointerToRelocations
+	.long	0			// PointerToLineNumbers
+	.short	0			// NumberOfRelocations
+	.short	0			// NumberOfLineNumbers
+	.long	0x42100040		// Characteristics (section flags)
+
+
+	.ascii	".text"
+	.byte	0
+	.byte	0
+	.byte	0        		// end of 0 padding of section name
+	.long	_end - stext		// VirtualSize
+	.long	stext_offset		// VirtualAddress
+	.long	_edata - stext		// SizeOfRawData
+	.long	stext_offset		// PointerToRawData
+
+	.long	0		// PointerToRelocations (0 for executables)
+	.long	0		// PointerToLineNumbers (0 for executables)
+	.short	0		// NumberOfRelocations  (0 for executables)
+	.short	0		// NumberOfLineNumbers  (0 for executables)
+	.long	0xe0500020	// Characteristics (section flags)
+
+	/*
+	 * EFI will load stext onwards at the 4k section alignment
+	 * described in the PE/COFF header. To ensure that instruction
+	 * sequences using an adrp and a :lo12: immediate will function
+	 * correctly at this alignment, we must ensure that stext is
+	 * placed at a 4k boundary in the Image to begin with.
+	 */
+	.align 12
+#endif
+
+ENTRY(stext)
+	bl	preserve_boot_args
+	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
+	adrp	x24, __PHYS_OFFSET
+	bl	set_cpu_boot_mode_flag
+
+	bl	__vet_fdt
+	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
+	/*
+	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
+	 * details.
+	 * On return, the CPU will be ready for the MMU to be turned on and
+	 * the TCR will have been set.
+	 */
+	ldr	x27, =__mmap_switched		// address to jump to after
+						// MMU has been enabled
+	adr_l	lr, __enable_mmu		// return (PIC) address
+	b	__cpu_setup			// initialise processor
+ENDPROC(stext)
+
+/*
+ * Preserve the arguments passed by the bootloader in x0 .. x3
+ */
+preserve_boot_args:
+	mov	x21, x0				// x21=FDT
+
+	adr_l	x0, boot_args			// record the contents of
+	stp	x21, x1, [x0]			// x0 .. x3 at kernel entry
+	stp	x2, x3, [x0, #16]
+
+	dmb	sy				// needed before dc ivac with
+						// MMU off
+
+	add	x1, x0, #0x20			// 4 x 8 bytes
+	b	__inval_cache_range		// tail call
+ENDPROC(preserve_boot_args)
+
+/*
+ * Determine validity of the x21 FDT pointer.
+ * The dtb must be 8-byte aligned and live in the first 512M of memory.
+ */
+__vet_fdt:
+	tst	x21, #0x7
+	b.ne	1f
+	cmp	x21, x24
+	b.lt	1f
+	mov	x0, #(1 << 29)
+	add	x0, x0, x24
+	cmp	x21, x0
+	b.ge	1f
+	ret
+1:
+	mov	x21, #0
+	ret
+ENDPROC(__vet_fdt)
+/*
+ * Macro to create a table entry to the next page.
+ *
+ *	tbl:	page table address
+ *	virt:	virtual address
+ *	shift:	#imm page table shift
+ *	ptrs:	#imm pointers per table page
+ *
+ * Preserves:	virt
+ * Corrupts:	tmp1, tmp2
+ * Returns:	tbl -> next level table page address
+ */
+	.macro	create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
+	lsr	\tmp1, \virt, #\shift
+	and	\tmp1, \tmp1, #\ptrs - 1	// table index
+	add	\tmp2, \tbl, #PAGE_SIZE
+	orr	\tmp2, \tmp2, #PMD_TYPE_TABLE	// address of next table and entry type
+	str	\tmp2, [\tbl, \tmp1, lsl #3]
+	add	\tbl, \tbl, #PAGE_SIZE		// next level table page
+	.endm
+
+/*
+ * Macro to populate the PGD (and possibily PUD) for the corresponding
+ * block entry in the next level (tbl) for the given virtual address.
+ *
+ * Preserves:	tbl, next, virt
+ * Corrupts:	tmp1, tmp2
+ */
+	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
+	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
+#if SWAPPER_PGTABLE_LEVELS == 3
+	create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+#endif
+	.endm
+
+/*
+ * Macro to populate block entries in the page table for the start..end
+ * virtual range (inclusive).
+ *
+ * Preserves:	tbl, flags
+ * Corrupts:	phys, start, end, pstate
+ */
+	.macro	create_block_map, tbl, flags, phys, start, end
+	lsr	\phys, \phys, #BLOCK_SHIFT
+	lsr	\start, \start, #BLOCK_SHIFT
+	and	\start, \start, #PTRS_PER_PTE - 1	// table index
+	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry
+	lsr	\end, \end, #BLOCK_SHIFT
+	and	\end, \end, #PTRS_PER_PTE - 1		// table end index
+9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry
+	add	\start, \start, #1			// next entry
+	add	\phys, \phys, #BLOCK_SIZE		// next block
+	cmp	\start, \end
+	b.ls	9999b
+	.endm
+
+/*
+ * Setup the initial page tables. We only setup the barest amount which is
+ * required to get the kernel running. The following sections are required:
+ *   - identity mapping to enable the MMU (low address, TTBR0)
+ *   - first few MB of the kernel linear mapping to jump to once the MMU has
+ *     been enabled, including the FDT blob (TTBR1)
+ *   - pgd entry for fixed mappings (TTBR1)
+ */
+__create_page_tables:
+	adrp	x25, idmap_pg_dir
+	adrp	x26, swapper_pg_dir
+	mov	x27, lr
+
+	/*
+	 * Invalidate the idmap and swapper page tables to avoid potential
+	 * dirty cache lines being evicted.
+	 */
+	mov	x0, x25
+	add	x1, x26, #SWAPPER_DIR_SIZE
+	bl	__inval_cache_range
+
+	/*
+	 * Clear the idmap and swapper page tables.
+	 */
+	mov	x0, x25
+	add	x6, x26, #SWAPPER_DIR_SIZE
+1:	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	cmp	x0, x6
+	b.lo	1b
+
+	ldr	x7, =MM_MMUFLAGS
+
+	/*
+	 * Create the identity mapping.
+	 */
+	mov	x0, x25				// idmap_pg_dir
+	adrp	x3, KERNEL_START		// __pa(KERNEL_START)
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT))
+
+	/*
+	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
+	 * created that covers system RAM if that is located sufficiently high
+	 * in the physical address space. So for the ID map, use an extended
+	 * virtual range in that case, by configuring an additional translation
+	 * level.
+	 * First, we have to verify our assumption that the current value of
+	 * VA_BITS was chosen such that all translation levels are fully
+	 * utilised, and that lowering T0SZ will always result in an additional
+	 * translation level to be configured.
+	 */
+#if VA_BITS != EXTRA_SHIFT
+#error "Mismatch between VA_BITS and page size/number of translation levels"
+#endif
+
+	/*
+	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+	 * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used),
+	 * this number conveniently equals the number of leading zeroes in
+	 * the physical address of KERNEL_END.
+	 */
+	adrp	x5, KERNEL_END
+	clz	x5, x5
+	cmp	x5, TCR_T0SZ(VA_BITS)	// default T0SZ small enough?
+	b.ge	1f			// .. then skip additional level
+
+	adr_l	x6, idmap_t0sz
+	str	x5, [x6]
+	dmb	sy
+	dc	ivac, x6		// Invalidate potentially stale cache line
+
+	create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
+1:
+#endif
+
+	create_pgd_entry x0, x3, x5, x6
+	mov	x5, x3				// __pa(KERNEL_START)
+	adr_l	x6, KERNEL_END			// __pa(KERNEL_END)
+	create_block_map x0, x7, x3, x5, x6
+
+	/*
+	 * Map the kernel image (starting with PHYS_OFFSET).
+	 */
+	mov	x0, x26				// swapper_pg_dir
+	mov	x5, #PAGE_OFFSET
+	create_pgd_entry x0, x5, x3, x6
+	ldr	x6, =KERNEL_END			// __va(KERNEL_END)
+	mov	x3, x24				// phys offset
+	create_block_map x0, x7, x3, x5, x6
+
+	/*
+	 * Map the FDT blob (maximum 2MB; must be within 512MB of
+	 * PHYS_OFFSET).
+	 */
+	mov	x3, x21				// FDT phys address
+	and	x3, x3, #~((1 << 21) - 1)	// 2MB aligned
+	mov	x6, #PAGE_OFFSET
+	sub	x5, x3, x24			// subtract PHYS_OFFSET
+	tst	x5, #~((1 << 29) - 1)		// within 512MB?
+	csel	x21, xzr, x21, ne		// zero the FDT pointer
+	b.ne	1f
+	add	x5, x5, x6			// __va(FDT blob)
+	add	x6, x5, #1 << 21		// 2MB for the FDT blob
+	sub	x6, x6, #1			// inclusive range
+	create_block_map x0, x7, x3, x5, x6
+1:
+	/*
+	 * Since the page tables have been populated with non-cacheable
+	 * accesses (MMU disabled), invalidate the idmap and swapper page
+	 * tables again to remove any speculatively loaded cache lines.
+	 */
+	mov	x0, x25
+	add	x1, x26, #SWAPPER_DIR_SIZE
+	dmb	sy
+	bl	__inval_cache_range
+
+	mov	lr, x27
+	ret
+ENDPROC(__create_page_tables)
+	.ltorg
+
+/*
+ * The following fragment of code is executed with the MMU enabled.
+ */
+	.set	initial_sp, init_thread_union + THREAD_START_SP
+__mmap_switched:
+	adr_l	x6, __bss_start
+	adr_l	x7, __bss_stop
+
+1:	cmp	x6, x7
+	b.hs	2f
+	str	xzr, [x6], #8			// Clear BSS
+	b	1b
+2:
+	adr_l	sp, initial_sp, x4
+	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
+	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
+	mov	x29, #0
+	b	start_kernel
+ENDPROC(__mmap_switched)
+
+/*
+ * end early head section, begin head code that is also used for
+ * hotplug and needs to have the same protections as the text region
+ */
+	.section ".text","ax"
+/*
+ * If we're fortunate enough to boot at EL2, ensure that the world is
+ * sane before dropping to EL1.
+ *
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
+ * booted in EL1 or EL2 respectively.
+ */
+ENTRY(el2_setup)
+	mrs	x0, CurrentEL
+	cmp	x0, #CurrentEL_EL2
+	b.ne	1f
+	mrs	x0, sctlr_el2
+CPU_BE(	orr	x0, x0, #(1 << 25)	)	// Set the EE bit for EL2
+CPU_LE(	bic	x0, x0, #(1 << 25)	)	// Clear the EE bit for EL2
+	msr	sctlr_el2, x0
+	b	2f
+1:	mrs	x0, sctlr_el1
+CPU_BE(	orr	x0, x0, #(3 << 24)	)	// Set the EE and E0E bits for EL1
+CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1
+	msr	sctlr_el1, x0
+	mov	w20, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
+	isb
+	ret
+
+	/* Hyp configuration. */
+2:	mov	x0, #(1 << 31)			// 64-bit EL1
+	msr	hcr_el2, x0
+
+	/* Generic timers. */
+	mrs	x0, cnthctl_el2
+	orr	x0, x0, #3			// Enable EL1 physical timers
+	msr	cnthctl_el2, x0
+	msr	cntvoff_el2, xzr		// Clear virtual offset
+
+#ifdef CONFIG_ARM_GIC_V3
+	/* GICv3 system register access */
+	mrs	x0, id_aa64pfr0_el1
+	ubfx	x0, x0, #24, #4
+	cmp	x0, #1
+	b.ne	3f
+
+	mrs_s	x0, ICC_SRE_EL2
+	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
+	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
+	msr_s	ICC_SRE_EL2, x0
+	isb					// Make sure SRE is now set
+	msr_s	ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults
+
+3:
+#endif
+
+	/* Populate ID registers. */
+	mrs	x0, midr_el1
+	mrs	x1, mpidr_el1
+	msr	vpidr_el2, x0
+	msr	vmpidr_el2, x1
+
+	/* sctlr_el1 */
+	mov	x0, #0x0800			// Set/clear RES{1,0} bits
+CPU_BE(	movk	x0, #0x33d0, lsl #16	)	// Set EE and E0E on BE systems
+CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
+	msr	sctlr_el1, x0
+
+	/* Coprocessor traps. */
+	mov	x0, #0x33ff
+	msr	cptr_el2, x0			// Disable copro. traps to EL2
+
+#ifdef CONFIG_COMPAT
+	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
+#endif
+
+	/* Stage-2 translation */
+	msr	vttbr_el2, xzr
+
+	/* Hypervisor stub */
+	adrp	x0, __hyp_stub_vectors
+	add	x0, x0, #:lo12:__hyp_stub_vectors
+	msr	vbar_el2, x0
+
+	/* spsr */
+	mov	x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+		      PSR_MODE_EL1h)
+	msr	spsr_el2, x0
+	msr	elr_el2, lr
+	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
+	eret
+ENDPROC(el2_setup)
+
+/*
+ * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
+ * in x20. See arch/arm64/include/asm/virt.h for more info.
+ */
+ENTRY(set_cpu_boot_mode_flag)
+	adr_l	x1, __boot_cpu_mode
+	cmp	w20, #BOOT_CPU_MODE_EL2
+	b.ne	1f
+	add	x1, x1, #4
+1:	str	w20, [x1]			// This CPU has booted in EL1
+	dmb	sy
+	dc	ivac, x1			// Invalidate potentially stale cache line
+	ret
+ENDPROC(set_cpu_boot_mode_flag)
+
+/*
+ * We need to find out the CPU boot mode long after boot, so we need to
+ * store it in a writable variable.
+ *
+ * This is not in .bss, because we set it sufficiently early that the boot-time
+ * zeroing of .bss would clobber it.
+ */
+	.pushsection	.data..cacheline_aligned
+	.align	L1_CACHE_SHIFT
+ENTRY(__boot_cpu_mode)
+	.long	BOOT_CPU_MODE_EL2
+	.long	BOOT_CPU_MODE_EL1
+	.popsection
+
+#ifdef CONFIG_SMP
+	/*
+	 * This provides a "holding pen" for platforms to hold all secondary
+	 * cores are held until we're ready for them to initialise.
+	 */
+ENTRY(secondary_holding_pen)
+	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
+	bl	set_cpu_boot_mode_flag
+	mrs	x0, mpidr_el1
+	ldr     x1, =MPIDR_HWID_BITMASK
+	and	x0, x0, x1
+	adr_l	x3, secondary_holding_pen_release
+pen:	ldr	x4, [x3]
+	cmp	x4, x0
+	b.eq	secondary_startup
+	wfe
+	b	pen
+ENDPROC(secondary_holding_pen)
+
+	/*
+	 * Secondary entry point that jumps straight into the kernel. Only to
+	 * be used where CPUs are brought online dynamically by the kernel.
+	 */
+ENTRY(secondary_entry)
+	bl	el2_setup			// Drop to EL1
+	bl	set_cpu_boot_mode_flag
+	b	secondary_startup
+ENDPROC(secondary_entry)
+
+ENTRY(secondary_startup)
+	/*
+	 * Common entry point for secondary CPUs.
+	 */
+	adrp	x25, idmap_pg_dir
+	adrp	x26, swapper_pg_dir
+	bl	__cpu_setup			// initialise processor
+
+	ldr	x21, =secondary_data
+	ldr	x27, =__secondary_switched	// address to jump to after enabling the MMU
+	b	__enable_mmu
+ENDPROC(secondary_startup)
+
+ENTRY(__secondary_switched)
+	ldr	x0, [x21]			// get secondary_data.stack
+	mov	sp, x0
+	mov	x29, #0
+	b	secondary_start_kernel
+ENDPROC(__secondary_switched)
+#endif	/* CONFIG_SMP */
+
+/*
+ * Enable the MMU.
+ *
+ *  x0  = SCTLR_EL1 value for turning on the MMU.
+ *  x27 = *virtual* address to jump to upon completion
+ *
+ * other registers depend on the function called upon completion
+ */
+__enable_mmu:
+	ldr	x5, =vectors
+	msr	vbar_el1, x5
+	msr	ttbr0_el1, x25			// load TTBR0
+	msr	ttbr1_el1, x26			// load TTBR1
+	isb
+	msr	sctlr_el1, x0
+	isb
+	br	x27
+ENDPROC(__enable_mmu)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
new file mode 100644
index 000000000..e7d934d3a
--- /dev/null
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -0,0 +1,954 @@
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ *
+ * Copyright (C) 2012 ARM Limited
+ * Author: Will Deacon <will.deacon@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "hw-breakpoint: " fmt
+
+#include <linux/compat.h>
+#include <linux/cpu_pm.h>
+#include <linux/errno.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/smp.h>
+
+#include <asm/current.h>
+#include <asm/debug-monitors.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/kdebug.h>
+#include <asm/traps.h>
+#include <asm/cputype.h>
+#include <asm/system_misc.h>
+
+/* Breakpoint currently in use for each BRP. */
+static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
+
+/* Watchpoint currently in use for each WRP. */
+static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
+
+/* Currently stepping a per-CPU kernel breakpoint. */
+static DEFINE_PER_CPU(int, stepping_kernel_bp);
+
+/* Number of BRP/WRP registers on this CPU. */
+static int core_num_brps;
+static int core_num_wrps;
+
+/* Determine number of BRP registers available. */
+static int get_num_brps(void)
+{
+	return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+}
+
+/* Determine number of WRP registers available. */
+static int get_num_wrps(void)
+{
+	return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+}
+
+int hw_breakpoint_slots(int type)
+{
+	/*
+	 * We can be called early, so don't rely on
+	 * our static variables being initialised.
+	 */
+	switch (type) {
+	case TYPE_INST:
+		return get_num_brps();
+	case TYPE_DATA:
+		return get_num_wrps();
+	default:
+		pr_warning("unknown slot type: %d\n", type);
+		return 0;
+	}
+}
+
+#define READ_WB_REG_CASE(OFF, N, REG, VAL)	\
+	case (OFF + N):				\
+		AARCH64_DBG_READ(N, REG, VAL);	\
+		break
+
+#define WRITE_WB_REG_CASE(OFF, N, REG, VAL)	\
+	case (OFF + N):				\
+		AARCH64_DBG_WRITE(N, REG, VAL);	\
+		break
+
+#define GEN_READ_WB_REG_CASES(OFF, REG, VAL)	\
+	READ_WB_REG_CASE(OFF,  0, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  1, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  2, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  3, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  4, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  5, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  6, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  7, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  8, REG, VAL);	\
+	READ_WB_REG_CASE(OFF,  9, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 10, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 11, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 12, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 13, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 14, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 15, REG, VAL)
+
+#define GEN_WRITE_WB_REG_CASES(OFF, REG, VAL)	\
+	WRITE_WB_REG_CASE(OFF,  0, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  1, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  2, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  3, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  4, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  5, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  6, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  7, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  8, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF,  9, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 10, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 11, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 12, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 13, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 14, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
+
+static u64 read_wb_reg(int reg, int n)
+{
+	u64 val = 0;
+
+	switch (reg + n) {
+	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
+	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
+	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
+	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
+	default:
+		pr_warning("attempt to read from unknown breakpoint register %d\n", n);
+	}
+
+	return val;
+}
+
+static void write_wb_reg(int reg, int n, u64 val)
+{
+	switch (reg + n) {
+	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
+	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
+	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
+	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
+	default:
+		pr_warning("attempt to write to unknown breakpoint register %d\n", n);
+	}
+	isb();
+}
+
+/*
+ * Convert a breakpoint privilege level to the corresponding exception
+ * level.
+ */
+static enum debug_el debug_exception_level(int privilege)
+{
+	switch (privilege) {
+	case AARCH64_BREAKPOINT_EL0:
+		return DBG_ACTIVE_EL0;
+	case AARCH64_BREAKPOINT_EL1:
+		return DBG_ACTIVE_EL1;
+	default:
+		pr_warning("invalid breakpoint privilege level %d\n", privilege);
+		return -EINVAL;
+	}
+}
+
+enum hw_breakpoint_ops {
+	HW_BREAKPOINT_INSTALL,
+	HW_BREAKPOINT_UNINSTALL,
+	HW_BREAKPOINT_RESTORE
+};
+
+/**
+ * hw_breakpoint_slot_setup - Find and setup a perf slot according to
+ *			      operations
+ *
+ * @slots: pointer to array of slots
+ * @max_slots: max number of slots
+ * @bp: perf_event to setup
+ * @ops: operation to be carried out on the slot
+ *
+ * Return:
+ *	slot index on success
+ *	-ENOSPC if no slot is available/matches
+ *	-EINVAL on wrong operations parameter
+ */
+static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots,
+				    struct perf_event *bp,
+				    enum hw_breakpoint_ops ops)
+{
+	int i;
+	struct perf_event **slot;
+
+	for (i = 0; i < max_slots; ++i) {
+		slot = &slots[i];
+		switch (ops) {
+		case HW_BREAKPOINT_INSTALL:
+			if (!*slot) {
+				*slot = bp;
+				return i;
+			}
+			break;
+		case HW_BREAKPOINT_UNINSTALL:
+			if (*slot == bp) {
+				*slot = NULL;
+				return i;
+			}
+			break;
+		case HW_BREAKPOINT_RESTORE:
+			if (*slot == bp)
+				return i;
+			break;
+		default:
+			pr_warn_once("Unhandled hw breakpoint ops %d\n", ops);
+			return -EINVAL;
+		}
+	}
+	return -ENOSPC;
+}
+
+static int hw_breakpoint_control(struct perf_event *bp,
+				 enum hw_breakpoint_ops ops)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	struct perf_event **slots;
+	struct debug_info *debug_info = &current->thread.debug;
+	int i, max_slots, ctrl_reg, val_reg, reg_enable;
+	enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege);
+	u32 ctrl;
+
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
+		/* Breakpoint */
+		ctrl_reg = AARCH64_DBG_REG_BCR;
+		val_reg = AARCH64_DBG_REG_BVR;
+		slots = this_cpu_ptr(bp_on_reg);
+		max_slots = core_num_brps;
+		reg_enable = !debug_info->bps_disabled;
+	} else {
+		/* Watchpoint */
+		ctrl_reg = AARCH64_DBG_REG_WCR;
+		val_reg = AARCH64_DBG_REG_WVR;
+		slots = this_cpu_ptr(wp_on_reg);
+		max_slots = core_num_wrps;
+		reg_enable = !debug_info->wps_disabled;
+	}
+
+	i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops);
+
+	if (WARN_ONCE(i < 0, "Can't find any breakpoint slot"))
+		return i;
+
+	switch (ops) {
+	case HW_BREAKPOINT_INSTALL:
+		/*
+		 * Ensure debug monitors are enabled at the correct exception
+		 * level.
+		 */
+		enable_debug_monitors(dbg_el);
+		/* Fall through */
+	case HW_BREAKPOINT_RESTORE:
+		/* Setup the address register. */
+		write_wb_reg(val_reg, i, info->address);
+
+		/* Setup the control register. */
+		ctrl = encode_ctrl_reg(info->ctrl);
+		write_wb_reg(ctrl_reg, i,
+			     reg_enable ? ctrl | 0x1 : ctrl & ~0x1);
+		break;
+	case HW_BREAKPOINT_UNINSTALL:
+		/* Reset the control register. */
+		write_wb_reg(ctrl_reg, i, 0);
+
+		/*
+		 * Release the debug monitors for the correct exception
+		 * level.
+		 */
+		disable_debug_monitors(dbg_el);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL);
+}
+
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+	unsigned int len_in_bytes = 0;
+
+	switch (hbp_len) {
+	case ARM_BREAKPOINT_LEN_1:
+		len_in_bytes = 1;
+		break;
+	case ARM_BREAKPOINT_LEN_2:
+		len_in_bytes = 2;
+		break;
+	case ARM_BREAKPOINT_LEN_4:
+		len_in_bytes = 4;
+		break;
+	case ARM_BREAKPOINT_LEN_8:
+		len_in_bytes = 8;
+		break;
+	}
+
+	return len_in_bytes;
+}
+
+/*
+ * Check whether bp virtual address is in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
+{
+	unsigned int len;
+	unsigned long va;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	va = info->address;
+	len = get_hbp_len(info->ctrl.len);
+
+	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl.
+ * Hopefully this will disappear when ptrace can bypass the conversion
+ * to generic breakpoint descriptions.
+ */
+int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+			   int *gen_len, int *gen_type)
+{
+	/* Type */
+	switch (ctrl.type) {
+	case ARM_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
+		break;
+	case ARM_BREAKPOINT_LOAD:
+		*gen_type = HW_BREAKPOINT_R;
+		break;
+	case ARM_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_W;
+		break;
+	case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_RW;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (ctrl.len) {
+	case ARM_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case ARM_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case ARM_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+	case ARM_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Construct an arch_hw_breakpoint from a perf_event.
+ */
+static int arch_build_bp_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_X:
+		info->ctrl.type = ARM_BREAKPOINT_EXECUTE;
+		break;
+	case HW_BREAKPOINT_R:
+		info->ctrl.type = ARM_BREAKPOINT_LOAD;
+		break;
+	case HW_BREAKPOINT_W:
+		info->ctrl.type = ARM_BREAKPOINT_STORE;
+		break;
+	case HW_BREAKPOINT_RW:
+		info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (bp->attr.bp_len) {
+	case HW_BREAKPOINT_LEN_1:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_1;
+		break;
+	case HW_BREAKPOINT_LEN_2:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_2;
+		break;
+	case HW_BREAKPOINT_LEN_4:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_4;
+		break;
+	case HW_BREAKPOINT_LEN_8:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * On AArch64, we only permit breakpoints of length 4, whereas
+	 * AArch32 also requires breakpoints of length 2 for Thumb.
+	 * Watchpoints can be of length 1, 2, 4 or 8 bytes.
+	 */
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
+		if (is_compat_task()) {
+			if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
+			    info->ctrl.len != ARM_BREAKPOINT_LEN_4)
+				return -EINVAL;
+		} else if (info->ctrl.len != ARM_BREAKPOINT_LEN_4) {
+			/*
+			 * FIXME: Some tools (I'm looking at you perf) assume
+			 *	  that breakpoints should be sizeof(long). This
+			 *	  is nonsense. For now, we fix up the parameter
+			 *	  but we should probably return -EINVAL instead.
+			 */
+			info->ctrl.len = ARM_BREAKPOINT_LEN_4;
+		}
+	}
+
+	/* Address */
+	info->address = bp->attr.bp_addr;
+
+	/*
+	 * Privilege
+	 * Note that we disallow combined EL0/EL1 breakpoints because
+	 * that would complicate the stepping code.
+	 */
+	if (arch_check_bp_in_kernelspace(bp))
+		info->ctrl.privilege = AARCH64_BREAKPOINT_EL1;
+	else
+		info->ctrl.privilege = AARCH64_BREAKPOINT_EL0;
+
+	/* Enabled? */
+	info->ctrl.enabled = !bp->attr.disabled;
+
+	return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings.
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	int ret;
+	u64 alignment_mask, offset;
+
+	/* Build the arch_hw_breakpoint. */
+	ret = arch_build_bp_info(bp);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check address alignment.
+	 * We don't do any clever alignment correction for watchpoints
+	 * because using 64-bit unaligned addresses is deprecated for
+	 * AArch64.
+	 *
+	 * AArch32 tasks expect some simple alignment fixups, so emulate
+	 * that here.
+	 */
+	if (is_compat_task()) {
+		if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+			alignment_mask = 0x7;
+		else
+			alignment_mask = 0x3;
+		offset = info->address & alignment_mask;
+		switch (offset) {
+		case 0:
+			/* Aligned */
+			break;
+		case 1:
+			/* Allow single byte watchpoint. */
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_1)
+				break;
+		case 2:
+			/* Allow halfword watchpoints and breakpoints. */
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_2)
+				break;
+		default:
+			return -EINVAL;
+		}
+
+		info->address &= ~alignment_mask;
+		info->ctrl.len <<= offset;
+	} else {
+		if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE)
+			alignment_mask = 0x3;
+		else
+			alignment_mask = 0x7;
+		if (info->address & alignment_mask)
+			return -EINVAL;
+	}
+
+	/*
+	 * Disallow per-task kernel breakpoints since these would
+	 * complicate the stepping code.
+	 */
+	if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Enable/disable all of the breakpoints active at the specified
+ * exception level at the register level.
+ * This is used when single-stepping after a breakpoint exception.
+ */
+static void toggle_bp_registers(int reg, enum debug_el el, int enable)
+{
+	int i, max_slots, privilege;
+	u32 ctrl;
+	struct perf_event **slots;
+
+	switch (reg) {
+	case AARCH64_DBG_REG_BCR:
+		slots = this_cpu_ptr(bp_on_reg);
+		max_slots = core_num_brps;
+		break;
+	case AARCH64_DBG_REG_WCR:
+		slots = this_cpu_ptr(wp_on_reg);
+		max_slots = core_num_wrps;
+		break;
+	default:
+		return;
+	}
+
+	for (i = 0; i < max_slots; ++i) {
+		if (!slots[i])
+			continue;
+
+		privilege = counter_arch_bp(slots[i])->ctrl.privilege;
+		if (debug_exception_level(privilege) != el)
+			continue;
+
+		ctrl = read_wb_reg(reg, i);
+		if (enable)
+			ctrl |= 0x1;
+		else
+			ctrl &= ~0x1;
+		write_wb_reg(reg, i, ctrl);
+	}
+}
+
+/*
+ * Debug exception handlers.
+ */
+static int breakpoint_handler(unsigned long unused, unsigned int esr,
+			      struct pt_regs *regs)
+{
+	int i, step = 0, *kernel_step;
+	u32 ctrl_reg;
+	u64 addr, val;
+	struct perf_event *bp, **slots;
+	struct debug_info *debug_info;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	slots = this_cpu_ptr(bp_on_reg);
+	addr = instruction_pointer(regs);
+	debug_info = &current->thread.debug;
+
+	for (i = 0; i < core_num_brps; ++i) {
+		rcu_read_lock();
+
+		bp = slots[i];
+
+		if (bp == NULL)
+			goto unlock;
+
+		/* Check if the breakpoint value matches. */
+		val = read_wb_reg(AARCH64_DBG_REG_BVR, i);
+		if (val != (addr & ~0x3))
+			goto unlock;
+
+		/* Possible match, check the byte address select to confirm. */
+		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_BCR, i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		if (!((1 << (addr & 0x3)) & ctrl.len))
+			goto unlock;
+
+		counter_arch_bp(bp)->trigger = addr;
+		perf_bp_event(bp, regs);
+
+		/* Do we need to handle the stepping? */
+		if (!bp->overflow_handler)
+			step = 1;
+unlock:
+		rcu_read_unlock();
+	}
+
+	if (!step)
+		return 0;
+
+	if (user_mode(regs)) {
+		debug_info->bps_disabled = 1;
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 0);
+
+		/* If we're already stepping a watchpoint, just return. */
+		if (debug_info->wps_disabled)
+			return 0;
+
+		if (test_thread_flag(TIF_SINGLESTEP))
+			debug_info->suspended_step = 1;
+		else
+			user_enable_single_step(current);
+	} else {
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0);
+		kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+
+		if (*kernel_step != ARM_KERNEL_STEP_NONE)
+			return 0;
+
+		if (kernel_active_single_step()) {
+			*kernel_step = ARM_KERNEL_STEP_SUSPEND;
+		} else {
+			*kernel_step = ARM_KERNEL_STEP_ACTIVE;
+			kernel_enable_single_step(regs);
+		}
+	}
+
+	return 0;
+}
+
+static int watchpoint_handler(unsigned long addr, unsigned int esr,
+			      struct pt_regs *regs)
+{
+	int i, step = 0, *kernel_step, access;
+	u32 ctrl_reg;
+	u64 val, alignment_mask;
+	struct perf_event *wp, **slots;
+	struct debug_info *debug_info;
+	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	slots = this_cpu_ptr(wp_on_reg);
+	debug_info = &current->thread.debug;
+
+	for (i = 0; i < core_num_wrps; ++i) {
+		rcu_read_lock();
+
+		wp = slots[i];
+
+		if (wp == NULL)
+			goto unlock;
+
+		info = counter_arch_bp(wp);
+		/* AArch32 watchpoints are either 4 or 8 bytes aligned. */
+		if (is_compat_task()) {
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+				alignment_mask = 0x7;
+			else
+				alignment_mask = 0x3;
+		} else {
+			alignment_mask = 0x7;
+		}
+
+		/* Check if the watchpoint value matches. */
+		val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
+		if (val != (addr & ~alignment_mask))
+			goto unlock;
+
+		/* Possible match, check the byte address select to confirm. */
+		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		if (!((1 << (addr & alignment_mask)) & ctrl.len))
+			goto unlock;
+
+		/*
+		 * Check that the access type matches.
+		 * 0 => load, otherwise => store
+		 */
+		access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
+			 HW_BREAKPOINT_R;
+		if (!(access & hw_breakpoint_type(wp)))
+			goto unlock;
+
+		info->trigger = addr;
+		perf_bp_event(wp, regs);
+
+		/* Do we need to handle the stepping? */
+		if (!wp->overflow_handler)
+			step = 1;
+
+unlock:
+		rcu_read_unlock();
+	}
+
+	if (!step)
+		return 0;
+
+	/*
+	 * We always disable EL0 watchpoints because the kernel can
+	 * cause these to fire via an unprivileged access.
+	 */
+	toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 0);
+
+	if (user_mode(regs)) {
+		debug_info->wps_disabled = 1;
+
+		/* If we're already stepping a breakpoint, just return. */
+		if (debug_info->bps_disabled)
+			return 0;
+
+		if (test_thread_flag(TIF_SINGLESTEP))
+			debug_info->suspended_step = 1;
+		else
+			user_enable_single_step(current);
+	} else {
+		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0);
+		kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+
+		if (*kernel_step != ARM_KERNEL_STEP_NONE)
+			return 0;
+
+		if (kernel_active_single_step()) {
+			*kernel_step = ARM_KERNEL_STEP_SUSPEND;
+		} else {
+			*kernel_step = ARM_KERNEL_STEP_ACTIVE;
+			kernel_enable_single_step(regs);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Handle single-step exception.
+ */
+int reinstall_suspended_bps(struct pt_regs *regs)
+{
+	struct debug_info *debug_info = &current->thread.debug;
+	int handled_exception = 0, *kernel_step;
+
+	kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+
+	/*
+	 * Called from single-step exception handler.
+	 * Return 0 if execution can resume, 1 if a SIGTRAP should be
+	 * reported.
+	 */
+	if (user_mode(regs)) {
+		if (debug_info->bps_disabled) {
+			debug_info->bps_disabled = 0;
+			toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1);
+			handled_exception = 1;
+		}
+
+		if (debug_info->wps_disabled) {
+			debug_info->wps_disabled = 0;
+			toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
+			handled_exception = 1;
+		}
+
+		if (handled_exception) {
+			if (debug_info->suspended_step) {
+				debug_info->suspended_step = 0;
+				/* Allow exception handling to fall-through. */
+				handled_exception = 0;
+			} else {
+				user_disable_single_step(current);
+			}
+		}
+	} else if (*kernel_step != ARM_KERNEL_STEP_NONE) {
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 1);
+		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 1);
+
+		if (!debug_info->wps_disabled)
+			toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
+
+		if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
+			kernel_disable_single_step();
+			handled_exception = 1;
+		} else {
+			handled_exception = 0;
+		}
+
+		*kernel_step = ARM_KERNEL_STEP_NONE;
+	}
+
+	return !handled_exception;
+}
+
+/*
+ * Context-switcher for restoring suspended breakpoints.
+ */
+void hw_breakpoint_thread_switch(struct task_struct *next)
+{
+	/*
+	 *           current        next
+	 * disabled: 0              0     => The usual case, NOTIFY_DONE
+	 *           0              1     => Disable the registers
+	 *           1              0     => Enable the registers
+	 *           1              1     => NOTIFY_DONE. per-task bps will
+	 *                                   get taken care of by perf.
+	 */
+
+	struct debug_info *current_debug_info, *next_debug_info;
+
+	current_debug_info = &current->thread.debug;
+	next_debug_info = &next->thread.debug;
+
+	/* Update breakpoints. */
+	if (current_debug_info->bps_disabled != next_debug_info->bps_disabled)
+		toggle_bp_registers(AARCH64_DBG_REG_BCR,
+				    DBG_ACTIVE_EL0,
+				    !next_debug_info->bps_disabled);
+
+	/* Update watchpoints. */
+	if (current_debug_info->wps_disabled != next_debug_info->wps_disabled)
+		toggle_bp_registers(AARCH64_DBG_REG_WCR,
+				    DBG_ACTIVE_EL0,
+				    !next_debug_info->wps_disabled);
+}
+
+/*
+ * CPU initialisation.
+ */
+static void hw_breakpoint_reset(void *unused)
+{
+	int i;
+	struct perf_event **slots;
+	/*
+	 * When a CPU goes through cold-boot, it does not have any installed
+	 * slot, so it is safe to share the same function for restoring and
+	 * resetting breakpoints; when a CPU is hotplugged in, it goes
+	 * through the slots, which are all empty, hence it just resets control
+	 * and value for debug registers.
+	 * When this function is triggered on warm-boot through a CPU PM
+	 * notifier some slots might be initialized; if so they are
+	 * reprogrammed according to the debug slots content.
+	 */
+	for (slots = this_cpu_ptr(bp_on_reg), i = 0; i < core_num_brps; ++i) {
+		if (slots[i]) {
+			hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE);
+		} else {
+			write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL);
+			write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL);
+		}
+	}
+
+	for (slots = this_cpu_ptr(wp_on_reg), i = 0; i < core_num_wrps; ++i) {
+		if (slots[i]) {
+			hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE);
+		} else {
+			write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL);
+			write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL);
+		}
+	}
+}
+
+static int hw_breakpoint_reset_notify(struct notifier_block *self,
+						unsigned long action,
+						void *hcpu)
+{
+	int cpu = (long)hcpu;
+	if (action == CPU_ONLINE)
+		smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hw_breakpoint_reset_nb = {
+	.notifier_call = hw_breakpoint_reset_notify,
+};
+
+#ifdef CONFIG_CPU_PM
+extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *));
+#else
+static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+{
+}
+#endif
+
+/*
+ * One-time initialisation.
+ */
+static int __init arch_hw_breakpoint_init(void)
+{
+	core_num_brps = get_num_brps();
+	core_num_wrps = get_num_wrps();
+
+	pr_info("found %d breakpoint and %d watchpoint registers.\n",
+		core_num_brps, core_num_wrps);
+
+	cpu_notifier_register_begin();
+
+	/*
+	 * Reset the breakpoint resources. We assume that a halting
+	 * debugger will leave the world in a nice state for us.
+	 */
+	smp_call_function(hw_breakpoint_reset, NULL, 1);
+	hw_breakpoint_reset(NULL);
+
+	/* Register debug fault handlers. */
+	hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
+			      TRAP_HWBKPT, "hw-breakpoint handler");
+	hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP,
+			      TRAP_HWBKPT, "hw-watchpoint handler");
+
+	/* Register hotplug notifier. */
+	__register_cpu_notifier(&hw_breakpoint_reset_nb);
+
+	cpu_notifier_register_done();
+
+	/* Register cpu_suspend hw breakpoint restore hook */
+	cpu_suspend_set_dbg_restorer(hw_breakpoint_reset);
+
+	return 0;
+}
+arch_initcall(arch_hw_breakpoint_init);
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+}
+
+/*
+ * Dummy function to register with die_notifier.
+ */
+int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+				    unsigned long val, void *data)
+{
+	return NOTIFY_DONE;
+}
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
new file mode 100644
index 000000000..a272f335c
--- /dev/null
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -0,0 +1,110 @@
+/*
+ * Hypervisor stub
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author:	Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/assembler.h>
+#include <asm/ptrace.h>
+#include <asm/virt.h>
+
+	.text
+	.align 11
+
+ENTRY(__hyp_stub_vectors)
+	ventry	el2_sync_invalid		// Synchronous EL2t
+	ventry	el2_irq_invalid			// IRQ EL2t
+	ventry	el2_fiq_invalid			// FIQ EL2t
+	ventry	el2_error_invalid		// Error EL2t
+
+	ventry	el2_sync_invalid		// Synchronous EL2h
+	ventry	el2_irq_invalid			// IRQ EL2h
+	ventry	el2_fiq_invalid			// FIQ EL2h
+	ventry	el2_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq_invalid			// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync_invalid		// Synchronous 32-bit EL1
+	ventry	el1_irq_invalid			// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__hyp_stub_vectors)
+
+	.align 11
+
+el1_sync:
+	mrs	x1, esr_el2
+	lsr	x1, x1, #26
+	cmp	x1, #0x16
+	b.ne	2f				// Not an HVC trap
+	cbz	x0, 1f
+	msr	vbar_el2, x0			// Set vbar_el2
+	b	2f
+1:	mrs	x0, vbar_el2			// Return vbar_el2
+2:	eret
+ENDPROC(el1_sync)
+
+.macro invalid_vector	label
+\label:
+	b \label
+ENDPROC(\label)
+.endm
+
+	invalid_vector	el2_sync_invalid
+	invalid_vector	el2_irq_invalid
+	invalid_vector	el2_fiq_invalid
+	invalid_vector	el2_error_invalid
+	invalid_vector	el1_sync_invalid
+	invalid_vector	el1_irq_invalid
+	invalid_vector	el1_fiq_invalid
+	invalid_vector	el1_error_invalid
+
+/*
+ * __hyp_set_vectors: Call this after boot to set the initial hypervisor
+ * vectors as part of hypervisor installation.  On an SMP system, this should
+ * be called on each CPU.
+ *
+ * x0 must be the physical address of the new vector table, and must be
+ * 2KB aligned.
+ *
+ * Before calling this, you must check that the stub hypervisor is installed
+ * everywhere, by waiting for any secondary CPUs to be brought up and then
+ * checking that is_hyp_mode_available() is true.
+ *
+ * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or
+ * something else went wrong... in such cases, trying to install a new
+ * hypervisor is unlikely to work as desired.
+ *
+ * When you call into your shiny new hypervisor, sp_el2 will contain junk,
+ * so you will need to set that to something sensible at the new hypervisor's
+ * initialisation entry point.
+ */
+
+ENTRY(__hyp_get_vectors)
+	mov	x0, xzr
+	// fall through
+ENTRY(__hyp_set_vectors)
+	hvc	#0
+	ret
+ENDPROC(__hyp_get_vectors)
+ENDPROC(__hyp_set_vectors)
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
new file mode 100644
index 000000000..8fae0756e
--- /dev/null
+++ b/arch/arm64/kernel/image.h
@@ -0,0 +1,62 @@
+/*
+ * Linker script macros to generate Image header fields.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_IMAGE_H
+#define __ASM_IMAGE_H
+
+#ifndef LINKER_SCRIPT
+#error This file should only be included in vmlinux.lds.S
+#endif
+
+/*
+ * There aren't any ELF relocations we can use to endian-swap values known only
+ * at link time (e.g. the subtraction of two symbol addresses), so we must get
+ * the linker to endian-swap certain values before emitting them.
+ */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define DATA_LE64(data)					\
+	((((data) & 0x00000000000000ff) << 56) |	\
+	 (((data) & 0x000000000000ff00) << 40) |	\
+	 (((data) & 0x0000000000ff0000) << 24) |	\
+	 (((data) & 0x00000000ff000000) << 8)  |	\
+	 (((data) & 0x000000ff00000000) >> 8)  |	\
+	 (((data) & 0x0000ff0000000000) >> 24) |	\
+	 (((data) & 0x00ff000000000000) >> 40) |	\
+	 (((data) & 0xff00000000000000) >> 56))
+#else
+#define DATA_LE64(data) ((data) & 0xffffffffffffffff)
+#endif
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define __HEAD_FLAG_BE	1
+#else
+#define __HEAD_FLAG_BE	0
+#endif
+
+#define __HEAD_FLAGS	(__HEAD_FLAG_BE << 0)
+
+/*
+ * These will output as part of the Image header, which should be little-endian
+ * regardless of the endianness of the kernel. While constant values could be
+ * endian swapped in head.S, all are done here for consistency.
+ */
+#define HEAD_SYMBOLS						\
+	_kernel_size_le		= DATA_LE64(_end - _text);	\
+	_kernel_offset_le	= DATA_LE64(TEXT_OFFSET);	\
+	_kernel_flags_le	= DATA_LE64(__HEAD_FLAGS);
+
+#endif /* __ASM_IMAGE_H */
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
new file mode 100644
index 000000000..924902083
--- /dev/null
+++ b/arch/arm64/kernel/insn.c
@@ -0,0 +1,1084 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
+#include <asm/fixmap.h>
+#include <asm/insn.h>
+
+#define AARCH64_INSN_SF_BIT	BIT(31)
+#define AARCH64_INSN_N_BIT	BIT(22)
+
+static int aarch64_insn_encoding_class[] = {
+	AARCH64_INSN_CLS_UNKNOWN,
+	AARCH64_INSN_CLS_UNKNOWN,
+	AARCH64_INSN_CLS_UNKNOWN,
+	AARCH64_INSN_CLS_UNKNOWN,
+	AARCH64_INSN_CLS_LDST,
+	AARCH64_INSN_CLS_DP_REG,
+	AARCH64_INSN_CLS_LDST,
+	AARCH64_INSN_CLS_DP_FPSIMD,
+	AARCH64_INSN_CLS_DP_IMM,
+	AARCH64_INSN_CLS_DP_IMM,
+	AARCH64_INSN_CLS_BR_SYS,
+	AARCH64_INSN_CLS_BR_SYS,
+	AARCH64_INSN_CLS_LDST,
+	AARCH64_INSN_CLS_DP_REG,
+	AARCH64_INSN_CLS_LDST,
+	AARCH64_INSN_CLS_DP_FPSIMD,
+};
+
+enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn)
+{
+	return aarch64_insn_encoding_class[(insn >> 25) & 0xf];
+}
+
+/* NOP is an alias of HINT */
+bool __kprobes aarch64_insn_is_nop(u32 insn)
+{
+	if (!aarch64_insn_is_hint(insn))
+		return false;
+
+	switch (insn & 0xFE0) {
+	case AARCH64_INSN_HINT_YIELD:
+	case AARCH64_INSN_HINT_WFE:
+	case AARCH64_INSN_HINT_WFI:
+	case AARCH64_INSN_HINT_SEV:
+	case AARCH64_INSN_HINT_SEVL:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static DEFINE_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+	unsigned long uintaddr = (uintptr_t) addr;
+	bool module = !core_kernel_text(uintaddr);
+	struct page *page;
+
+	if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
+		page = vmalloc_to_page(addr);
+	else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
+		page = virt_to_page(addr);
+	else
+		return addr;
+
+	BUG_ON(!page);
+	set_fixmap(fixmap, page_to_phys(page));
+
+	return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+	clear_fixmap(fixmap);
+}
+/*
+ * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
+ * little-endian.
+ */
+int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
+{
+	int ret;
+	u32 val;
+
+	ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE);
+	if (!ret)
+		*insnp = le32_to_cpu(val);
+
+	return ret;
+}
+
+static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
+{
+	void *waddr = addr;
+	unsigned long flags = 0;
+	int ret;
+
+	spin_lock_irqsave(&patch_lock, flags);
+	waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+	ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
+
+	patch_unmap(FIX_TEXT_POKE0);
+	spin_unlock_irqrestore(&patch_lock, flags);
+
+	return ret;
+}
+
+int __kprobes aarch64_insn_write(void *addr, u32 insn)
+{
+	insn = cpu_to_le32(insn);
+	return __aarch64_insn_write(addr, insn);
+}
+
+static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
+{
+	if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS)
+		return false;
+
+	return	aarch64_insn_is_b(insn) ||
+		aarch64_insn_is_bl(insn) ||
+		aarch64_insn_is_svc(insn) ||
+		aarch64_insn_is_hvc(insn) ||
+		aarch64_insn_is_smc(insn) ||
+		aarch64_insn_is_brk(insn) ||
+		aarch64_insn_is_nop(insn);
+}
+
+/*
+ * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
+ * Section B2.6.5 "Concurrent modification and execution of instructions":
+ * Concurrent modification and execution of instructions can lead to the
+ * resulting instruction performing any behavior that can be achieved by
+ * executing any sequence of instructions that can be executed from the
+ * same Exception level, except where the instruction before modification
+ * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC,
+ * or SMC instruction.
+ */
+bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn)
+{
+	return __aarch64_insn_hotpatch_safe(old_insn) &&
+	       __aarch64_insn_hotpatch_safe(new_insn);
+}
+
+int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
+{
+	u32 *tp = addr;
+	int ret;
+
+	/* A64 instructions must be word aligned */
+	if ((uintptr_t)tp & 0x3)
+		return -EINVAL;
+
+	ret = aarch64_insn_write(tp, insn);
+	if (ret == 0)
+		flush_icache_range((uintptr_t)tp,
+				   (uintptr_t)tp + AARCH64_INSN_SIZE);
+
+	return ret;
+}
+
+struct aarch64_insn_patch {
+	void		**text_addrs;
+	u32		*new_insns;
+	int		insn_cnt;
+	atomic_t	cpu_count;
+};
+
+static int __kprobes aarch64_insn_patch_text_cb(void *arg)
+{
+	int i, ret = 0;
+	struct aarch64_insn_patch *pp = arg;
+
+	/* The first CPU becomes master */
+	if (atomic_inc_return(&pp->cpu_count) == 1) {
+		for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
+			ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+							     pp->new_insns[i]);
+		/*
+		 * aarch64_insn_patch_text_nosync() calls flush_icache_range(),
+		 * which ends with "dsb; isb" pair guaranteeing global
+		 * visibility.
+		 */
+		/* Notify other processors with an additional increment. */
+		atomic_inc(&pp->cpu_count);
+	} else {
+		while (atomic_read(&pp->cpu_count) <= num_online_cpus())
+			cpu_relax();
+		isb();
+	}
+
+	return ret;
+}
+
+int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
+{
+	struct aarch64_insn_patch patch = {
+		.text_addrs = addrs,
+		.new_insns = insns,
+		.insn_cnt = cnt,
+		.cpu_count = ATOMIC_INIT(0),
+	};
+
+	if (cnt <= 0)
+		return -EINVAL;
+
+	return stop_machine(aarch64_insn_patch_text_cb, &patch,
+			    cpu_online_mask);
+}
+
+int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
+{
+	int ret;
+	u32 insn;
+
+	/* Unsafe to patch multiple instructions without synchronizaiton */
+	if (cnt == 1) {
+		ret = aarch64_insn_read(addrs[0], &insn);
+		if (ret)
+			return ret;
+
+		if (aarch64_insn_hotpatch_safe(insn, insns[0])) {
+			/*
+			 * ARMv8 architecture doesn't guarantee all CPUs see
+			 * the new instruction after returning from function
+			 * aarch64_insn_patch_text_nosync(). So send IPIs to
+			 * all other CPUs to achieve instruction
+			 * synchronization.
+			 */
+			ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]);
+			kick_all_cpus_sync();
+			return ret;
+		}
+	}
+
+	return aarch64_insn_patch_text_sync(addrs, insns, cnt);
+}
+
+static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
+						u32 *maskp, int *shiftp)
+{
+	u32 mask;
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_IMM_26:
+		mask = BIT(26) - 1;
+		shift = 0;
+		break;
+	case AARCH64_INSN_IMM_19:
+		mask = BIT(19) - 1;
+		shift = 5;
+		break;
+	case AARCH64_INSN_IMM_16:
+		mask = BIT(16) - 1;
+		shift = 5;
+		break;
+	case AARCH64_INSN_IMM_14:
+		mask = BIT(14) - 1;
+		shift = 5;
+		break;
+	case AARCH64_INSN_IMM_12:
+		mask = BIT(12) - 1;
+		shift = 10;
+		break;
+	case AARCH64_INSN_IMM_9:
+		mask = BIT(9) - 1;
+		shift = 12;
+		break;
+	case AARCH64_INSN_IMM_7:
+		mask = BIT(7) - 1;
+		shift = 15;
+		break;
+	case AARCH64_INSN_IMM_6:
+	case AARCH64_INSN_IMM_S:
+		mask = BIT(6) - 1;
+		shift = 10;
+		break;
+	case AARCH64_INSN_IMM_R:
+		mask = BIT(6) - 1;
+		shift = 16;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*maskp = mask;
+	*shiftp = shift;
+
+	return 0;
+}
+
+#define ADR_IMM_HILOSPLIT	2
+#define ADR_IMM_SIZE		SZ_2M
+#define ADR_IMM_LOMASK		((1 << ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_HIMASK		((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_LOSHIFT		29
+#define ADR_IMM_HISHIFT		5
+
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn)
+{
+	u32 immlo, immhi, mask;
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_IMM_ADR:
+		shift = 0;
+		immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK;
+		immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK;
+		insn = (immhi << ADR_IMM_HILOSPLIT) | immlo;
+		mask = ADR_IMM_SIZE - 1;
+		break;
+	default:
+		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+			pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n",
+			       type);
+			return 0;
+		}
+	}
+
+	return (insn >> shift) & mask;
+}
+
+u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+				  u32 insn, u64 imm)
+{
+	u32 immlo, immhi, mask;
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_IMM_ADR:
+		shift = 0;
+		immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
+		imm >>= ADR_IMM_HILOSPLIT;
+		immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT;
+		imm = immlo | immhi;
+		mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
+			(ADR_IMM_HIMASK << ADR_IMM_HISHIFT));
+		break;
+	default:
+		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+			pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
+			       type);
+			return 0;
+		}
+	}
+
+	/* Update the immediate field. */
+	insn &= ~(mask << shift);
+	insn |= (imm & mask) << shift;
+
+	return insn;
+}
+
+static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
+					u32 insn,
+					enum aarch64_insn_register reg)
+{
+	int shift;
+
+	if (reg < AARCH64_INSN_REG_0 || reg > AARCH64_INSN_REG_SP) {
+		pr_err("%s: unknown register encoding %d\n", __func__, reg);
+		return 0;
+	}
+
+	switch (type) {
+	case AARCH64_INSN_REGTYPE_RT:
+	case AARCH64_INSN_REGTYPE_RD:
+		shift = 0;
+		break;
+	case AARCH64_INSN_REGTYPE_RN:
+		shift = 5;
+		break;
+	case AARCH64_INSN_REGTYPE_RT2:
+	case AARCH64_INSN_REGTYPE_RA:
+		shift = 10;
+		break;
+	case AARCH64_INSN_REGTYPE_RM:
+		shift = 16;
+		break;
+	default:
+		pr_err("%s: unknown register type encoding %d\n", __func__,
+		       type);
+		return 0;
+	}
+
+	insn &= ~(GENMASK(4, 0) << shift);
+	insn |= reg << shift;
+
+	return insn;
+}
+
+static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
+					 u32 insn)
+{
+	u32 size;
+
+	switch (type) {
+	case AARCH64_INSN_SIZE_8:
+		size = 0;
+		break;
+	case AARCH64_INSN_SIZE_16:
+		size = 1;
+		break;
+	case AARCH64_INSN_SIZE_32:
+		size = 2;
+		break;
+	case AARCH64_INSN_SIZE_64:
+		size = 3;
+		break;
+	default:
+		pr_err("%s: unknown size encoding %d\n", __func__, type);
+		return 0;
+	}
+
+	insn &= ~GENMASK(31, 30);
+	insn |= size << 30;
+
+	return insn;
+}
+
+static inline long branch_imm_common(unsigned long pc, unsigned long addr,
+				     long range)
+{
+	long offset;
+
+	/*
+	 * PC: A 64-bit Program Counter holding the address of the current
+	 * instruction. A64 instructions must be word-aligned.
+	 */
+	BUG_ON((pc & 0x3) || (addr & 0x3));
+
+	offset = ((long)addr - (long)pc);
+	BUG_ON(offset < -range || offset >= range);
+
+	return offset;
+}
+
+u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
+					  enum aarch64_insn_branch_type type)
+{
+	u32 insn;
+	long offset;
+
+	/*
+	 * B/BL support [-128M, 128M) offset
+	 * ARM64 virtual address arrangement guarantees all kernel and module
+	 * texts are within +/-128M.
+	 */
+	offset = branch_imm_common(pc, addr, SZ_128M);
+
+	switch (type) {
+	case AARCH64_INSN_BRANCH_LINK:
+		insn = aarch64_insn_get_bl_value();
+		break;
+	case AARCH64_INSN_BRANCH_NOLINK:
+		insn = aarch64_insn_get_b_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn,
+					     offset >> 2);
+}
+
+u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
+				     enum aarch64_insn_register reg,
+				     enum aarch64_insn_variant variant,
+				     enum aarch64_insn_branch_type type)
+{
+	u32 insn;
+	long offset;
+
+	offset = branch_imm_common(pc, addr, SZ_1M);
+
+	switch (type) {
+	case AARCH64_INSN_BRANCH_COMP_ZERO:
+		insn = aarch64_insn_get_cbz_value();
+		break;
+	case AARCH64_INSN_BRANCH_COMP_NONZERO:
+		insn = aarch64_insn_get_cbnz_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
+					     offset >> 2);
+}
+
+u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
+				     enum aarch64_insn_condition cond)
+{
+	u32 insn;
+	long offset;
+
+	offset = branch_imm_common(pc, addr, SZ_1M);
+
+	insn = aarch64_insn_get_bcond_value();
+
+	BUG_ON(cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL);
+	insn |= cond;
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
+					     offset >> 2);
+}
+
+u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op)
+{
+	return aarch64_insn_get_hint_value() | op;
+}
+
+u32 __kprobes aarch64_insn_gen_nop(void)
+{
+	return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
+}
+
+u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
+				enum aarch64_insn_branch_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_BRANCH_NOLINK:
+		insn = aarch64_insn_get_br_value();
+		break;
+	case AARCH64_INSN_BRANCH_LINK:
+		insn = aarch64_insn_get_blr_value();
+		break;
+	case AARCH64_INSN_BRANCH_RETURN:
+		insn = aarch64_insn_get_ret_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, reg);
+}
+
+u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
+				    enum aarch64_insn_register base,
+				    enum aarch64_insn_register offset,
+				    enum aarch64_insn_size_type size,
+				    enum aarch64_insn_ldst_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_LDST_LOAD_REG_OFFSET:
+		insn = aarch64_insn_get_ldr_reg_value();
+		break;
+	case AARCH64_INSN_LDST_STORE_REG_OFFSET:
+		insn = aarch64_insn_get_str_reg_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_ldst_size(size, insn);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+					    base);
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn,
+					    offset);
+}
+
+u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
+				     enum aarch64_insn_register reg2,
+				     enum aarch64_insn_register base,
+				     int offset,
+				     enum aarch64_insn_variant variant,
+				     enum aarch64_insn_ldst_type type)
+{
+	u32 insn;
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX:
+		insn = aarch64_insn_get_ldp_pre_value();
+		break;
+	case AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX:
+		insn = aarch64_insn_get_stp_pre_value();
+		break;
+	case AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX:
+		insn = aarch64_insn_get_ldp_post_value();
+		break;
+	case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX:
+		insn = aarch64_insn_get_stp_post_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		/* offset must be multiples of 4 in the range [-256, 252] */
+		BUG_ON(offset & 0x3);
+		BUG_ON(offset < -256 || offset > 252);
+		shift = 2;
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		/* offset must be multiples of 8 in the range [-512, 504] */
+		BUG_ON(offset & 0x7);
+		BUG_ON(offset < -512 || offset > 504);
+		shift = 3;
+		insn |= AARCH64_INSN_SF_BIT;
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+					    reg1);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
+					    reg2);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+					    base);
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_7, insn,
+					     offset >> shift);
+}
+
+u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
+				 enum aarch64_insn_register src,
+				 int imm, enum aarch64_insn_variant variant,
+				 enum aarch64_insn_adsb_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_ADSB_ADD:
+		insn = aarch64_insn_get_add_imm_value();
+		break;
+	case AARCH64_INSN_ADSB_SUB:
+		insn = aarch64_insn_get_sub_imm_value();
+		break;
+	case AARCH64_INSN_ADSB_ADD_SETFLAGS:
+		insn = aarch64_insn_get_adds_imm_value();
+		break;
+	case AARCH64_INSN_ADSB_SUB_SETFLAGS:
+		insn = aarch64_insn_get_subs_imm_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	BUG_ON(imm & ~(SZ_4K - 1));
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
+}
+
+u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
+			      enum aarch64_insn_register src,
+			      int immr, int imms,
+			      enum aarch64_insn_variant variant,
+			      enum aarch64_insn_bitfield_type type)
+{
+	u32 insn;
+	u32 mask;
+
+	switch (type) {
+	case AARCH64_INSN_BITFIELD_MOVE:
+		insn = aarch64_insn_get_bfm_value();
+		break;
+	case AARCH64_INSN_BITFIELD_MOVE_UNSIGNED:
+		insn = aarch64_insn_get_ubfm_value();
+		break;
+	case AARCH64_INSN_BITFIELD_MOVE_SIGNED:
+		insn = aarch64_insn_get_sbfm_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		mask = GENMASK(4, 0);
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT | AARCH64_INSN_N_BIT;
+		mask = GENMASK(5, 0);
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	BUG_ON(immr & ~mask);
+	BUG_ON(imms & ~mask);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
+}
+
+u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
+			      int imm, int shift,
+			      enum aarch64_insn_variant variant,
+			      enum aarch64_insn_movewide_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_MOVEWIDE_ZERO:
+		insn = aarch64_insn_get_movz_value();
+		break;
+	case AARCH64_INSN_MOVEWIDE_KEEP:
+		insn = aarch64_insn_get_movk_value();
+		break;
+	case AARCH64_INSN_MOVEWIDE_INVERSE:
+		insn = aarch64_insn_get_movn_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	BUG_ON(imm & ~(SZ_64K - 1));
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		BUG_ON(shift != 0 && shift != 16);
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		BUG_ON(shift != 0 && shift != 16 && shift != 32 &&
+		       shift != 48);
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn |= (shift >> 4) << 21;
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
+}
+
+u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
+					 enum aarch64_insn_register src,
+					 enum aarch64_insn_register reg,
+					 int shift,
+					 enum aarch64_insn_variant variant,
+					 enum aarch64_insn_adsb_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_ADSB_ADD:
+		insn = aarch64_insn_get_add_value();
+		break;
+	case AARCH64_INSN_ADSB_SUB:
+		insn = aarch64_insn_get_sub_value();
+		break;
+	case AARCH64_INSN_ADSB_ADD_SETFLAGS:
+		insn = aarch64_insn_get_adds_value();
+		break;
+	case AARCH64_INSN_ADSB_SUB_SETFLAGS:
+		insn = aarch64_insn_get_subs_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		BUG_ON(shift & ~(SZ_32 - 1));
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		BUG_ON(shift & ~(SZ_64 - 1));
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
+}
+
+u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
+			   enum aarch64_insn_register src,
+			   enum aarch64_insn_variant variant,
+			   enum aarch64_insn_data1_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_DATA1_REVERSE_16:
+		insn = aarch64_insn_get_rev16_value();
+		break;
+	case AARCH64_INSN_DATA1_REVERSE_32:
+		insn = aarch64_insn_get_rev32_value();
+		break;
+	case AARCH64_INSN_DATA1_REVERSE_64:
+		BUG_ON(variant != AARCH64_INSN_VARIANT_64BIT);
+		insn = aarch64_insn_get_rev64_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+}
+
+u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
+			   enum aarch64_insn_register src,
+			   enum aarch64_insn_register reg,
+			   enum aarch64_insn_variant variant,
+			   enum aarch64_insn_data2_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_DATA2_UDIV:
+		insn = aarch64_insn_get_udiv_value();
+		break;
+	case AARCH64_INSN_DATA2_SDIV:
+		insn = aarch64_insn_get_sdiv_value();
+		break;
+	case AARCH64_INSN_DATA2_LSLV:
+		insn = aarch64_insn_get_lslv_value();
+		break;
+	case AARCH64_INSN_DATA2_LSRV:
+		insn = aarch64_insn_get_lsrv_value();
+		break;
+	case AARCH64_INSN_DATA2_ASRV:
+		insn = aarch64_insn_get_asrv_value();
+		break;
+	case AARCH64_INSN_DATA2_RORV:
+		insn = aarch64_insn_get_rorv_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
+}
+
+u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
+			   enum aarch64_insn_register src,
+			   enum aarch64_insn_register reg1,
+			   enum aarch64_insn_register reg2,
+			   enum aarch64_insn_variant variant,
+			   enum aarch64_insn_data3_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_DATA3_MADD:
+		insn = aarch64_insn_get_madd_value();
+		break;
+	case AARCH64_INSN_DATA3_MSUB:
+		insn = aarch64_insn_get_msub_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RA, insn, src);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+					    reg1);
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn,
+					    reg2);
+}
+
+u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
+					 enum aarch64_insn_register src,
+					 enum aarch64_insn_register reg,
+					 int shift,
+					 enum aarch64_insn_variant variant,
+					 enum aarch64_insn_logic_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_LOGIC_AND:
+		insn = aarch64_insn_get_and_value();
+		break;
+	case AARCH64_INSN_LOGIC_BIC:
+		insn = aarch64_insn_get_bic_value();
+		break;
+	case AARCH64_INSN_LOGIC_ORR:
+		insn = aarch64_insn_get_orr_value();
+		break;
+	case AARCH64_INSN_LOGIC_ORN:
+		insn = aarch64_insn_get_orn_value();
+		break;
+	case AARCH64_INSN_LOGIC_EOR:
+		insn = aarch64_insn_get_eor_value();
+		break;
+	case AARCH64_INSN_LOGIC_EON:
+		insn = aarch64_insn_get_eon_value();
+		break;
+	case AARCH64_INSN_LOGIC_AND_SETFLAGS:
+		insn = aarch64_insn_get_ands_value();
+		break;
+	case AARCH64_INSN_LOGIC_BIC_SETFLAGS:
+		insn = aarch64_insn_get_bics_value();
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		BUG_ON(shift & ~(SZ_32 - 1));
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		BUG_ON(shift & ~(SZ_64 - 1));
+		break;
+	default:
+		BUG_ON(1);
+		return AARCH64_BREAK_FAULT;
+	}
+
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
+}
+
+bool aarch32_insn_is_wide(u32 insn)
+{
+	return insn >= 0xe800;
+}
+
+/*
+ * Macros/defines for extracting register numbers from instruction.
+ */
+u32 aarch32_insn_extract_reg_num(u32 insn, int offset)
+{
+	return (insn & (0xf << offset)) >> offset;
+}
+
+#define OPC2_MASK	0x7
+#define OPC2_OFFSET	5
+u32 aarch32_insn_mcr_extract_opc2(u32 insn)
+{
+	return (insn & (OPC2_MASK << OPC2_OFFSET)) >> OPC2_OFFSET;
+}
+
+#define CRM_MASK	0xf
+u32 aarch32_insn_mcr_extract_crm(u32 insn)
+{
+	return insn & CRM_MASK;
+}
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
new file mode 100644
index 000000000..354be2a87
--- /dev/null
+++ b/arch/arm64/kernel/io.c
@@ -0,0 +1,110 @@
+/*
+ * Based on arch/arm/kernel/io.c
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+/*
+ * Copy data from IO memory space to "real" memory space.
+ */
+void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+	while (count && (!IS_ALIGNED((unsigned long)from, 8) ||
+			 !IS_ALIGNED((unsigned long)to, 8))) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
+	}
+
+	while (count >= 8) {
+		*(u64 *)to = __raw_readq(from);
+		from += 8;
+		to += 8;
+		count -= 8;
+	}
+
+	while (count) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
+	}
+}
+EXPORT_SYMBOL(__memcpy_fromio);
+
+/*
+ * Copy data from "real" memory space to IO memory space.
+ */
+void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+	while (count && (!IS_ALIGNED((unsigned long)to, 8) ||
+			 !IS_ALIGNED((unsigned long)from, 8))) {
+		__raw_writeb(*(volatile u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+
+	while (count >= 8) {
+		__raw_writeq(*(volatile u64 *)from, to);
+		from += 8;
+		to += 8;
+		count -= 8;
+	}
+
+	while (count) {
+		__raw_writeb(*(volatile u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+}
+EXPORT_SYMBOL(__memcpy_toio);
+
+/*
+ * "memset" on IO memory space.
+ */
+void __memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+	u64 qc = (u8)c;
+
+	qc |= qc << 8;
+	qc |= qc << 16;
+	qc |= qc << 32;
+
+	while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
+		__raw_writeb(c, dst);
+		dst++;
+		count--;
+	}
+
+	while (count >= 8) {
+		__raw_writeq(qc, dst);
+		dst += 8;
+		count -= 8;
+	}
+
+	while (count) {
+		__raw_writeb(c, dst);
+		dst++;
+		count--;
+	}
+}
+EXPORT_SYMBOL(__memset_io);
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
new file mode 100644
index 000000000..240b75c0e
--- /dev/null
+++ b/arch/arm64/kernel/irq.c
@@ -0,0 +1,119 @@
+/*
+ * Based on arch/arm/kernel/irq.c
+ *
+ * Copyright (C) 1992 Linus Torvalds
+ * Modifications for ARM processor Copyright (C) 1995-2000 Russell King.
+ * Support for Dynamic Tick Timer Copyright (C) 2004-2005 Nokia Corporation.
+ * Dynamic Tick Timer written by Tony Lindgren <tony@atomide.com> and
+ * Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/irqchip.h>
+#include <linux/seq_file.h>
+#include <linux/ratelimit.h>
+
+unsigned long irq_err_count;
+
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#ifdef CONFIG_SMP
+	show_ipi_list(p, prec);
+#endif
+	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
+	return 0;
+}
+
+void (*handle_arch_irq)(struct pt_regs *) = NULL;
+
+void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
+{
+	if (handle_arch_irq)
+		return;
+
+	handle_arch_irq = handle_irq;
+}
+
+void __init init_IRQ(void)
+{
+	irqchip_init();
+	if (!handle_arch_irq)
+		panic("No interrupt controller found.");
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static bool migrate_one_irq(struct irq_desc *desc)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	const struct cpumask *affinity = d->affinity;
+	struct irq_chip *c;
+	bool ret = false;
+
+	/*
+	 * If this is a per-CPU interrupt, or the affinity does not
+	 * include this CPU, then we have nothing to do.
+	 */
+	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
+		return false;
+
+	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+		affinity = cpu_online_mask;
+		ret = true;
+	}
+
+	c = irq_data_get_irq_chip(d);
+	if (!c->irq_set_affinity)
+		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
+	else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
+		cpumask_copy(d->affinity, affinity);
+
+	return ret;
+}
+
+/*
+ * The current CPU has been marked offline.  Migrate IRQs off this CPU.
+ * If the affinity settings do not allow other CPUs, force them onto any
+ * available CPU.
+ *
+ * Note: we must iterate over all IRQs, whether they have an attached
+ * action structure or not, as we need to get chained interrupts too.
+ */
+void migrate_irqs(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	for_each_irq_desc(i, desc) {
+		bool affinity_broken;
+
+		raw_spin_lock(&desc->lock);
+		affinity_broken = migrate_one_irq(desc);
+		raw_spin_unlock(&desc->lock);
+
+		if (affinity_broken)
+			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
+					    i, smp_processor_id());
+	}
+
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
new file mode 100644
index 000000000..4f1fec7a4
--- /dev/null
+++ b/arch/arm64/kernel/jump_label.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Based on arch/arm/kernel/jump_label.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/insn.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	void *addr = (void *)entry->code;
+	u32 insn;
+
+	if (type == JUMP_LABEL_ENABLE) {
+		insn = aarch64_insn_gen_branch_imm(entry->code,
+						   entry->target,
+						   AARCH64_INSN_BRANCH_NOLINK);
+	} else {
+		insn = aarch64_insn_gen_nop();
+	}
+
+	aarch64_insn_patch_text(&addr, &insn, 1);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+				      enum jump_label_type type)
+{
+	/*
+	 * We use the architected A64 NOP in arch_static_branch, so there's no
+	 * need to patch an identical A64 NOP over the top of it here. The core
+	 * will call arch_jump_label_transform from a module notifier if the
+	 * NOP needs to be replaced by a branch.
+	 */
+}
+
+#endif	/* HAVE_JUMP_LABEL */
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
new file mode 100644
index 000000000..a0d10c55f
--- /dev/null
+++ b/arch/arm64/kernel/kgdb.c
@@ -0,0 +1,336 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/kernel/kgdb.c
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irq.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <asm/traps.h>
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+	{ "x0", 8, offsetof(struct pt_regs, regs[0])},
+	{ "x1", 8, offsetof(struct pt_regs, regs[1])},
+	{ "x2", 8, offsetof(struct pt_regs, regs[2])},
+	{ "x3", 8, offsetof(struct pt_regs, regs[3])},
+	{ "x4", 8, offsetof(struct pt_regs, regs[4])},
+	{ "x5", 8, offsetof(struct pt_regs, regs[5])},
+	{ "x6", 8, offsetof(struct pt_regs, regs[6])},
+	{ "x7", 8, offsetof(struct pt_regs, regs[7])},
+	{ "x8", 8, offsetof(struct pt_regs, regs[8])},
+	{ "x9", 8, offsetof(struct pt_regs, regs[9])},
+	{ "x10", 8, offsetof(struct pt_regs, regs[10])},
+	{ "x11", 8, offsetof(struct pt_regs, regs[11])},
+	{ "x12", 8, offsetof(struct pt_regs, regs[12])},
+	{ "x13", 8, offsetof(struct pt_regs, regs[13])},
+	{ "x14", 8, offsetof(struct pt_regs, regs[14])},
+	{ "x15", 8, offsetof(struct pt_regs, regs[15])},
+	{ "x16", 8, offsetof(struct pt_regs, regs[16])},
+	{ "x17", 8, offsetof(struct pt_regs, regs[17])},
+	{ "x18", 8, offsetof(struct pt_regs, regs[18])},
+	{ "x19", 8, offsetof(struct pt_regs, regs[19])},
+	{ "x20", 8, offsetof(struct pt_regs, regs[20])},
+	{ "x21", 8, offsetof(struct pt_regs, regs[21])},
+	{ "x22", 8, offsetof(struct pt_regs, regs[22])},
+	{ "x23", 8, offsetof(struct pt_regs, regs[23])},
+	{ "x24", 8, offsetof(struct pt_regs, regs[24])},
+	{ "x25", 8, offsetof(struct pt_regs, regs[25])},
+	{ "x26", 8, offsetof(struct pt_regs, regs[26])},
+	{ "x27", 8, offsetof(struct pt_regs, regs[27])},
+	{ "x28", 8, offsetof(struct pt_regs, regs[28])},
+	{ "x29", 8, offsetof(struct pt_regs, regs[29])},
+	{ "x30", 8, offsetof(struct pt_regs, regs[30])},
+	{ "sp", 8, offsetof(struct pt_regs, sp)},
+	{ "pc", 8, offsetof(struct pt_regs, pc)},
+	{ "pstate", 8, offsetof(struct pt_regs, pstate)},
+	{ "v0", 16, -1 },
+	{ "v1", 16, -1 },
+	{ "v2", 16, -1 },
+	{ "v3", 16, -1 },
+	{ "v4", 16, -1 },
+	{ "v5", 16, -1 },
+	{ "v6", 16, -1 },
+	{ "v7", 16, -1 },
+	{ "v8", 16, -1 },
+	{ "v9", 16, -1 },
+	{ "v10", 16, -1 },
+	{ "v11", 16, -1 },
+	{ "v12", 16, -1 },
+	{ "v13", 16, -1 },
+	{ "v14", 16, -1 },
+	{ "v15", 16, -1 },
+	{ "v16", 16, -1 },
+	{ "v17", 16, -1 },
+	{ "v18", 16, -1 },
+	{ "v19", 16, -1 },
+	{ "v20", 16, -1 },
+	{ "v21", 16, -1 },
+	{ "v22", 16, -1 },
+	{ "v23", 16, -1 },
+	{ "v24", 16, -1 },
+	{ "v25", 16, -1 },
+	{ "v26", 16, -1 },
+	{ "v27", 16, -1 },
+	{ "v28", 16, -1 },
+	{ "v29", 16, -1 },
+	{ "v30", 16, -1 },
+	{ "v31", 16, -1 },
+	{ "fpsr", 4, -1 },
+	{ "fpcr", 4, -1 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return NULL;
+
+	if (dbg_reg_def[regno].offset != -1)
+		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+		       dbg_reg_def[regno].size);
+	else
+		memset(mem, 0, dbg_reg_def[regno].size);
+	return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return -EINVAL;
+
+	if (dbg_reg_def[regno].offset != -1)
+		memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+		       dbg_reg_def[regno].size);
+	return 0;
+}
+
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+	struct pt_regs *thread_regs;
+
+	/* Initialize to zero */
+	memset((char *)gdb_regs, 0, NUMREGBYTES);
+	thread_regs = task_pt_regs(task);
+	memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->pc = pc;
+}
+
+static int compiled_break;
+
+static void kgdb_arch_update_addr(struct pt_regs *regs,
+				char *remcom_in_buffer)
+{
+	unsigned long addr;
+	char *ptr;
+
+	ptr = &remcom_in_buffer[1];
+	if (kgdb_hex2long(&ptr, &addr))
+		kgdb_arch_set_pc(regs, addr);
+	else if (compiled_break == 1)
+		kgdb_arch_set_pc(regs, regs->pc + 4);
+
+	compiled_break = 0;
+}
+
+int kgdb_arch_handle_exception(int exception_vector, int signo,
+			       int err_code, char *remcom_in_buffer,
+			       char *remcom_out_buffer,
+			       struct pt_regs *linux_regs)
+{
+	int err;
+
+	switch (remcom_in_buffer[0]) {
+	case 'D':
+	case 'k':
+		/*
+		 * Packet D (Detach), k (kill). No special handling
+		 * is required here. Handle same as c packet.
+		 */
+	case 'c':
+		/*
+		 * Packet c (Continue) to continue executing.
+		 * Set pc to required address.
+		 * Try to read optional parameter and set pc.
+		 * If this was a compiled breakpoint, we need to move
+		 * to the next instruction else we will just breakpoint
+		 * over and over again.
+		 */
+		kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+		atomic_set(&kgdb_cpu_doing_single_step, -1);
+		kgdb_single_step =  0;
+
+		/*
+		 * Received continue command, disable single step
+		 */
+		if (kernel_active_single_step())
+			kernel_disable_single_step();
+
+		err = 0;
+		break;
+	case 's':
+		/*
+		 * Update step address value with address passed
+		 * with step packet.
+		 * On debug exception return PC is copied to ELR
+		 * So just update PC.
+		 * If no step address is passed, resume from the address
+		 * pointed by PC. Do not update PC
+		 */
+		kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+		atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+		kgdb_single_step =  1;
+
+		/*
+		 * Enable single step handling
+		 */
+		if (!kernel_active_single_step())
+			kernel_enable_single_step(linux_regs);
+		err = 0;
+		break;
+	default:
+		err = -1;
+	}
+	return err;
+}
+
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+	return 0;
+}
+
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	compiled_break = 1;
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+
+	return 0;
+}
+
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+	return 0;
+}
+
+static struct break_hook kgdb_brkpt_hook = {
+	.esr_mask	= 0xffffffff,
+	.esr_val	= DBG_ESR_VAL_BRK(KGDB_DYN_DBG_BRK_IMM),
+	.fn		= kgdb_brk_fn
+};
+
+static struct break_hook kgdb_compiled_brkpt_hook = {
+	.esr_mask	= 0xffffffff,
+	.esr_val	= DBG_ESR_VAL_BRK(KGDB_COMPILED_DBG_BRK_IMM),
+	.fn		= kgdb_compiled_brk_fn
+};
+
+static struct step_hook kgdb_step_hook = {
+	.fn		= kgdb_step_brk_fn
+};
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	local_irq_enable();
+	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+	struct pt_regs *regs = args->regs;
+
+	if (kgdb_handle_exception(1, args->signr, cmd, regs))
+		return NOTIFY_DONE;
+	return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __kgdb_notify(ptr, cmd);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+	.notifier_call	= kgdb_notify,
+	/*
+	 * Want to be lowest priority
+	 */
+	.priority	= -INT_MAX,
+};
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+	int ret = register_die_notifier(&kgdb_notifier);
+
+	if (ret != 0)
+		return ret;
+
+	register_break_hook(&kgdb_brkpt_hook);
+	register_break_hook(&kgdb_compiled_brkpt_hook);
+	register_step_hook(&kgdb_step_hook);
+	return 0;
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+	unregister_break_hook(&kgdb_brkpt_hook);
+	unregister_break_hook(&kgdb_compiled_brkpt_hook);
+	unregister_step_hook(&kgdb_step_hook);
+	unregister_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * ARM instructions are always in LE.
+ * Break instruction is encoded in LE format
+ */
+struct kgdb_arch arch_kgdb_ops = {
+	.gdb_bpt_instr = {
+		KGDB_DYN_BRK_INS_BYTE0,
+		KGDB_DYN_BRK_INS_BYTE1,
+		KGDB_DYN_BRK_INS_BYTE2,
+		KGDB_DYN_BRK_INS_BYTE3,
+	}
+};
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
new file mode 100644
index 000000000..997e6b27f
--- /dev/null
+++ b/arch/arm64/kernel/kuser32.S
@@ -0,0 +1,118 @@
+/*
+ * Low-level user helpers placed in the vectors page for AArch32.
+ * Based on the kuser helpers in arch/arm/kernel/entry-armv.S.
+ *
+ * Copyright (C) 2005-2011 Nicolas Pitre <nico@fluxnic.net>
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * AArch32 user helpers.
+ *
+ * Each segment is 32-byte aligned and will be moved to the top of the high
+ * vector page.  New segments (if ever needed) must be added in front of
+ * existing ones.  This mechanism should be used only for things that are
+ * really small and justified, and not be abused freely.
+ *
+ * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
+ */
+
+#include <asm/unistd.h>
+
+	.align	5
+	.globl	__kuser_helper_start
+__kuser_helper_start:
+
+__kuser_cmpxchg64:			// 0xffff0f60
+	.inst	0xe92d00f0		//	push		{r4, r5, r6, r7}
+	.inst	0xe1c040d0		//	ldrd		r4, r5, [r0]
+	.inst	0xe1c160d0		//	ldrd		r6, r7, [r1]
+	.inst	0xe1b20f9f		// 1:	ldrexd		r0, r1, [r2]
+	.inst	0xe0303004		//	eors		r3, r0, r4
+	.inst	0x00313005		//	eoreqs		r3, r1, r5
+	.inst	0x01a23e96		//	stlexdeq	r3, r6, [r2]
+	.inst	0x03330001		//	teqeq		r3, #1
+	.inst	0x0afffff9		//	beq		1b
+	.inst	0xf57ff05b		//	dmb		ish
+	.inst	0xe2730000		//	rsbs		r0, r3, #0
+	.inst	0xe8bd00f0		//	pop		{r4, r5, r6, r7}
+	.inst	0xe12fff1e		//	bx		lr
+
+	.align	5
+__kuser_memory_barrier:			// 0xffff0fa0
+	.inst	0xf57ff05b		//	dmb		ish
+	.inst	0xe12fff1e		//	bx		lr
+
+	.align	5
+__kuser_cmpxchg:			// 0xffff0fc0
+	.inst	0xe1923f9f		// 1:	ldrex		r3, [r2]
+	.inst	0xe0533000		//	subs		r3, r3, r0
+	.inst	0x01823e91		//	stlexeq		r3, r1, [r2]
+	.inst	0x03330001		//	teqeq		r3, #1
+	.inst	0x0afffffa		//	beq		1b
+	.inst	0xf57ff05b		//	dmb		ish
+	.inst	0xe2730000		//	rsbs		r0, r3, #0
+	.inst	0xe12fff1e		//	bx		lr
+
+	.align	5
+__kuser_get_tls:			// 0xffff0fe0
+	.inst	0xee1d0f70		//	mrc		p15, 0, r0, c13, c0, 3
+	.inst	0xe12fff1e		//	bx		lr
+	.rep	5
+	.word	0
+	.endr
+
+__kuser_helper_version:			// 0xffff0ffc
+	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)
+	.globl	__kuser_helper_end
+__kuser_helper_end:
+
+/*
+ * AArch32 sigreturn code
+ *
+ * For ARM syscalls, the syscall number has to be loaded into r7.
+ * We do not support an OABI userspace.
+ *
+ * For Thumb syscalls, we also pass the syscall number via r7. We therefore
+ * need two 16-bit instructions.
+ */
+	.globl __aarch32_sigret_code_start
+__aarch32_sigret_code_start:
+
+	/*
+	 * ARM Code
+	 */
+	.byte	__NR_compat_sigreturn, 0x70, 0xa0, 0xe3	// mov	r7, #__NR_compat_sigreturn
+	.byte	__NR_compat_sigreturn, 0x00, 0x00, 0xef	// svc	#__NR_compat_sigreturn
+
+	/*
+	 * Thumb code
+	 */
+	.byte	__NR_compat_sigreturn, 0x27			// svc	#__NR_compat_sigreturn
+	.byte	__NR_compat_sigreturn, 0xdf			// mov	r7, #__NR_compat_sigreturn
+
+	/*
+	 * ARM code
+	 */
+	.byte	__NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3	// mov	r7, #__NR_compat_rt_sigreturn
+	.byte	__NR_compat_rt_sigreturn, 0x00, 0x00, 0xef	// svc	#__NR_compat_rt_sigreturn
+
+	/*
+	 * Thumb code
+	 */
+	.byte	__NR_compat_rt_sigreturn, 0x27			// svc	#__NR_compat_rt_sigreturn
+	.byte	__NR_compat_rt_sigreturn, 0xdf			// mov	r7, #__NR_compat_rt_sigreturn
+
+        .globl __aarch32_sigret_code_end
+__aarch32_sigret_code_end:
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
new file mode 100644
index 000000000..67bf4107f
--- /dev/null
+++ b/arch/arm64/kernel/module.c
@@ -0,0 +1,415 @@
+/*
+ * AArch64 loadable module support.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/elf.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/moduleloader.h>
+#include <linux/vmalloc.h>
+#include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/sections.h>
+
+#define	AARCH64_INSN_IMM_MOVNZ		AARCH64_INSN_IMM_MAX
+#define	AARCH64_INSN_IMM_MOVK		AARCH64_INSN_IMM_16
+
+void *module_alloc(unsigned long size)
+{
+	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				    GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				    NUMA_NO_NODE, __builtin_return_address(0));
+}
+
+enum aarch64_reloc_op {
+	RELOC_OP_NONE,
+	RELOC_OP_ABS,
+	RELOC_OP_PREL,
+	RELOC_OP_PAGE,
+};
+
+static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val)
+{
+	switch (reloc_op) {
+	case RELOC_OP_ABS:
+		return val;
+	case RELOC_OP_PREL:
+		return val - (u64)place;
+	case RELOC_OP_PAGE:
+		return (val & ~0xfff) - ((u64)place & ~0xfff);
+	case RELOC_OP_NONE:
+		return 0;
+	}
+
+	pr_err("do_reloc: unknown relocation operation %d\n", reloc_op);
+	return 0;
+}
+
+static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
+{
+	u64 imm_mask = (1 << len) - 1;
+	s64 sval = do_reloc(op, place, val);
+
+	switch (len) {
+	case 16:
+		*(s16 *)place = sval;
+		break;
+	case 32:
+		*(s32 *)place = sval;
+		break;
+	case 64:
+		*(s64 *)place = sval;
+		break;
+	default:
+		pr_err("Invalid length (%d) for data relocation\n", len);
+		return 0;
+	}
+
+	/*
+	 * Extract the upper value bits (including the sign bit) and
+	 * shift them to bit 0.
+	 */
+	sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1);
+
+	/*
+	 * Overflow has occurred if the value is not representable in
+	 * len bits (i.e the bottom len bits are not sign-extended and
+	 * the top bits are not all zero).
+	 */
+	if ((u64)(sval + 1) > 2)
+		return -ERANGE;
+
+	return 0;
+}
+
+static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
+			   int lsb, enum aarch64_insn_imm_type imm_type)
+{
+	u64 imm, limit = 0;
+	s64 sval;
+	u32 insn = le32_to_cpu(*(u32 *)place);
+
+	sval = do_reloc(op, place, val);
+	sval >>= lsb;
+	imm = sval & 0xffff;
+
+	if (imm_type == AARCH64_INSN_IMM_MOVNZ) {
+		/*
+		 * For signed MOVW relocations, we have to manipulate the
+		 * instruction encoding depending on whether or not the
+		 * immediate is less than zero.
+		 */
+		insn &= ~(3 << 29);
+		if ((s64)imm >= 0) {
+			/* >=0: Set the instruction to MOVZ (opcode 10b). */
+			insn |= 2 << 29;
+		} else {
+			/*
+			 * <0: Set the instruction to MOVN (opcode 00b).
+			 *     Since we've masked the opcode already, we
+			 *     don't need to do anything other than
+			 *     inverting the new immediate field.
+			 */
+			imm = ~imm;
+		}
+		imm_type = AARCH64_INSN_IMM_MOVK;
+	}
+
+	/* Update the instruction with the new encoding. */
+	insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
+	*(u32 *)place = cpu_to_le32(insn);
+
+	/* Shift out the immediate field. */
+	sval >>= 16;
+
+	/*
+	 * For unsigned immediates, the overflow check is straightforward.
+	 * For signed immediates, the sign bit is actually the bit past the
+	 * most significant bit of the field.
+	 * The AARCH64_INSN_IMM_16 immediate type is unsigned.
+	 */
+	if (imm_type != AARCH64_INSN_IMM_16) {
+		sval++;
+		limit++;
+	}
+
+	/* Check the upper bits depending on the sign of the immediate. */
+	if ((u64)sval > limit)
+		return -ERANGE;
+
+	return 0;
+}
+
+static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
+			  int lsb, int len, enum aarch64_insn_imm_type imm_type)
+{
+	u64 imm, imm_mask;
+	s64 sval;
+	u32 insn = le32_to_cpu(*(u32 *)place);
+
+	/* Calculate the relocation value. */
+	sval = do_reloc(op, place, val);
+	sval >>= lsb;
+
+	/* Extract the value bits and shift them to bit 0. */
+	imm_mask = (BIT(lsb + len) - 1) >> lsb;
+	imm = sval & imm_mask;
+
+	/* Update the instruction's immediate field. */
+	insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
+	*(u32 *)place = cpu_to_le32(insn);
+
+	/*
+	 * Extract the upper value bits (including the sign bit) and
+	 * shift them to bit 0.
+	 */
+	sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1);
+
+	/*
+	 * Overflow has occurred if the upper bits are not all equal to
+	 * the sign bit of the value.
+	 */
+	if ((u64)(sval + 1) >= 2)
+		return -ERANGE;
+
+	return 0;
+}
+
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	unsigned int i;
+	int ovf;
+	bool overflow_check;
+	Elf64_Sym *sym;
+	void *loc;
+	u64 val;
+	Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* loc corresponds to P in the AArch64 ELF document. */
+		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+
+		/* sym is the ELF symbol we're referring to. */
+		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+			+ ELF64_R_SYM(rel[i].r_info);
+
+		/* val corresponds to (S + A) in the AArch64 ELF document. */
+		val = sym->st_value + rel[i].r_addend;
+
+		/* Check for overflow by default. */
+		overflow_check = true;
+
+		/* Perform the static relocation. */
+		switch (ELF64_R_TYPE(rel[i].r_info)) {
+		/* Null relocations. */
+		case R_ARM_NONE:
+		case R_AARCH64_NONE:
+			ovf = 0;
+			break;
+
+		/* Data relocations. */
+		case R_AARCH64_ABS64:
+			overflow_check = false;
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
+			break;
+		case R_AARCH64_ABS32:
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
+			break;
+		case R_AARCH64_ABS16:
+			ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
+			break;
+		case R_AARCH64_PREL64:
+			overflow_check = false;
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
+			break;
+		case R_AARCH64_PREL32:
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
+			break;
+		case R_AARCH64_PREL16:
+			ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
+			break;
+
+		/* MOVW instruction relocations. */
+		case R_AARCH64_MOVW_UABS_G0_NC:
+			overflow_check = false;
+		case R_AARCH64_MOVW_UABS_G0:
+			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
+					      AARCH64_INSN_IMM_16);
+			break;
+		case R_AARCH64_MOVW_UABS_G1_NC:
+			overflow_check = false;
+		case R_AARCH64_MOVW_UABS_G1:
+			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
+					      AARCH64_INSN_IMM_16);
+			break;
+		case R_AARCH64_MOVW_UABS_G2_NC:
+			overflow_check = false;
+		case R_AARCH64_MOVW_UABS_G2:
+			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
+					      AARCH64_INSN_IMM_16);
+			break;
+		case R_AARCH64_MOVW_UABS_G3:
+			/* We're using the top bits so we can't overflow. */
+			overflow_check = false;
+			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
+					      AARCH64_INSN_IMM_16);
+			break;
+		case R_AARCH64_MOVW_SABS_G0:
+			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
+					      AARCH64_INSN_IMM_MOVNZ);
+			break;
+		case R_AARCH64_MOVW_SABS_G1:
+			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
+					      AARCH64_INSN_IMM_MOVNZ);
+			break;
+		case R_AARCH64_MOVW_SABS_G2:
+			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
+					      AARCH64_INSN_IMM_MOVNZ);
+			break;
+		case R_AARCH64_MOVW_PREL_G0_NC:
+			overflow_check = false;
+			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
+					      AARCH64_INSN_IMM_MOVK);
+			break;
+		case R_AARCH64_MOVW_PREL_G0:
+			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
+					      AARCH64_INSN_IMM_MOVNZ);
+			break;
+		case R_AARCH64_MOVW_PREL_G1_NC:
+			overflow_check = false;
+			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
+					      AARCH64_INSN_IMM_MOVK);
+			break;
+		case R_AARCH64_MOVW_PREL_G1:
+			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
+					      AARCH64_INSN_IMM_MOVNZ);
+			break;
+		case R_AARCH64_MOVW_PREL_G2_NC:
+			overflow_check = false;
+			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
+					      AARCH64_INSN_IMM_MOVK);
+			break;
+		case R_AARCH64_MOVW_PREL_G2:
+			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
+					      AARCH64_INSN_IMM_MOVNZ);
+			break;
+		case R_AARCH64_MOVW_PREL_G3:
+			/* We're using the top bits so we can't overflow. */
+			overflow_check = false;
+			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
+					      AARCH64_INSN_IMM_MOVNZ);
+			break;
+
+		/* Immediate instruction relocations. */
+		case R_AARCH64_LD_PREL_LO19:
+			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
+					     AARCH64_INSN_IMM_19);
+			break;
+		case R_AARCH64_ADR_PREL_LO21:
+			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
+					     AARCH64_INSN_IMM_ADR);
+			break;
+		case R_AARCH64_ADR_PREL_PG_HI21_NC:
+			overflow_check = false;
+		case R_AARCH64_ADR_PREL_PG_HI21:
+			ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
+					     AARCH64_INSN_IMM_ADR);
+			break;
+		case R_AARCH64_ADD_ABS_LO12_NC:
+		case R_AARCH64_LDST8_ABS_LO12_NC:
+			overflow_check = false;
+			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
+					     AARCH64_INSN_IMM_12);
+			break;
+		case R_AARCH64_LDST16_ABS_LO12_NC:
+			overflow_check = false;
+			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
+					     AARCH64_INSN_IMM_12);
+			break;
+		case R_AARCH64_LDST32_ABS_LO12_NC:
+			overflow_check = false;
+			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
+					     AARCH64_INSN_IMM_12);
+			break;
+		case R_AARCH64_LDST64_ABS_LO12_NC:
+			overflow_check = false;
+			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
+					     AARCH64_INSN_IMM_12);
+			break;
+		case R_AARCH64_LDST128_ABS_LO12_NC:
+			overflow_check = false;
+			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
+					     AARCH64_INSN_IMM_12);
+			break;
+		case R_AARCH64_TSTBR14:
+			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
+					     AARCH64_INSN_IMM_14);
+			break;
+		case R_AARCH64_CONDBR19:
+			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
+					     AARCH64_INSN_IMM_19);
+			break;
+		case R_AARCH64_JUMP26:
+		case R_AARCH64_CALL26:
+			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
+					     AARCH64_INSN_IMM_26);
+			break;
+
+		default:
+			pr_err("module %s: unsupported RELA relocation: %llu\n",
+			       me->name, ELF64_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+
+		if (overflow_check && ovf == -ERANGE)
+			goto overflow;
+
+	}
+
+	return 0;
+
+overflow:
+	pr_err("module %s: overflow in relocation type %d val %Lx\n",
+	       me->name, (int)ELF64_R_TYPE(rel[i].r_info), val);
+	return -ENOEXEC;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	const Elf_Shdr *s, *se;
+	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+		if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) {
+			apply_alternatives((void *)s->sh_addr, s->sh_size);
+			return 0;
+		}
+	}
+
+	return 0;
+}
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
new file mode 100644
index 000000000..4095379dc
--- /dev/null
+++ b/arch/arm64/kernel/pci.c
@@ -0,0 +1,73 @@
+/*
+ * Code borrowed from powerpc/kernel/pci-common.c
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+
+#include <asm/pci-bridge.h>
+
+/*
+ * Called after each bus is probed, but before its children are examined
+ */
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+	/* nothing to do, expected to be removed in the future */
+}
+
+/*
+ * We don't have to worry about legacy ISA devices, so nothing to do here
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				resource_size_t size, resource_size_t align)
+{
+	return res->start;
+}
+
+/*
+ * Try to assign the IRQ number from DT when adding a new device
+ */
+int pcibios_add_device(struct pci_dev *dev)
+{
+	dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
+
+	return 0;
+}
+
+/*
+ * raw_pci_read/write - Platform-specific PCI config space access.
+ */
+int raw_pci_read(unsigned int domain, unsigned int bus,
+		  unsigned int devfn, int reg, int len, u32 *val)
+{
+	return -ENXIO;
+}
+
+int raw_pci_write(unsigned int domain, unsigned int bus,
+		unsigned int devfn, int reg, int len, u32 val)
+{
+	return -ENXIO;
+}
+
+#ifdef CONFIG_ACPI
+/* Root bridge scanning */
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
+{
+	/* TODO: Should be revisited when implementing PCI on ACPI */
+	return NULL;
+}
+#endif
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
new file mode 100644
index 000000000..cce18c85d
--- /dev/null
+++ b/arch/arm64/kernel/perf_event.c
@@ -0,0 +1,1592 @@
+/*
+ * PMU support
+ *
+ * Copyright (C) 2012 ARM Limited
+ * Author: Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based heavily on the ARMv7 perf event code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include <linux/bitmap.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+
+#include <asm/cputype.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+#include <asm/stacktrace.h>
+
+/*
+ * ARMv8 supports a maximum of 32 events.
+ * The cycle counter is included in this total.
+ */
+#define ARMPMU_MAX_HWEVENTS		32
+
+static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
+static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
+
+/* Set at runtime when we know what CPU type we are. */
+static struct arm_pmu *cpu_pmu;
+
+int
+armpmu_get_max_events(void)
+{
+	int max_events = 0;
+
+	if (cpu_pmu != NULL)
+		max_events = cpu_pmu->num_events;
+
+	return max_events;
+}
+EXPORT_SYMBOL_GPL(armpmu_get_max_events);
+
+int perf_num_counters(void)
+{
+	return armpmu_get_max_events();
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+#define HW_OP_UNSUPPORTED		0xFFFF
+
+#define C(_x) \
+	PERF_COUNT_HW_CACHE_##_x
+
+#define CACHE_OP_UNSUPPORTED		0xFFFF
+
+static int
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result, ret;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	return ret;
+}
+
+static int
+armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
+{
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+	return (int)(config & raw_event_mask);
+}
+
+static int map_cpu_event(struct perf_event *event,
+			 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+			 const unsigned (*cache_map)
+					[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX],
+			 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int
+armpmu_event_set_period(struct perf_event *event,
+			struct hw_perf_event *hwc,
+			int idx)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
+
+	local64_set(&hwc->prev_count, (u64)-left);
+
+	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+u64
+armpmu_event_update(struct perf_event *event,
+		    struct hw_perf_event *hwc,
+		    int idx)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	u64 delta, prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = armpmu->read_counter(idx);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static void
+armpmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Don't read disabled counters! */
+	if (hwc->idx < 0)
+		return;
+
+	armpmu_event_update(event, hwc, hwc->idx);
+}
+
+static void
+armpmu_stop(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(hwc, hwc->idx);
+		barrier(); /* why? */
+		armpmu_event_update(event, hwc, hwc->idx);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static void
+armpmu_start(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	/*
+	 * Set the period again. Some counters can't be stopped, so when we
+	 * were stopped we simply disabled the IRQ source and the counter
+	 * may have been left counting. If we don't do this step then we may
+	 * get an interrupt too soon or *way* too late if the overflow has
+	 * happened since disabling.
+	 */
+	armpmu_event_set_period(event, hwc, hwc->idx);
+	armpmu->enable(hwc, hwc->idx);
+}
+
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	WARN_ON(idx < 0);
+
+	armpmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static int
+armpmu_add(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
+
+	perf_pmu_disable(event->pmu);
+
+	/* If we don't have a space for the counter then finish early. */
+	idx = armpmu->get_event_idx(hw_events, hwc);
+	if (idx < 0) {
+		err = idx;
+		goto out;
+	}
+
+	/*
+	 * If there is an event in the counter we are going to use then make
+	 * sure it is disabled.
+	 */
+	event->hw.idx = idx;
+	armpmu->disable(hwc, idx);
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+out:
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+				struct perf_event *event)
+{
+	struct arm_pmu *armpmu;
+	struct hw_perf_event fake_event = event->hw;
+	struct pmu *leader_pmu = event->group_leader->pmu;
+
+	if (is_software_event(event))
+		return 1;
+
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
+	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	armpmu = to_arm_pmu(event->pmu);
+	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
+}
+
+static int
+validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct pmu_hw_events fake_pmu;
+	DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
+
+	/*
+	 * Initialise the fake PMU. We only need to populate the
+	 * used_mask for the purposes of validation.
+	 */
+	memset(fake_used_mask, 0, sizeof(fake_used_mask));
+	fake_pmu.used_mask = fake_used_mask;
+
+	if (!validate_event(event->pmu, &fake_pmu, leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
+			return -EINVAL;
+	}
+
+	if (!validate_event(event->pmu, &fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void
+armpmu_disable_percpu_irq(void *data)
+{
+	unsigned int irq = *(unsigned int *)data;
+	disable_percpu_irq(irq);
+}
+
+static void
+armpmu_release_hardware(struct arm_pmu *armpmu)
+{
+	int irq;
+	unsigned int i, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (!irqs)
+		return;
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq <= 0)
+		return;
+
+	if (irq_is_percpu(irq)) {
+		on_each_cpu(armpmu_disable_percpu_irq, &irq, 1);
+		free_percpu_irq(irq, &cpu_hw_events);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			if (armpmu->irq_affinity)
+				cpu = armpmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
+				continue;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq > 0)
+				free_irq(irq, armpmu);
+		}
+	}
+}
+
+static void
+armpmu_enable_percpu_irq(void *data)
+{
+	unsigned int irq = *(unsigned int *)data;
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
+static int
+armpmu_reserve_hardware(struct arm_pmu *armpmu)
+{
+	int err, irq;
+	unsigned int i, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
+
+	if (!pmu_device) {
+		pr_err("no PMU device registered\n");
+		return -ENODEV;
+	}
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (!irqs) {
+		pr_err("no irqs for PMUs defined\n");
+		return -ENODEV;
+	}
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq <= 0) {
+		pr_err("failed to get valid irq for PMU device\n");
+		return -ENODEV;
+	}
+
+	if (irq_is_percpu(irq)) {
+		err = request_percpu_irq(irq, armpmu->handle_irq,
+				"arm-pmu", &cpu_hw_events);
+
+		if (err) {
+			pr_err("unable to request percpu IRQ%d for ARM PMU counters\n",
+					irq);
+			armpmu_release_hardware(armpmu);
+			return err;
+		}
+
+		on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
+			err = 0;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq <= 0)
+				continue;
+
+			if (armpmu->irq_affinity)
+				cpu = armpmu->irq_affinity[i];
+
+			/*
+			 * If we have a single PMU interrupt that we can't shift,
+			 * assume that we're running on a uniprocessor machine and
+			 * continue. Otherwise, continue without this interrupt.
+			 */
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
+				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
+						irq, cpu);
+				continue;
+			}
+
+			err = request_irq(irq, armpmu->handle_irq,
+					IRQF_NOBALANCING,
+					"arm-pmu", armpmu);
+			if (err) {
+				pr_err("unable to request IRQ%d for ARM PMU counters\n",
+						irq);
+				armpmu_release_hardware(armpmu);
+				return err;
+			}
+
+			cpumask_set_cpu(cpu, &armpmu->active_irqs);
+		}
+	}
+
+	return 0;
+}
+
+static void
+hw_perf_event_destroy(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	atomic_t *active_events	 = &armpmu->active_events;
+	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
+		armpmu_release_hardware(armpmu);
+		mutex_unlock(pmu_reserve_mutex);
+	}
+}
+
+static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
+}
+
+static int
+__hw_perf_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping, err;
+
+	mapping = armpmu->map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
+		pr_debug("ARM performance counters do not support mode exclusion\n");
+		return -EPERM;
+	}
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base	    |= (unsigned long)mapping;
+
+	if (!hwc->sample_period) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period  = armpmu->max_period >> 1;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	err = 0;
+	if (event->group_leader != event) {
+		err = validate_group(event);
+		if (err)
+			return -EINVAL;
+	}
+
+	return err;
+}
+
+static int armpmu_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	int err = 0;
+	atomic_t *active_events = &armpmu->active_events;
+
+	if (armpmu->map_event(event) == -ENOENT)
+		return -ENOENT;
+
+	event->destroy = hw_perf_event_destroy;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&armpmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = armpmu_reserve_hardware(armpmu);
+
+		if (!err)
+			atomic_inc(active_events);
+		mutex_unlock(&armpmu->reserve_mutex);
+	}
+
+	if (err)
+		return err;
+
+	err = __hw_perf_event_init(event);
+	if (err)
+		hw_perf_event_destroy(event);
+
+	return err;
+}
+
+static void armpmu_enable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+
+	if (enabled)
+		armpmu->start();
+}
+
+static void armpmu_disable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	armpmu->stop();
+}
+
+static void __init armpmu_init(struct arm_pmu *armpmu)
+{
+	atomic_set(&armpmu->active_events, 0);
+	mutex_init(&armpmu->reserve_mutex);
+
+	armpmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+	};
+}
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
+{
+	armpmu_init(armpmu);
+	return perf_pmu_register(&armpmu->pmu, name, type);
+}
+
+/*
+ * ARMv8 PMUv3 Performance Events handling code.
+ * Common event types.
+ */
+enum armv8_pmuv3_perf_types {
+	/* Required events. */
+	ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR			= 0x00,
+	ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL			= 0x03,
+	ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS			= 0x04,
+	ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED			= 0x10,
+	ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES			= 0x11,
+	ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED			= 0x12,
+
+	/* At least one of the following is required. */
+	ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED			= 0x08,
+	ARMV8_PMUV3_PERFCTR_OP_SPEC				= 0x1B,
+
+	/* Common architectural events. */
+	ARMV8_PMUV3_PERFCTR_MEM_READ				= 0x06,
+	ARMV8_PMUV3_PERFCTR_MEM_WRITE				= 0x07,
+	ARMV8_PMUV3_PERFCTR_EXC_TAKEN				= 0x09,
+	ARMV8_PMUV3_PERFCTR_EXC_EXECUTED			= 0x0A,
+	ARMV8_PMUV3_PERFCTR_CID_WRITE				= 0x0B,
+	ARMV8_PMUV3_PERFCTR_PC_WRITE				= 0x0C,
+	ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH			= 0x0D,
+	ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN			= 0x0E,
+	ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS		= 0x0F,
+	ARMV8_PMUV3_PERFCTR_TTBR_WRITE				= 0x1C,
+
+	/* Common microarchitectural events. */
+	ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL			= 0x01,
+	ARMV8_PMUV3_PERFCTR_ITLB_REFILL				= 0x02,
+	ARMV8_PMUV3_PERFCTR_DTLB_REFILL				= 0x05,
+	ARMV8_PMUV3_PERFCTR_MEM_ACCESS				= 0x13,
+	ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS			= 0x14,
+	ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB			= 0x15,
+	ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS			= 0x16,
+	ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL			= 0x17,
+	ARMV8_PMUV3_PERFCTR_L2_CACHE_WB				= 0x18,
+	ARMV8_PMUV3_PERFCTR_BUS_ACCESS				= 0x19,
+	ARMV8_PMUV3_PERFCTR_MEM_ERROR				= 0x1A,
+	ARMV8_PMUV3_PERFCTR_BUS_CYCLES				= 0x1D,
+};
+
+/* PMUv3 HW events mapping. */
+static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+						[PERF_COUNT_HW_CACHE_OP_MAX]
+						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+/*
+ * Perf Events' indices
+ */
+#define	ARMV8_IDX_CYCLE_COUNTER	0
+#define	ARMV8_IDX_COUNTER0	1
+#define	ARMV8_IDX_COUNTER_LAST	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+
+#define	ARMV8_MAX_COUNTERS	32
+#define	ARMV8_COUNTER_MASK	(ARMV8_MAX_COUNTERS - 1)
+
+/*
+ * ARMv8 low level PMU access
+ */
+
+/*
+ * Perf Event to low level counters mapping
+ */
+#define	ARMV8_IDX_TO_COUNTER(x)	\
+	(((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK)
+
+/*
+ * Per-CPU PMCR: config reg
+ */
+#define ARMV8_PMCR_E		(1 << 0) /* Enable all counters */
+#define ARMV8_PMCR_P		(1 << 1) /* Reset all counters */
+#define ARMV8_PMCR_C		(1 << 2) /* Cycle counter reset */
+#define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */
+#define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
+#define	ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */
+#define	ARMV8_PMCR_N_MASK	0x1f
+#define	ARMV8_PMCR_MASK		0x3f	 /* Mask for writable bits */
+
+/*
+ * PMOVSR: counters overflow flag status reg
+ */
+#define	ARMV8_OVSR_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV8_OVERFLOWED_MASK	ARMV8_OVSR_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define	ARMV8_EVTYPE_MASK	0xc80003ff	/* Mask for writable bits */
+#define	ARMV8_EVTYPE_EVENT	0x3ff		/* Mask for EVENT bits */
+
+/*
+ * Event filters for PMUv3
+ */
+#define	ARMV8_EXCLUDE_EL1	(1 << 31)
+#define	ARMV8_EXCLUDE_EL0	(1 << 30)
+#define	ARMV8_INCLUDE_EL2	(1 << 27)
+
+static inline u32 armv8pmu_pmcr_read(void)
+{
+	u32 val;
+	asm volatile("mrs %0, pmcr_el0" : "=r" (val));
+	return val;
+}
+
+static inline void armv8pmu_pmcr_write(u32 val)
+{
+	val &= ARMV8_PMCR_MASK;
+	isb();
+	asm volatile("msr pmcr_el0, %0" :: "r" (val));
+}
+
+static inline int armv8pmu_has_overflowed(u32 pmovsr)
+{
+	return pmovsr & ARMV8_OVERFLOWED_MASK;
+}
+
+static inline int armv8pmu_counter_valid(int idx)
+{
+	return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST;
+}
+
+static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
+{
+	int ret = 0;
+	u32 counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		pr_err("CPU%u checking wrong counter %d overflow status\n",
+			smp_processor_id(), idx);
+	} else {
+		counter = ARMV8_IDX_TO_COUNTER(idx);
+		ret = pmnc & BIT(counter);
+	}
+
+	return ret;
+}
+
+static inline int armv8pmu_select_counter(int idx)
+{
+	u32 counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		pr_err("CPU%u selecting wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmselr_el0, %0" :: "r" (counter));
+	isb();
+
+	return idx;
+}
+
+static inline u32 armv8pmu_read_counter(int idx)
+{
+	u32 value = 0;
+
+	if (!armv8pmu_counter_valid(idx))
+		pr_err("CPU%u reading wrong counter %d\n",
+			smp_processor_id(), idx);
+	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
+		asm volatile("mrs %0, pmccntr_el0" : "=r" (value));
+	else if (armv8pmu_select_counter(idx) == idx)
+		asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value));
+
+	return value;
+}
+
+static inline void armv8pmu_write_counter(int idx, u32 value)
+{
+	if (!armv8pmu_counter_valid(idx))
+		pr_err("CPU%u writing wrong counter %d\n",
+			smp_processor_id(), idx);
+	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
+		asm volatile("msr pmccntr_el0, %0" :: "r" (value));
+	else if (armv8pmu_select_counter(idx) == idx)
+		asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
+}
+
+static inline void armv8pmu_write_evtype(int idx, u32 val)
+{
+	if (armv8pmu_select_counter(idx) == idx) {
+		val &= ARMV8_EVTYPE_MASK;
+		asm volatile("msr pmxevtyper_el0, %0" :: "r" (val));
+	}
+}
+
+static inline int armv8pmu_enable_counter(int idx)
+{
+	u32 counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
+	return idx;
+}
+
+static inline int armv8pmu_disable_counter(int idx)
+{
+	u32 counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
+	return idx;
+}
+
+static inline int armv8pmu_enable_intens(int idx)
+{
+	u32 counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
+	return idx;
+}
+
+static inline int armv8pmu_disable_intens(int idx)
+{
+	u32 counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
+	isb();
+	/* Clear the overflow flag in case an interrupt is pending. */
+	asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
+	isb();
+	return idx;
+}
+
+static inline u32 armv8pmu_getreset_flags(void)
+{
+	u32 value;
+
+	/* Read */
+	asm volatile("mrs %0, pmovsclr_el0" : "=r" (value));
+
+	/* Write to clear flags */
+	value &= ARMV8_OVSR_MASK;
+	asm volatile("msr pmovsclr_el0, %0" :: "r" (value));
+
+	return value;
+}
+
+static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	armv8pmu_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters).
+	 */
+	armv8pmu_write_evtype(idx, hwc->config_base);
+
+	/*
+	 * Enable interrupt for this counter
+	 */
+	armv8pmu_enable_intens(idx);
+
+	/*
+	 * Enable counter
+	 */
+	armv8pmu_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+	/*
+	 * Disable counter and interrupt
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	armv8pmu_disable_counter(idx);
+
+	/*
+	 * Disable interrupt for this counter
+	 */
+	armv8pmu_disable_intens(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
+{
+	u32 pmovsr;
+	struct perf_sample_data data;
+	struct pmu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * Get and reset the IRQ flags
+	 */
+	pmovsr = armv8pmu_getreset_flags();
+
+	/*
+	 * Did an overflow occur?
+	 */
+	if (!armv8pmu_has_overflowed(pmovsr))
+		return IRQ_NONE;
+
+	/*
+	 * Handle the counter(s) overflow(s)
+	 */
+	regs = get_irq_regs();
+
+	cpuc = this_cpu_ptr(&cpu_hw_events);
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		/* Ignore if we don't have an event. */
+		if (!event)
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(hwc, idx);
+	}
+
+	/*
+	 * Handle the pending perf events.
+	 *
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
+	 * will not work.
+	 */
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+static void armv8pmu_start(void)
+{
+	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	/* Enable all counters */
+	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void armv8pmu_stop(void)
+{
+	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	/* Disable all counters */
+	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				  struct hw_perf_event *event)
+{
+	int idx;
+	unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT;
+
+	/* Always place a cycle counter into the cycle counter. */
+	if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
+		if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return ARMV8_IDX_CYCLE_COUNTER;
+	}
+
+	/*
+	 * For anything other than a cycle counter, try and use
+	 * the events counters
+	 */
+	for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+
+	/* The counters are all in use. */
+	return -EAGAIN;
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ */
+static int armv8pmu_set_event_filter(struct hw_perf_event *event,
+				     struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (attr->exclude_idle)
+		return -EPERM;
+	if (attr->exclude_user)
+		config_base |= ARMV8_EXCLUDE_EL0;
+	if (attr->exclude_kernel)
+		config_base |= ARMV8_EXCLUDE_EL1;
+	if (!attr->exclude_hv)
+		config_base |= ARMV8_INCLUDE_EL2;
+
+	/*
+	 * Install the filter into config_base as this is used to
+	 * construct the event type.
+	 */
+	event->config_base = config_base;
+
+	return 0;
+}
+
+static void armv8pmu_reset(void *info)
+{
+	u32 idx, nb_cnt = cpu_pmu->num_events;
+
+	/* The counter and interrupt enable registers are unknown at reset. */
+	for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
+		armv8pmu_disable_event(NULL, idx);
+
+	/* Initialize & Reset PMNC: C and P bits. */
+	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
+
+	/* Disable access from userspace. */
+	asm volatile("msr pmuserenr_el0, %0" :: "r" (0));
+}
+
+static int armv8_pmuv3_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv8_pmuv3_perf_map,
+				&armv8_pmuv3_perf_cache_map,
+				ARMV8_EVTYPE_EVENT);
+}
+
+static struct arm_pmu armv8pmu = {
+	.handle_irq		= armv8pmu_handle_irq,
+	.enable			= armv8pmu_enable_event,
+	.disable		= armv8pmu_disable_event,
+	.read_counter		= armv8pmu_read_counter,
+	.write_counter		= armv8pmu_write_counter,
+	.get_event_idx		= armv8pmu_get_event_idx,
+	.start			= armv8pmu_start,
+	.stop			= armv8pmu_stop,
+	.reset			= armv8pmu_reset,
+	.max_period		= (1LLU << 32) - 1,
+};
+
+static u32 __init armv8pmu_read_num_pmnc_events(void)
+{
+	u32 nb_cnt;
+
+	/* Read the nb of CNTx counters supported from PMNC */
+	nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
+
+	/* Add the CPU cycles counter and return */
+	return nb_cnt + 1;
+}
+
+static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
+{
+	armv8pmu.name			= "arm/armv8-pmuv3";
+	armv8pmu.map_event		= armv8_pmuv3_map_event;
+	armv8pmu.num_events		= armv8pmu_read_num_pmnc_events();
+	armv8pmu.set_event_filter	= armv8pmu_set_event_filter;
+	return &armv8pmu;
+}
+
+/*
+ * Ensure the PMU has sane values out of reset.
+ * This requires SMP to be available, so exists as a separate initcall.
+ */
+static int __init
+cpu_pmu_reset(void)
+{
+	if (cpu_pmu && cpu_pmu->reset)
+		return on_each_cpu(cpu_pmu->reset, NULL, 1);
+	return 0;
+}
+arch_initcall(cpu_pmu_reset);
+
+/*
+ * PMU platform driver and devicetree bindings.
+ */
+static const struct of_device_id armpmu_of_device_ids[] = {
+	{.compatible = "arm,armv8-pmuv3"},
+	{},
+};
+
+static int armpmu_device_probe(struct platform_device *pdev)
+{
+	int i, irq, *irqs;
+
+	if (!cpu_pmu)
+		return -ENODEV;
+
+	/* Don't bother with PPIs; they're already affine */
+	irq = platform_get_irq(pdev, 0);
+	if (irq >= 0 && irq_is_percpu(irq))
+		return 0;
+
+	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct device_node *dn;
+		int cpu;
+
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+				      i);
+		if (!dn) {
+			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+				of_node_full_name(pdev->dev.of_node), i);
+			break;
+		}
+
+		for_each_possible_cpu(cpu)
+			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+				break;
+
+		of_node_put(dn);
+		if (cpu >= nr_cpu_ids) {
+			pr_warn("Failed to find logical CPU for %s\n",
+				dn->name);
+			break;
+		}
+
+		irqs[i] = cpu;
+	}
+
+	if (i == pdev->num_resources)
+		cpu_pmu->irq_affinity = irqs;
+	else
+		kfree(irqs);
+
+	cpu_pmu->plat_device = pdev;
+	return 0;
+}
+
+static struct platform_driver armpmu_driver = {
+	.driver		= {
+		.name	= "arm-pmu",
+		.of_match_table = armpmu_of_device_ids,
+	},
+	.probe		= armpmu_device_probe,
+};
+
+static int __init register_pmu_driver(void)
+{
+	return platform_driver_register(&armpmu_driver);
+}
+device_initcall(register_pmu_driver);
+
+static struct pmu_hw_events *armpmu_get_cpu_events(void)
+{
+	return this_cpu_ptr(&cpu_hw_events);
+}
+
+static void __init cpu_pmu_init(struct arm_pmu *armpmu)
+{
+	int cpu;
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+		events->events = per_cpu(hw_events, cpu);
+		events->used_mask = per_cpu(used_mask, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+	}
+	armpmu->get_hw_events = armpmu_get_cpu_events;
+}
+
+static int __init init_hw_perf_events(void)
+{
+	u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
+
+	switch ((dfr >> 8) & 0xf) {
+	case 0x1:	/* PMUv3 */
+		cpu_pmu = armv8_pmuv3_pmu_init();
+		break;
+	}
+
+	if (cpu_pmu) {
+		pr_info("enabled with %s PMU driver, %d counters available\n",
+			cpu_pmu->name, cpu_pmu->num_events);
+		cpu_pmu_init(cpu_pmu);
+		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
+	} else {
+		pr_info("no hardware support available\n");
+	}
+
+	return 0;
+}
+early_initcall(init_hw_perf_events);
+
+/*
+ * Callchain handling code.
+ */
+struct frame_tail {
+	struct frame_tail	__user *fp;
+	unsigned long		lr;
+} __attribute__((packed));
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static struct frame_tail __user *
+user_backtrace(struct frame_tail __user *tail,
+	       struct perf_callchain_entry *entry)
+{
+	struct frame_tail buftail;
+	unsigned long err;
+
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+
+	pagefault_disable();
+	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+	pagefault_enable();
+
+	if (err)
+		return NULL;
+
+	perf_callchain_store(entry, buftail.lr);
+
+	/*
+	 * Frame pointers should strictly progress back up the stack
+	 * (towards higher addresses).
+	 */
+	if (tail >= buftail.fp)
+		return NULL;
+
+	return buftail.fp;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct compat_frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct compat_frame_tail {
+	compat_uptr_t	fp; /* a (struct compat_frame_tail *) in compat mode */
+	u32		sp;
+	u32		lr;
+} __attribute__((packed));
+
+static struct compat_frame_tail __user *
+compat_user_backtrace(struct compat_frame_tail __user *tail,
+		      struct perf_callchain_entry *entry)
+{
+	struct compat_frame_tail buftail;
+	unsigned long err;
+
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+
+	pagefault_disable();
+	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+	pagefault_enable();
+
+	if (err)
+		return NULL;
+
+	perf_callchain_store(entry, buftail.lr);
+
+	/*
+	 * Frame pointers should strictly progress back up the stack
+	 * (towards higher addresses).
+	 */
+	if (tail + 1 >= (struct compat_frame_tail __user *)
+			compat_ptr(buftail.fp))
+		return NULL;
+
+	return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+}
+#endif /* CONFIG_COMPAT */
+
+void perf_callchain_user(struct perf_callchain_entry *entry,
+			 struct pt_regs *regs)
+{
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+
+	perf_callchain_store(entry, regs->pc);
+
+	if (!compat_user_mode(regs)) {
+		/* AARCH64 mode */
+		struct frame_tail __user *tail;
+
+		tail = (struct frame_tail __user *)regs->regs[29];
+
+		while (entry->nr < PERF_MAX_STACK_DEPTH &&
+		       tail && !((unsigned long)tail & 0xf))
+			tail = user_backtrace(tail, entry);
+	} else {
+#ifdef CONFIG_COMPAT
+		/* AARCH32 compat mode */
+		struct compat_frame_tail __user *tail;
+
+		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
+
+		while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+			tail && !((unsigned long)tail & 0x3))
+			tail = compat_user_backtrace(tail, entry);
+#endif
+	}
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whist unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int callchain_trace(struct stackframe *frame, void *data)
+{
+	struct perf_callchain_entry *entry = data;
+	perf_callchain_store(entry, frame->pc);
+	return 0;
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
+{
+	struct stackframe frame;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+
+	frame.fp = regs->regs[29];
+	frame.sp = regs->sp;
+	frame.pc = regs->pc;
+
+	walk_stackframe(&frame, callchain_trace, entry);
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return perf_guest_cbs->get_guest_ip();
+
+	return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	int misc = 0;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		if (perf_guest_cbs->is_user_mode())
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
+	return misc;
+}
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
new file mode 100644
index 000000000..3f62b35fb
--- /dev/null
+++ b/arch/arm64/kernel/perf_regs.c
@@ -0,0 +1,60 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+
+#include <asm/compat.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX))
+		return 0;
+
+	/*
+	 * Compat (i.e. 32 bit) mode:
+	 * - PC has been set in the pt_regs struct in kernel_entry,
+	 * - Handle SP and LR here.
+	 */
+	if (compat_user_mode(regs)) {
+		if ((u32)idx == PERF_REG_ARM64_SP)
+			return regs->compat_sp;
+		if ((u32)idx == PERF_REG_ARM64_LR)
+			return regs->compat_lr;
+	}
+
+	if ((u32)idx == PERF_REG_ARM64_SP)
+		return regs->sp;
+
+	if ((u32)idx == PERF_REG_ARM64_PC)
+		return regs->pc;
+
+	return regs->regs[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	if (is_compat_thread(task_thread_info(task)))
+		return PERF_SAMPLE_REGS_ABI_32;
+	else
+		return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs,
+			struct pt_regs *regs_user_copy)
+{
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
new file mode 100644
index 000000000..c6b1f3b96
--- /dev/null
+++ b/arch/arm64/kernel/process.c
@@ -0,0 +1,388 @@
+/*
+ * Based on arch/arm/kernel/process.c
+ *
+ * Original Copyright (C) 1995  Linus Torvalds
+ * Copyright (C) 1996-2000 Russell King - Converted to ARM.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdarg.h>
+
+#include <linux/compat.h>
+#include <linux/efi.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/elfcore.h>
+#include <linux/pm.h>
+#include <linux/tick.h>
+#include <linux/utsname.h>
+#include <linux/uaccess.h>
+#include <linux/random.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/personality.h>
+#include <linux/notifier.h>
+
+#include <asm/compat.h>
+#include <asm/cacheflush.h>
+#include <asm/fpsimd.h>
+#include <asm/mmu_context.h>
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+#include <linux/stackprotector.h>
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
+void soft_restart(unsigned long addr)
+{
+	setup_mm_for_reboot();
+	cpu_soft_restart(virt_to_phys(cpu_reset), addr);
+	/* Should never get here */
+	BUG();
+}
+
+/*
+ * Function pointers to optional machine specific functions
+ */
+void (*pm_power_off)(void);
+EXPORT_SYMBOL_GPL(pm_power_off);
+
+void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
+
+/*
+ * This is our default idle handler.
+ */
+void arch_cpu_idle(void)
+{
+	/*
+	 * This should do all the clock switching and wait for interrupt
+	 * tricks
+	 */
+	cpu_do_idle();
+	local_irq_enable();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void arch_cpu_idle_dead(void)
+{
+       cpu_die();
+}
+#endif
+
+/*
+ * Called by kexec, immediately prior to machine_kexec().
+ *
+ * This must completely disable all secondary CPUs; simply causing those CPUs
+ * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
+ * kexec'd kernel to use any and all RAM as it sees fit, without having to
+ * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
+ * functionality embodied in disable_nonboot_cpus() to achieve this.
+ */
+void machine_shutdown(void)
+{
+	disable_nonboot_cpus();
+}
+
+/*
+ * Halting simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this.
+ */
+void machine_halt(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+	while (1);
+}
+
+/*
+ * Power-off simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this. When the system power is turned off, it will take all CPUs
+ * with it.
+ */
+void machine_power_off(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+	if (pm_power_off)
+		pm_power_off();
+}
+
+/*
+ * Restart requires that the secondary CPUs stop performing any activity
+ * while the primary CPU resets the system. Systems with a single CPU can
+ * use soft_restart() as their machine descriptor's .restart hook, since that
+ * will cause the only available CPU to reset. Systems with multiple CPUs must
+ * provide a HW restart implementation, to ensure that all CPUs reset at once.
+ * This is required so that any code running after reset on the primary CPU
+ * doesn't have to co-ordinate with other CPUs to ensure they aren't still
+ * executing pre-reset code, and using RAM that the primary CPU's code wishes
+ * to use. Implementing such co-ordination would be essentially impossible.
+ */
+void machine_restart(char *cmd)
+{
+	/* Disable interrupts first */
+	local_irq_disable();
+	smp_send_stop();
+
+	/*
+	 * UpdateCapsule() depends on the system being reset via
+	 * ResetSystem().
+	 */
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
+		efi_reboot(reboot_mode, NULL);
+
+	/* Now call the architecture specific reboot code. */
+	if (arm_pm_restart)
+		arm_pm_restart(reboot_mode, cmd);
+	else
+		do_kernel_restart(cmd);
+
+	/*
+	 * Whoops - the architecture was unable to reboot.
+	 */
+	printk("Reboot failed -- System halted\n");
+	while (1);
+}
+
+void __show_regs(struct pt_regs *regs)
+{
+	int i, top_reg;
+	u64 lr, sp;
+
+	if (compat_user_mode(regs)) {
+		lr = regs->compat_lr;
+		sp = regs->compat_sp;
+		top_reg = 12;
+	} else {
+		lr = regs->regs[30];
+		sp = regs->sp;
+		top_reg = 29;
+	}
+
+	show_regs_print_info(KERN_DEFAULT);
+	print_symbol("PC is at %s\n", instruction_pointer(regs));
+	print_symbol("LR is at %s\n", lr);
+	printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
+	       regs->pc, lr, regs->pstate);
+	printk("sp : %016llx\n", sp);
+	for (i = top_reg; i >= 0; i--) {
+		printk("x%-2d: %016llx ", i, regs->regs[i]);
+		if (i % 2 == 0)
+			printk("\n");
+	}
+	printk("\n");
+}
+
+void show_regs(struct pt_regs * regs)
+{
+	printk("\n");
+	__show_regs(regs);
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+}
+
+static void tls_thread_flush(void)
+{
+	asm ("msr tpidr_el0, xzr");
+
+	if (is_compat_task()) {
+		current->thread.tp_value = 0;
+
+		/*
+		 * We need to ensure ordering between the shadow state and the
+		 * hardware state, so that we don't corrupt the hardware state
+		 * with a stale shadow state during context switch.
+		 */
+		barrier();
+		asm ("msr tpidrro_el0, xzr");
+	}
+}
+
+void flush_thread(void)
+{
+	fpsimd_flush_thread();
+	tls_thread_flush();
+	flush_ptrace_hw_breakpoint(current);
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	fpsimd_preserve_current_state();
+	*dst = *src;
+	return 0;
+}
+
+asmlinkage void ret_from_fork(void) asm("ret_from_fork");
+
+int copy_thread(unsigned long clone_flags, unsigned long stack_start,
+		unsigned long stk_sz, struct task_struct *p)
+{
+	struct pt_regs *childregs = task_pt_regs(p);
+	unsigned long tls = p->thread.tp_value;
+
+	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
+
+	if (likely(!(p->flags & PF_KTHREAD))) {
+		*childregs = *current_pt_regs();
+		childregs->regs[0] = 0;
+		if (is_compat_thread(task_thread_info(p))) {
+			if (stack_start)
+				childregs->compat_sp = stack_start;
+		} else {
+			/*
+			 * Read the current TLS pointer from tpidr_el0 as it may be
+			 * out-of-sync with the saved value.
+			 */
+			asm("mrs %0, tpidr_el0" : "=r" (tls));
+			if (stack_start) {
+				/* 16-byte aligned stack mandatory on AArch64 */
+				if (stack_start & 15)
+					return -EINVAL;
+				childregs->sp = stack_start;
+			}
+		}
+		/*
+		 * If a TLS pointer was passed to clone (4th argument), use it
+		 * for the new thread.
+		 */
+		if (clone_flags & CLONE_SETTLS)
+			tls = childregs->regs[3];
+	} else {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->pstate = PSR_MODE_EL1h;
+		p->thread.cpu_context.x19 = stack_start;
+		p->thread.cpu_context.x20 = stk_sz;
+	}
+	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
+	p->thread.cpu_context.sp = (unsigned long)childregs;
+	p->thread.tp_value = tls;
+
+	ptrace_hw_copy_thread(p);
+
+	return 0;
+}
+
+static void tls_thread_switch(struct task_struct *next)
+{
+	unsigned long tpidr, tpidrro;
+
+	if (!is_compat_task()) {
+		asm("mrs %0, tpidr_el0" : "=r" (tpidr));
+		current->thread.tp_value = tpidr;
+	}
+
+	if (is_compat_thread(task_thread_info(next))) {
+		tpidr = 0;
+		tpidrro = next->thread.tp_value;
+	} else {
+		tpidr = next->thread.tp_value;
+		tpidrro = 0;
+	}
+
+	asm(
+	"	msr	tpidr_el0, %0\n"
+	"	msr	tpidrro_el0, %1"
+	: : "r" (tpidr), "r" (tpidrro));
+}
+
+/*
+ * Thread switching.
+ */
+struct task_struct *__switch_to(struct task_struct *prev,
+				struct task_struct *next)
+{
+	struct task_struct *last;
+
+	fpsimd_thread_switch(next);
+	tls_thread_switch(next);
+	hw_breakpoint_thread_switch(next);
+	contextidr_thread_switch(next);
+
+	/*
+	 * Complete any pending TLB or cache maintenance on this CPU in case
+	 * the thread migrates to a different CPU.
+	 */
+	dsb(ish);
+
+	/* the actual thread switch */
+	last = cpu_switch_to(prev, next);
+
+	return last;
+}
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	struct stackframe frame;
+	unsigned long stack_page;
+	int count = 0;
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	frame.fp = thread_saved_fp(p);
+	frame.sp = thread_saved_sp(p);
+	frame.pc = thread_saved_pc(p);
+	stack_page = (unsigned long)task_stack_page(p);
+	do {
+		if (frame.sp < stack_page ||
+		    frame.sp >= stack_page + THREAD_SIZE ||
+		    unwind_frame(&frame))
+			return 0;
+		if (!in_sched_functions(frame.pc))
+			return frame.pc;
+	} while (count ++ < 16);
+	return 0;
+}
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		sp -= get_random_int() & ~PAGE_MASK;
+	return sp & ~0xf;
+}
+
+static unsigned long randomize_base(unsigned long base)
+{
+	unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1;
+	return randomize_range(base, range_end, 0) ? : base;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	return randomize_base(mm->brk);
+}
diff --git a/arch/arm64/kernel/psci-call.S b/arch/arm64/kernel/psci-call.S
new file mode 100644
index 000000000..cf83e61cd
--- /dev/null
+++ b/arch/arm64/kernel/psci-call.S
@@ -0,0 +1,28 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2015 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/linkage.h>
+
+/* int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */
+ENTRY(__invoke_psci_fn_hvc)
+	hvc	#0
+	ret
+ENDPROC(__invoke_psci_fn_hvc)
+
+/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */
+ENTRY(__invoke_psci_fn_smc)
+	smc	#0
+	ret
+ENDPROC(__invoke_psci_fn_smc)
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
new file mode 100644
index 000000000..ea18cb539
--- /dev/null
+++ b/arch/arm64/kernel/psci.c
@@ -0,0 +1,571 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#define pr_fmt(fmt) "psci: " fmt
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/pm.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <uapi/linux/psci.h>
+
+#include <asm/acpi.h>
+#include <asm/compiler.h>
+#include <asm/cpu_ops.h>
+#include <asm/errno.h>
+#include <asm/psci.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+#include <asm/system_misc.h>
+
+#define PSCI_POWER_STATE_TYPE_STANDBY		0
+#define PSCI_POWER_STATE_TYPE_POWER_DOWN	1
+
+struct psci_power_state {
+	u16	id;
+	u8	type;
+	u8	affinity_level;
+};
+
+struct psci_operations {
+	int (*cpu_suspend)(struct psci_power_state state,
+			   unsigned long entry_point);
+	int (*cpu_off)(struct psci_power_state state);
+	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
+	int (*migrate)(unsigned long cpuid);
+	int (*affinity_info)(unsigned long target_affinity,
+			unsigned long lowest_affinity_level);
+	int (*migrate_info_type)(void);
+};
+
+static struct psci_operations psci_ops;
+
+static int (*invoke_psci_fn)(u64, u64, u64, u64);
+typedef int (*psci_initcall_t)(const struct device_node *);
+
+asmlinkage int __invoke_psci_fn_hvc(u64, u64, u64, u64);
+asmlinkage int __invoke_psci_fn_smc(u64, u64, u64, u64);
+
+enum psci_function {
+	PSCI_FN_CPU_SUSPEND,
+	PSCI_FN_CPU_ON,
+	PSCI_FN_CPU_OFF,
+	PSCI_FN_MIGRATE,
+	PSCI_FN_AFFINITY_INFO,
+	PSCI_FN_MIGRATE_INFO_TYPE,
+	PSCI_FN_MAX,
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct psci_power_state *, psci_power_state);
+
+static u32 psci_function_id[PSCI_FN_MAX];
+
+static int psci_to_linux_errno(int errno)
+{
+	switch (errno) {
+	case PSCI_RET_SUCCESS:
+		return 0;
+	case PSCI_RET_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case PSCI_RET_INVALID_PARAMS:
+		return -EINVAL;
+	case PSCI_RET_DENIED:
+		return -EPERM;
+	};
+
+	return -EINVAL;
+}
+
+static u32 psci_power_state_pack(struct psci_power_state state)
+{
+	return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
+			& PSCI_0_2_POWER_STATE_ID_MASK) |
+		((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+		 & PSCI_0_2_POWER_STATE_TYPE_MASK) |
+		((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+		 & PSCI_0_2_POWER_STATE_AFFL_MASK);
+}
+
+static void psci_power_state_unpack(u32 power_state,
+				    struct psci_power_state *state)
+{
+	state->id = (power_state & PSCI_0_2_POWER_STATE_ID_MASK) >>
+			PSCI_0_2_POWER_STATE_ID_SHIFT;
+	state->type = (power_state & PSCI_0_2_POWER_STATE_TYPE_MASK) >>
+			PSCI_0_2_POWER_STATE_TYPE_SHIFT;
+	state->affinity_level =
+			(power_state & PSCI_0_2_POWER_STATE_AFFL_MASK) >>
+			PSCI_0_2_POWER_STATE_AFFL_SHIFT;
+}
+
+static int psci_get_version(void)
+{
+	int err;
+
+	err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
+	return err;
+}
+
+static int psci_cpu_suspend(struct psci_power_state state,
+			    unsigned long entry_point)
+{
+	int err;
+	u32 fn, power_state;
+
+	fn = psci_function_id[PSCI_FN_CPU_SUSPEND];
+	power_state = psci_power_state_pack(state);
+	err = invoke_psci_fn(fn, power_state, entry_point, 0);
+	return psci_to_linux_errno(err);
+}
+
+static int psci_cpu_off(struct psci_power_state state)
+{
+	int err;
+	u32 fn, power_state;
+
+	fn = psci_function_id[PSCI_FN_CPU_OFF];
+	power_state = psci_power_state_pack(state);
+	err = invoke_psci_fn(fn, power_state, 0, 0);
+	return psci_to_linux_errno(err);
+}
+
+static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_CPU_ON];
+	err = invoke_psci_fn(fn, cpuid, entry_point, 0);
+	return psci_to_linux_errno(err);
+}
+
+static int psci_migrate(unsigned long cpuid)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_MIGRATE];
+	err = invoke_psci_fn(fn, cpuid, 0, 0);
+	return psci_to_linux_errno(err);
+}
+
+static int psci_affinity_info(unsigned long target_affinity,
+		unsigned long lowest_affinity_level)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
+	err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
+	return err;
+}
+
+static int psci_migrate_info_type(void)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
+	err = invoke_psci_fn(fn, 0, 0, 0);
+	return err;
+}
+
+static int __maybe_unused cpu_psci_cpu_init_idle(struct device_node *cpu_node,
+						 unsigned int cpu)
+{
+	int i, ret, count = 0;
+	struct psci_power_state *psci_states;
+	struct device_node *state_node;
+
+	/*
+	 * If the PSCI cpu_suspend function hook has not been initialized
+	 * idle states must not be enabled, so bail out
+	 */
+	if (!psci_ops.cpu_suspend)
+		return -EOPNOTSUPP;
+
+	/* Count idle states */
+	while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
+					      count))) {
+		count++;
+		of_node_put(state_node);
+	}
+
+	if (!count)
+		return -ENODEV;
+
+	psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
+	if (!psci_states)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		u32 psci_power_state;
+
+		state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+
+		ret = of_property_read_u32(state_node,
+					   "arm,psci-suspend-param",
+					   &psci_power_state);
+		if (ret) {
+			pr_warn(" * %s missing arm,psci-suspend-param property\n",
+				state_node->full_name);
+			of_node_put(state_node);
+			goto free_mem;
+		}
+
+		of_node_put(state_node);
+		pr_debug("psci-power-state %#x index %d\n", psci_power_state,
+							    i);
+		psci_power_state_unpack(psci_power_state, &psci_states[i]);
+	}
+	/* Idle states parsed correctly, initialize per-cpu pointer */
+	per_cpu(psci_power_state, cpu) = psci_states;
+	return 0;
+
+free_mem:
+	kfree(psci_states);
+	return ret;
+}
+
+static int get_set_conduit_method(struct device_node *np)
+{
+	const char *method;
+
+	pr_info("probing for conduit method from DT.\n");
+
+	if (of_property_read_string(np, "method", &method)) {
+		pr_warn("missing \"method\" property\n");
+		return -ENXIO;
+	}
+
+	if (!strcmp("hvc", method)) {
+		invoke_psci_fn = __invoke_psci_fn_hvc;
+	} else if (!strcmp("smc", method)) {
+		invoke_psci_fn = __invoke_psci_fn_smc;
+	} else {
+		pr_warn("invalid \"method\" property: %s\n", method);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
+{
+	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+}
+
+static void psci_sys_poweroff(void)
+{
+	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
+}
+
+static void __init psci_0_2_set_functions(void)
+{
+	pr_info("Using standard PSCI v0.2 function IDs\n");
+	psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND;
+	psci_ops.cpu_suspend = psci_cpu_suspend;
+
+	psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
+	psci_ops.cpu_off = psci_cpu_off;
+
+	psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON;
+	psci_ops.cpu_on = psci_cpu_on;
+
+	psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE;
+	psci_ops.migrate = psci_migrate;
+
+	psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO;
+	psci_ops.affinity_info = psci_affinity_info;
+
+	psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
+		PSCI_0_2_FN_MIGRATE_INFO_TYPE;
+	psci_ops.migrate_info_type = psci_migrate_info_type;
+
+	arm_pm_restart = psci_sys_reset;
+
+	pm_power_off = psci_sys_poweroff;
+}
+
+/*
+ * Probe function for PSCI firmware versions >= 0.2
+ */
+static int __init psci_probe(void)
+{
+	int ver = psci_get_version();
+
+	if (ver == PSCI_RET_NOT_SUPPORTED) {
+		/*
+		 * PSCI versions >=0.2 mandates implementation of
+		 * PSCI_VERSION.
+		 */
+		pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+		return -EOPNOTSUPP;
+	} else {
+		pr_info("PSCIv%d.%d detected in firmware.\n",
+				PSCI_VERSION_MAJOR(ver),
+				PSCI_VERSION_MINOR(ver));
+
+		if (PSCI_VERSION_MAJOR(ver) == 0 &&
+				PSCI_VERSION_MINOR(ver) < 2) {
+			pr_err("Conflicting PSCI version detected.\n");
+			return -EINVAL;
+		}
+	}
+
+	psci_0_2_set_functions();
+
+	return 0;
+}
+
+/*
+ * PSCI init function for PSCI versions >=0.2
+ *
+ * Probe based on PSCI PSCI_VERSION function
+ */
+static int __init psci_0_2_init(struct device_node *np)
+{
+	int err;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+	/*
+	 * Starting with v0.2, the PSCI specification introduced a call
+	 * (PSCI_VERSION) that allows probing the firmware version, so
+	 * that PSCI function IDs and version specific initialization
+	 * can be carried out according to the specific version reported
+	 * by firmware
+	 */
+	err = psci_probe();
+
+out_put_node:
+	of_node_put(np);
+	return err;
+}
+
+/*
+ * PSCI < v0.2 get PSCI Function IDs via DT.
+ */
+static int __init psci_0_1_init(struct device_node *np)
+{
+	u32 id;
+	int err;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+
+	pr_info("Using PSCI v0.1 Function IDs from DT\n");
+
+	if (!of_property_read_u32(np, "cpu_suspend", &id)) {
+		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
+		psci_ops.cpu_suspend = psci_cpu_suspend;
+	}
+
+	if (!of_property_read_u32(np, "cpu_off", &id)) {
+		psci_function_id[PSCI_FN_CPU_OFF] = id;
+		psci_ops.cpu_off = psci_cpu_off;
+	}
+
+	if (!of_property_read_u32(np, "cpu_on", &id)) {
+		psci_function_id[PSCI_FN_CPU_ON] = id;
+		psci_ops.cpu_on = psci_cpu_on;
+	}
+
+	if (!of_property_read_u32(np, "migrate", &id)) {
+		psci_function_id[PSCI_FN_MIGRATE] = id;
+		psci_ops.migrate = psci_migrate;
+	}
+
+out_put_node:
+	of_node_put(np);
+	return err;
+}
+
+static const struct of_device_id psci_of_match[] __initconst = {
+	{ .compatible = "arm,psci",	.data = psci_0_1_init},
+	{ .compatible = "arm,psci-0.2",	.data = psci_0_2_init},
+	{},
+};
+
+int __init psci_dt_init(void)
+{
+	struct device_node *np;
+	const struct of_device_id *matched_np;
+	psci_initcall_t init_fn;
+
+	np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
+
+	if (!np)
+		return -ENODEV;
+
+	init_fn = (psci_initcall_t)matched_np->data;
+	return init_fn(np);
+}
+
+/*
+ * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's
+ * explicitly clarified in SBBR
+ */
+int __init psci_acpi_init(void)
+{
+	if (!acpi_psci_present()) {
+		pr_info("is not implemented in ACPI.\n");
+		return -EOPNOTSUPP;
+	}
+
+	pr_info("probing for conduit method from ACPI.\n");
+
+	if (acpi_psci_use_hvc())
+		invoke_psci_fn = __invoke_psci_fn_hvc;
+	else
+		invoke_psci_fn = __invoke_psci_fn_smc;
+
+	return psci_probe();
+}
+
+#ifdef CONFIG_SMP
+
+static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu)
+{
+	return 0;
+}
+
+static int __init cpu_psci_cpu_prepare(unsigned int cpu)
+{
+	if (!psci_ops.cpu_on) {
+		pr_err("no cpu_on method, not booting CPU%d\n", cpu);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int cpu_psci_cpu_boot(unsigned int cpu)
+{
+	int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
+	if (err)
+		pr_err("failed to boot CPU%d (%d)\n", cpu, err);
+
+	return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int cpu_psci_cpu_disable(unsigned int cpu)
+{
+	/* Fail early if we don't have CPU_OFF support */
+	if (!psci_ops.cpu_off)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static void cpu_psci_cpu_die(unsigned int cpu)
+{
+	int ret;
+	/*
+	 * There are no known implementations of PSCI actually using the
+	 * power state field, pass a sensible default for now.
+	 */
+	struct psci_power_state state = {
+		.type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
+	};
+
+	ret = psci_ops.cpu_off(state);
+
+	pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
+}
+
+static int cpu_psci_cpu_kill(unsigned int cpu)
+{
+	int err, i;
+
+	if (!psci_ops.affinity_info)
+		return 1;
+	/*
+	 * cpu_kill could race with cpu_die and we can
+	 * potentially end up declaring this cpu undead
+	 * while it is dying. So, try again a few times.
+	 */
+
+	for (i = 0; i < 10; i++) {
+		err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
+		if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
+			pr_info("CPU%d killed.\n", cpu);
+			return 1;
+		}
+
+		msleep(10);
+		pr_info("Retrying again to check for CPU kill\n");
+	}
+
+	pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
+			cpu, err);
+	/* Make op_cpu_kill() fail. */
+	return 0;
+}
+#endif
+#endif
+
+static int psci_suspend_finisher(unsigned long index)
+{
+	struct psci_power_state *state = __this_cpu_read(psci_power_state);
+
+	return psci_ops.cpu_suspend(state[index - 1],
+				    virt_to_phys(cpu_resume));
+}
+
+static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index)
+{
+	int ret;
+	struct psci_power_state *state = __this_cpu_read(psci_power_state);
+	/*
+	 * idle state index 0 corresponds to wfi, should never be called
+	 * from the cpu_suspend operations
+	 */
+	if (WARN_ON_ONCE(!index))
+		return -EINVAL;
+
+	if (state[index - 1].type == PSCI_POWER_STATE_TYPE_STANDBY)
+		ret = psci_ops.cpu_suspend(state[index - 1], 0);
+	else
+		ret = __cpu_suspend(index, psci_suspend_finisher);
+
+	return ret;
+}
+
+const struct cpu_operations cpu_psci_ops = {
+	.name		= "psci",
+#ifdef CONFIG_CPU_IDLE
+	.cpu_init_idle	= cpu_psci_cpu_init_idle,
+	.cpu_suspend	= cpu_psci_cpu_suspend,
+#endif
+#ifdef CONFIG_SMP
+	.cpu_init	= cpu_psci_cpu_init,
+	.cpu_prepare	= cpu_psci_cpu_prepare,
+	.cpu_boot	= cpu_psci_cpu_boot,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable	= cpu_psci_cpu_disable,
+	.cpu_die	= cpu_psci_cpu_die,
+	.cpu_kill	= cpu_psci_cpu_kill,
+#endif
+#endif
+};
+
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
new file mode 100644
index 000000000..d882b833d
--- /dev/null
+++ b/arch/arm64/kernel/ptrace.c
@@ -0,0 +1,1178 @@
+/*
+ * Based on arch/arm/kernel/ptrace.c
+ *
+ * By Ross Biro 1/23/92
+ * edited by Linus Torvalds
+ * ARM modifications Copyright (C) 2000 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/audit.h>
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/seccomp.h>
+#include <linux/security.h>
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/uaccess.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/elf.h>
+
+#include <asm/compat.h>
+#include <asm/debug-monitors.h>
+#include <asm/pgtable.h>
+#include <asm/syscall.h>
+#include <asm/traps.h>
+#include <asm/system_misc.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+/*
+ * TODO: does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ */
+void ptrace_disable(struct task_struct *child)
+{
+}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+/*
+ * Handle hitting a HW-breakpoint.
+ */
+static void ptrace_hbptriggered(struct perf_event *bp,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
+{
+	struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
+	siginfo_t info = {
+		.si_signo	= SIGTRAP,
+		.si_errno	= 0,
+		.si_code	= TRAP_HWBKPT,
+		.si_addr	= (void __user *)(bkpt->trigger),
+	};
+
+#ifdef CONFIG_COMPAT
+	int i;
+
+	if (!is_compat_task())
+		goto send_sig;
+
+	for (i = 0; i < ARM_MAX_BRP; ++i) {
+		if (current->thread.debug.hbp_break[i] == bp) {
+			info.si_errno = (i << 1) + 1;
+			break;
+		}
+	}
+
+	for (i = 0; i < ARM_MAX_WRP; ++i) {
+		if (current->thread.debug.hbp_watch[i] == bp) {
+			info.si_errno = -((i << 1) + 1);
+			break;
+		}
+	}
+
+send_sig:
+#endif
+	force_sig_info(SIGTRAP, &info, current);
+}
+
+/*
+ * Unregister breakpoints from this task and reset the pointers in
+ * the thread_struct.
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < ARM_MAX_BRP; i++) {
+		if (t->debug.hbp_break[i]) {
+			unregister_hw_breakpoint(t->debug.hbp_break[i]);
+			t->debug.hbp_break[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < ARM_MAX_WRP; i++) {
+		if (t->debug.hbp_watch[i]) {
+			unregister_hw_breakpoint(t->debug.hbp_watch[i]);
+			t->debug.hbp_watch[i] = NULL;
+		}
+	}
+}
+
+void ptrace_hw_copy_thread(struct task_struct *tsk)
+{
+	memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
+}
+
+static struct perf_event *ptrace_hbp_get_event(unsigned int note_type,
+					       struct task_struct *tsk,
+					       unsigned long idx)
+{
+	struct perf_event *bp = ERR_PTR(-EINVAL);
+
+	switch (note_type) {
+	case NT_ARM_HW_BREAK:
+		if (idx < ARM_MAX_BRP)
+			bp = tsk->thread.debug.hbp_break[idx];
+		break;
+	case NT_ARM_HW_WATCH:
+		if (idx < ARM_MAX_WRP)
+			bp = tsk->thread.debug.hbp_watch[idx];
+		break;
+	}
+
+	return bp;
+}
+
+static int ptrace_hbp_set_event(unsigned int note_type,
+				struct task_struct *tsk,
+				unsigned long idx,
+				struct perf_event *bp)
+{
+	int err = -EINVAL;
+
+	switch (note_type) {
+	case NT_ARM_HW_BREAK:
+		if (idx < ARM_MAX_BRP) {
+			tsk->thread.debug.hbp_break[idx] = bp;
+			err = 0;
+		}
+		break;
+	case NT_ARM_HW_WATCH:
+		if (idx < ARM_MAX_WRP) {
+			tsk->thread.debug.hbp_watch[idx] = bp;
+			err = 0;
+		}
+		break;
+	}
+
+	return err;
+}
+
+static struct perf_event *ptrace_hbp_create(unsigned int note_type,
+					    struct task_struct *tsk,
+					    unsigned long idx)
+{
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+	int err, type;
+
+	switch (note_type) {
+	case NT_ARM_HW_BREAK:
+		type = HW_BREAKPOINT_X;
+		break;
+	case NT_ARM_HW_WATCH:
+		type = HW_BREAKPOINT_RW;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	ptrace_breakpoint_init(&attr);
+
+	/*
+	 * Initialise fields to sane defaults
+	 * (i.e. values that will pass validation).
+	 */
+	attr.bp_addr	= 0;
+	attr.bp_len	= HW_BREAKPOINT_LEN_4;
+	attr.bp_type	= type;
+	attr.disabled	= 1;
+
+	bp = register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, tsk);
+	if (IS_ERR(bp))
+		return bp;
+
+	err = ptrace_hbp_set_event(note_type, tsk, idx, bp);
+	if (err)
+		return ERR_PTR(err);
+
+	return bp;
+}
+
+static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
+				     struct arch_hw_breakpoint_ctrl ctrl,
+				     struct perf_event_attr *attr)
+{
+	int err, len, type, disabled = !ctrl.enabled;
+
+	attr->disabled = disabled;
+	if (disabled)
+		return 0;
+
+	err = arch_bp_generic_fields(ctrl, &len, &type);
+	if (err)
+		return err;
+
+	switch (note_type) {
+	case NT_ARM_HW_BREAK:
+		if ((type & HW_BREAKPOINT_X) != type)
+			return -EINVAL;
+		break;
+	case NT_ARM_HW_WATCH:
+		if ((type & HW_BREAKPOINT_RW) != type)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	attr->bp_len	= len;
+	attr->bp_type	= type;
+
+	return 0;
+}
+
+static int ptrace_hbp_get_resource_info(unsigned int note_type, u32 *info)
+{
+	u8 num;
+	u32 reg = 0;
+
+	switch (note_type) {
+	case NT_ARM_HW_BREAK:
+		num = hw_breakpoint_slots(TYPE_INST);
+		break;
+	case NT_ARM_HW_WATCH:
+		num = hw_breakpoint_slots(TYPE_DATA);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	reg |= debug_monitors_arch();
+	reg <<= 8;
+	reg |= num;
+
+	*info = reg;
+	return 0;
+}
+
+static int ptrace_hbp_get_ctrl(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx,
+			       u32 *ctrl)
+{
+	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	*ctrl = bp ? encode_ctrl_reg(counter_arch_bp(bp)->ctrl) : 0;
+	return 0;
+}
+
+static int ptrace_hbp_get_addr(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx,
+			       u64 *addr)
+{
+	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	*addr = bp ? bp->attr.bp_addr : 0;
+	return 0;
+}
+
+static struct perf_event *ptrace_hbp_get_initialised_bp(unsigned int note_type,
+							struct task_struct *tsk,
+							unsigned long idx)
+{
+	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+	if (!bp)
+		bp = ptrace_hbp_create(note_type, tsk, idx);
+
+	return bp;
+}
+
+static int ptrace_hbp_set_ctrl(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx,
+			       u32 uctrl)
+{
+	int err;
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
+	if (IS_ERR(bp)) {
+		err = PTR_ERR(bp);
+		return err;
+	}
+
+	attr = bp->attr;
+	decode_ctrl_reg(uctrl, &ctrl);
+	err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr);
+	if (err)
+		return err;
+
+	return modify_user_hw_breakpoint(bp, &attr);
+}
+
+static int ptrace_hbp_set_addr(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx,
+			       u64 addr)
+{
+	int err;
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+
+	bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
+	if (IS_ERR(bp)) {
+		err = PTR_ERR(bp);
+		return err;
+	}
+
+	attr = bp->attr;
+	attr.bp_addr = addr;
+	err = modify_user_hw_breakpoint(bp, &attr);
+	return err;
+}
+
+#define PTRACE_HBP_ADDR_SZ	sizeof(u64)
+#define PTRACE_HBP_CTRL_SZ	sizeof(u32)
+#define PTRACE_HBP_PAD_SZ	sizeof(u32)
+
+static int hw_break_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	unsigned int note_type = regset->core_note_type;
+	int ret, idx = 0, offset, limit;
+	u32 info, ctrl;
+	u64 addr;
+
+	/* Resource info */
+	ret = ptrace_hbp_get_resource_info(note_type, &info);
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0,
+				  sizeof(info));
+	if (ret)
+		return ret;
+
+	/* Pad */
+	offset = offsetof(struct user_hwdebug_state, pad);
+	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, offset,
+				       offset + PTRACE_HBP_PAD_SZ);
+	if (ret)
+		return ret;
+
+	/* (address, ctrl) registers */
+	offset = offsetof(struct user_hwdebug_state, dbg_regs);
+	limit = regset->n * regset->size;
+	while (count && offset < limit) {
+		ret = ptrace_hbp_get_addr(note_type, target, idx, &addr);
+		if (ret)
+			return ret;
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr,
+					  offset, offset + PTRACE_HBP_ADDR_SZ);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_ADDR_SZ;
+
+		ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl);
+		if (ret)
+			return ret;
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl,
+					  offset, offset + PTRACE_HBP_CTRL_SZ);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_CTRL_SZ;
+
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       offset,
+					       offset + PTRACE_HBP_PAD_SZ);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_PAD_SZ;
+		idx++;
+	}
+
+	return 0;
+}
+
+static int hw_break_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	unsigned int note_type = regset->core_note_type;
+	int ret, idx = 0, offset, limit;
+	u32 ctrl;
+	u64 addr;
+
+	/* Resource info and pad */
+	offset = offsetof(struct user_hwdebug_state, dbg_regs);
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset);
+	if (ret)
+		return ret;
+
+	/* (address, ctrl) registers */
+	limit = regset->n * regset->size;
+	while (count && offset < limit) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr,
+					 offset, offset + PTRACE_HBP_ADDR_SZ);
+		if (ret)
+			return ret;
+		ret = ptrace_hbp_set_addr(note_type, target, idx, addr);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_ADDR_SZ;
+
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl,
+					 offset, offset + PTRACE_HBP_CTRL_SZ);
+		if (ret)
+			return ret;
+		ret = ptrace_hbp_set_ctrl(note_type, target, idx, ctrl);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_CTRL_SZ;
+
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						offset,
+						offset + PTRACE_HBP_PAD_SZ);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_PAD_SZ;
+		idx++;
+	}
+
+	return 0;
+}
+#endif	/* CONFIG_HAVE_HW_BREAKPOINT */
+
+static int gpr_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+}
+
+static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct user_pt_regs newregs;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1);
+	if (ret)
+		return ret;
+
+	if (!valid_user_regs(&newregs))
+		return -EINVAL;
+
+	task_pt_regs(target)->user_regs = newregs;
+	return 0;
+}
+
+/*
+ * TODO: update fp accessors for lazy context switching (sync/flush hwstate)
+ */
+static int fpr_get(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	struct user_fpsimd_state *uregs;
+	uregs = &target->thread.fpsimd_state.user_fpsimd;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+}
+
+static int fpr_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct user_fpsimd_state newstate;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
+	if (ret)
+		return ret;
+
+	target->thread.fpsimd_state.user_fpsimd = newstate;
+	fpsimd_flush_task_state(target);
+	return ret;
+}
+
+static int tls_get(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	unsigned long *tls = &target->thread.tp_value;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1);
+}
+
+static int tls_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	unsigned long tls;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+	if (ret)
+		return ret;
+
+	target->thread.tp_value = tls;
+	return ret;
+}
+
+static int system_call_get(struct task_struct *target,
+			   const struct user_regset *regset,
+			   unsigned int pos, unsigned int count,
+			   void *kbuf, void __user *ubuf)
+{
+	int syscallno = task_pt_regs(target)->syscallno;
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &syscallno, 0, -1);
+}
+
+static int system_call_set(struct task_struct *target,
+			   const struct user_regset *regset,
+			   unsigned int pos, unsigned int count,
+			   const void *kbuf, const void __user *ubuf)
+{
+	int syscallno, ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1);
+	if (ret)
+		return ret;
+
+	task_pt_regs(target)->syscallno = syscallno;
+	return ret;
+}
+
+enum aarch64_regset {
+	REGSET_GPR,
+	REGSET_FPR,
+	REGSET_TLS,
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	REGSET_HW_BREAK,
+	REGSET_HW_WATCH,
+#endif
+	REGSET_SYSTEM_CALL,
+};
+
+static const struct user_regset aarch64_regsets[] = {
+	[REGSET_GPR] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(struct user_pt_regs) / sizeof(u64),
+		.size = sizeof(u64),
+		.align = sizeof(u64),
+		.get = gpr_get,
+		.set = gpr_set
+	},
+	[REGSET_FPR] = {
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(struct user_fpsimd_state) / sizeof(u32),
+		/*
+		 * We pretend we have 32-bit registers because the fpsr and
+		 * fpcr are 32-bits wide.
+		 */
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.get = fpr_get,
+		.set = fpr_set
+	},
+	[REGSET_TLS] = {
+		.core_note_type = NT_ARM_TLS,
+		.n = 1,
+		.size = sizeof(void *),
+		.align = sizeof(void *),
+		.get = tls_get,
+		.set = tls_set,
+	},
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	[REGSET_HW_BREAK] = {
+		.core_note_type = NT_ARM_HW_BREAK,
+		.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.get = hw_break_get,
+		.set = hw_break_set,
+	},
+	[REGSET_HW_WATCH] = {
+		.core_note_type = NT_ARM_HW_WATCH,
+		.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.get = hw_break_get,
+		.set = hw_break_set,
+	},
+#endif
+	[REGSET_SYSTEM_CALL] = {
+		.core_note_type = NT_ARM_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(int),
+		.align = sizeof(int),
+		.get = system_call_get,
+		.set = system_call_set,
+	},
+};
+
+static const struct user_regset_view user_aarch64_view = {
+	.name = "aarch64", .e_machine = EM_AARCH64,
+	.regsets = aarch64_regsets, .n = ARRAY_SIZE(aarch64_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+enum compat_regset {
+	REGSET_COMPAT_GPR,
+	REGSET_COMPAT_VFP,
+};
+
+static int compat_gpr_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	int ret = 0;
+	unsigned int i, start, num_regs;
+
+	/* Calculate the number of AArch32 registers contained in count */
+	num_regs = count / regset->size;
+
+	/* Convert pos into an register number */
+	start = pos / regset->size;
+
+	if (start + num_regs > regset->n)
+		return -EIO;
+
+	for (i = 0; i < num_regs; ++i) {
+		unsigned int idx = start + i;
+		compat_ulong_t reg;
+
+		switch (idx) {
+		case 15:
+			reg = task_pt_regs(target)->pc;
+			break;
+		case 16:
+			reg = task_pt_regs(target)->pstate;
+			break;
+		case 17:
+			reg = task_pt_regs(target)->orig_x0;
+			break;
+		default:
+			reg = task_pt_regs(target)->regs[idx];
+		}
+
+		if (kbuf) {
+			memcpy(kbuf, &reg, sizeof(reg));
+			kbuf += sizeof(reg);
+		} else {
+			ret = copy_to_user(ubuf, &reg, sizeof(reg));
+			if (ret) {
+				ret = -EFAULT;
+				break;
+			}
+
+			ubuf += sizeof(reg);
+		}
+	}
+
+	return ret;
+}
+
+static int compat_gpr_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs newregs;
+	int ret = 0;
+	unsigned int i, start, num_regs;
+
+	/* Calculate the number of AArch32 registers contained in count */
+	num_regs = count / regset->size;
+
+	/* Convert pos into an register number */
+	start = pos / regset->size;
+
+	if (start + num_regs > regset->n)
+		return -EIO;
+
+	newregs = *task_pt_regs(target);
+
+	for (i = 0; i < num_regs; ++i) {
+		unsigned int idx = start + i;
+		compat_ulong_t reg;
+
+		if (kbuf) {
+			memcpy(&reg, kbuf, sizeof(reg));
+			kbuf += sizeof(reg);
+		} else {
+			ret = copy_from_user(&reg, ubuf, sizeof(reg));
+			if (ret) {
+				ret = -EFAULT;
+				break;
+			}
+
+			ubuf += sizeof(reg);
+		}
+
+		switch (idx) {
+		case 15:
+			newregs.pc = reg;
+			break;
+		case 16:
+			newregs.pstate = reg;
+			break;
+		case 17:
+			newregs.orig_x0 = reg;
+			break;
+		default:
+			newregs.regs[idx] = reg;
+		}
+
+	}
+
+	if (valid_user_regs(&newregs.user_regs))
+		*task_pt_regs(target) = newregs;
+	else
+		ret = -EINVAL;
+
+	return ret;
+}
+
+static int compat_vfp_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	struct user_fpsimd_state *uregs;
+	compat_ulong_t fpscr;
+	int ret;
+
+	uregs = &target->thread.fpsimd_state.user_fpsimd;
+
+	/*
+	 * The VFP registers are packed into the fpsimd_state, so they all sit
+	 * nicely together for us. We just need to create the fpscr separately.
+	 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0,
+				  VFP_STATE_SIZE - sizeof(compat_ulong_t));
+
+	if (count && !ret) {
+		fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) |
+			(uregs->fpcr & VFP_FPSCR_CTRL_MASK);
+		ret = put_user(fpscr, (compat_ulong_t *)ubuf);
+	}
+
+	return ret;
+}
+
+static int compat_vfp_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct user_fpsimd_state *uregs;
+	compat_ulong_t fpscr;
+	int ret;
+
+	if (pos + count > VFP_STATE_SIZE)
+		return -EIO;
+
+	uregs = &target->thread.fpsimd_state.user_fpsimd;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
+				 VFP_STATE_SIZE - sizeof(compat_ulong_t));
+
+	if (count && !ret) {
+		ret = get_user(fpscr, (compat_ulong_t *)ubuf);
+		uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
+		uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+	}
+
+	fpsimd_flush_task_state(target);
+	return ret;
+}
+
+static const struct user_regset aarch32_regsets[] = {
+	[REGSET_COMPAT_GPR] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = COMPAT_ELF_NGREG,
+		.size = sizeof(compat_elf_greg_t),
+		.align = sizeof(compat_elf_greg_t),
+		.get = compat_gpr_get,
+		.set = compat_gpr_set
+	},
+	[REGSET_COMPAT_VFP] = {
+		.core_note_type = NT_ARM_VFP,
+		.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
+		.size = sizeof(compat_ulong_t),
+		.align = sizeof(compat_ulong_t),
+		.get = compat_vfp_get,
+		.set = compat_vfp_set
+	},
+};
+
+static const struct user_regset_view user_aarch32_view = {
+	.name = "aarch32", .e_machine = EM_ARM,
+	.regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets)
+};
+
+static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
+				   compat_ulong_t __user *ret)
+{
+	compat_ulong_t tmp;
+
+	if (off & 3)
+		return -EIO;
+
+	if (off == COMPAT_PT_TEXT_ADDR)
+		tmp = tsk->mm->start_code;
+	else if (off == COMPAT_PT_DATA_ADDR)
+		tmp = tsk->mm->start_data;
+	else if (off == COMPAT_PT_TEXT_END_ADDR)
+		tmp = tsk->mm->end_code;
+	else if (off < sizeof(compat_elf_gregset_t))
+		return copy_regset_to_user(tsk, &user_aarch32_view,
+					   REGSET_COMPAT_GPR, off,
+					   sizeof(compat_ulong_t), ret);
+	else if (off >= COMPAT_USER_SZ)
+		return -EIO;
+	else
+		tmp = 0;
+
+	return put_user(tmp, ret);
+}
+
+static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off,
+				    compat_ulong_t val)
+{
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	if (off & 3 || off >= COMPAT_USER_SZ)
+		return -EIO;
+
+	if (off >= sizeof(compat_elf_gregset_t))
+		return 0;
+
+	set_fs(KERNEL_DS);
+	ret = copy_regset_from_user(tsk, &user_aarch32_view,
+				    REGSET_COMPAT_GPR, off,
+				    sizeof(compat_ulong_t),
+				    &val);
+	set_fs(old_fs);
+
+	return ret;
+}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+/*
+ * Convert a virtual register number into an index for a thread_info
+ * breakpoint array. Breakpoints are identified using positive numbers
+ * whilst watchpoints are negative. The registers are laid out as pairs
+ * of (address, control), each pair mapping to a unique hw_breakpoint struct.
+ * Register 0 is reserved for describing resource information.
+ */
+static int compat_ptrace_hbp_num_to_idx(compat_long_t num)
+{
+	return (abs(num) - 1) >> 1;
+}
+
+static int compat_ptrace_hbp_get_resource_info(u32 *kdata)
+{
+	u8 num_brps, num_wrps, debug_arch, wp_len;
+	u32 reg = 0;
+
+	num_brps	= hw_breakpoint_slots(TYPE_INST);
+	num_wrps	= hw_breakpoint_slots(TYPE_DATA);
+
+	debug_arch	= debug_monitors_arch();
+	wp_len		= 8;
+	reg		|= debug_arch;
+	reg		<<= 8;
+	reg		|= wp_len;
+	reg		<<= 8;
+	reg		|= num_wrps;
+	reg		<<= 8;
+	reg		|= num_brps;
+
+	*kdata = reg;
+	return 0;
+}
+
+static int compat_ptrace_hbp_get(unsigned int note_type,
+				 struct task_struct *tsk,
+				 compat_long_t num,
+				 u32 *kdata)
+{
+	u64 addr = 0;
+	u32 ctrl = 0;
+
+	int err, idx = compat_ptrace_hbp_num_to_idx(num);;
+
+	if (num & 1) {
+		err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr);
+		*kdata = (u32)addr;
+	} else {
+		err = ptrace_hbp_get_ctrl(note_type, tsk, idx, &ctrl);
+		*kdata = ctrl;
+	}
+
+	return err;
+}
+
+static int compat_ptrace_hbp_set(unsigned int note_type,
+				 struct task_struct *tsk,
+				 compat_long_t num,
+				 u32 *kdata)
+{
+	u64 addr;
+	u32 ctrl;
+
+	int err, idx = compat_ptrace_hbp_num_to_idx(num);
+
+	if (num & 1) {
+		addr = *kdata;
+		err = ptrace_hbp_set_addr(note_type, tsk, idx, addr);
+	} else {
+		ctrl = *kdata;
+		err = ptrace_hbp_set_ctrl(note_type, tsk, idx, ctrl);
+	}
+
+	return err;
+}
+
+static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num,
+				    compat_ulong_t __user *data)
+{
+	int ret;
+	u32 kdata;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	/* Watchpoint */
+	if (num < 0) {
+		ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata);
+	/* Resource info */
+	} else if (num == 0) {
+		ret = compat_ptrace_hbp_get_resource_info(&kdata);
+	/* Breakpoint */
+	} else {
+		ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata);
+	}
+	set_fs(old_fs);
+
+	if (!ret)
+		ret = put_user(kdata, data);
+
+	return ret;
+}
+
+static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num,
+				    compat_ulong_t __user *data)
+{
+	int ret;
+	u32 kdata = 0;
+	mm_segment_t old_fs = get_fs();
+
+	if (num == 0)
+		return 0;
+
+	ret = get_user(kdata, data);
+	if (ret)
+		return ret;
+
+	set_fs(KERNEL_DS);
+	if (num < 0)
+		ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata);
+	else
+		ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata);
+	set_fs(old_fs);
+
+	return ret;
+}
+#endif	/* CONFIG_HAVE_HW_BREAKPOINT */
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t caddr, compat_ulong_t cdata)
+{
+	unsigned long addr = caddr;
+	unsigned long data = cdata;
+	void __user *datap = compat_ptr(data);
+	int ret;
+
+	switch (request) {
+		case PTRACE_PEEKUSR:
+			ret = compat_ptrace_read_user(child, addr, datap);
+			break;
+
+		case PTRACE_POKEUSR:
+			ret = compat_ptrace_write_user(child, addr, data);
+			break;
+
+		case COMPAT_PTRACE_GETREGS:
+			ret = copy_regset_to_user(child,
+						  &user_aarch32_view,
+						  REGSET_COMPAT_GPR,
+						  0, sizeof(compat_elf_gregset_t),
+						  datap);
+			break;
+
+		case COMPAT_PTRACE_SETREGS:
+			ret = copy_regset_from_user(child,
+						    &user_aarch32_view,
+						    REGSET_COMPAT_GPR,
+						    0, sizeof(compat_elf_gregset_t),
+						    datap);
+			break;
+
+		case COMPAT_PTRACE_GET_THREAD_AREA:
+			ret = put_user((compat_ulong_t)child->thread.tp_value,
+				       (compat_ulong_t __user *)datap);
+			break;
+
+		case COMPAT_PTRACE_SET_SYSCALL:
+			task_pt_regs(child)->syscallno = data;
+			ret = 0;
+			break;
+
+		case COMPAT_PTRACE_GETVFPREGS:
+			ret = copy_regset_to_user(child,
+						  &user_aarch32_view,
+						  REGSET_COMPAT_VFP,
+						  0, VFP_STATE_SIZE,
+						  datap);
+			break;
+
+		case COMPAT_PTRACE_SETVFPREGS:
+			ret = copy_regset_from_user(child,
+						    &user_aarch32_view,
+						    REGSET_COMPAT_VFP,
+						    0, VFP_STATE_SIZE,
+						    datap);
+			break;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+		case COMPAT_PTRACE_GETHBPREGS:
+			ret = compat_ptrace_gethbpregs(child, addr, datap);
+			break;
+
+		case COMPAT_PTRACE_SETHBPREGS:
+			ret = compat_ptrace_sethbpregs(child, addr, datap);
+			break;
+#endif
+
+		default:
+			ret = compat_ptrace_request(child, request, addr,
+						    data);
+			break;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_COMPAT */
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+	if (is_compat_thread(task_thread_info(task)))
+		return &user_aarch32_view;
+#endif
+	return &user_aarch64_view;
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	return ptrace_request(child, request, addr, data);
+}
+
+enum ptrace_syscall_dir {
+	PTRACE_SYSCALL_ENTER = 0,
+	PTRACE_SYSCALL_EXIT,
+};
+
+static void tracehook_report_syscall(struct pt_regs *regs,
+				     enum ptrace_syscall_dir dir)
+{
+	int regno;
+	unsigned long saved_reg;
+
+	/*
+	 * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
+	 * used to denote syscall entry/exit:
+	 */
+	regno = (is_compat_task() ? 12 : 7);
+	saved_reg = regs->regs[regno];
+	regs->regs[regno] = dir;
+
+	if (dir == PTRACE_SYSCALL_EXIT)
+		tracehook_report_syscall_exit(regs, 0);
+	else if (tracehook_report_syscall_entry(regs))
+		regs->syscallno = ~0UL;
+
+	regs->regs[regno] = saved_reg;
+}
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
+{
+	/* Do the secure computing check first; failures should be fast. */
+	if (secure_computing() == -1)
+		return -1;
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_enter(regs, regs->syscallno);
+
+	audit_syscall_entry(regs->syscallno, regs->orig_x0, regs->regs[1],
+			    regs->regs[2], regs->regs[3]);
+
+	return regs->syscallno;
+}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	audit_syscall_exit(regs);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_exit(regs, regs_return_value(regs));
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
new file mode 100644
index 000000000..6c4fd2810
--- /dev/null
+++ b/arch/arm64/kernel/return_address.c
@@ -0,0 +1,54 @@
+/*
+ * arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/ftrace.h>
+
+#include <asm/stacktrace.h>
+
+struct return_address_data {
+	unsigned int level;
+	void *addr;
+};
+
+static int save_return_addr(struct stackframe *frame, void *d)
+{
+	struct return_address_data *data = d;
+
+	if (!data->level) {
+		data->addr = (void *)frame->pc;
+		return 1;
+	} else {
+		--data->level;
+		return 0;
+	}
+}
+
+void *return_address(unsigned int level)
+{
+	struct return_address_data data;
+	struct stackframe frame;
+
+	data.level = level + 2;
+	data.addr = NULL;
+
+	frame.fp = (unsigned long)__builtin_frame_address(0);
+	frame.sp = current_stack_pointer;
+	frame.pc = (unsigned long)return_address; /* dummy */
+
+	walk_stackframe(&frame, save_return_addr, &data);
+
+	if (!data.level)
+		return data.addr;
+	else
+		return NULL;
+}
+EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
new file mode 100644
index 000000000..74753132c
--- /dev/null
+++ b/arch/arm64/kernel/setup.c
@@ -0,0 +1,581 @@
+/*
+ * Based on arch/arm/kernel/setup.c
+ *
+ * Copyright (C) 1995-2001 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/acpi.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/utsname.h>
+#include <linux/initrd.h>
+#include <linux/console.h>
+#include <linux/cache.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/screen_info.h>
+#include <linux/init.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/root_dev.h>
+#include <linux/clk-provider.h>
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/memblock.h>
+#include <linux/of_iommu.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/efi.h>
+#include <linux/personality.h>
+
+#include <asm/acpi.h>
+#include <asm/fixmap.h>
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/elf.h>
+#include <asm/cpufeature.h>
+#include <asm/cpu_ops.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/smp_plat.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/traps.h>
+#include <asm/memblock.h>
+#include <asm/psci.h>
+#include <asm/efi.h>
+#include <asm/virt.h>
+
+unsigned long elf_hwcap __read_mostly;
+EXPORT_SYMBOL_GPL(elf_hwcap);
+
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP_DEFAULT	\
+				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
+				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
+				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
+				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
+				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
+				 COMPAT_HWCAP_LPAE)
+unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
+#endif
+
+DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+
+phys_addr_t __fdt_pointer __initdata;
+
+/*
+ * Standard memory resources
+ */
+static struct resource mem_res[] = {
+	{
+		.name = "Kernel code",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM
+	},
+	{
+		.name = "Kernel data",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM
+	}
+};
+
+#define kernel_code mem_res[0]
+#define kernel_data mem_res[1]
+
+void __init early_print(const char *str, ...)
+{
+	char buf[256];
+	va_list ap;
+
+	va_start(ap, str);
+	vsnprintf(buf, sizeof(buf), str, ap);
+	va_end(ap);
+
+	printk("%s", buf);
+}
+
+/*
+ * The recorded values of x0 .. x3 upon kernel entry.
+ */
+u64 __cacheline_aligned boot_args[4];
+
+void __init smp_setup_processor_id(void)
+{
+	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
+	cpu_logical_map(0) = mpidr;
+
+	/*
+	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
+	 * using percpu variable early, for example, lockdep will
+	 * access percpu variable inside lock_release
+	 */
+	set_my_cpu_offset(0);
+	pr_info("Booting Linux on physical CPU 0x%lx\n", (unsigned long)mpidr);
+}
+
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+	return phys_id == cpu_logical_map(cpu);
+}
+
+struct mpidr_hash mpidr_hash;
+#ifdef CONFIG_SMP
+/**
+ * smp_build_mpidr_hash - Pre-compute shifts required at each affinity
+ *			  level in order to build a linear index from an
+ *			  MPIDR value. Resulting algorithm is a collision
+ *			  free hash carried out through shifting and ORing
+ */
+static void __init smp_build_mpidr_hash(void)
+{
+	u32 i, affinity, fs[4], bits[4], ls;
+	u64 mask = 0;
+	/*
+	 * Pre-scan the list of MPIDRS and filter out bits that do
+	 * not contribute to affinity levels, ie they never toggle.
+	 */
+	for_each_possible_cpu(i)
+		mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));
+	pr_debug("mask of set bits %#llx\n", mask);
+	/*
+	 * Find and stash the last and first bit set at all affinity levels to
+	 * check how many bits are required to represent them.
+	 */
+	for (i = 0; i < 4; i++) {
+		affinity = MPIDR_AFFINITY_LEVEL(mask, i);
+		/*
+		 * Find the MSB bit and LSB bits position
+		 * to determine how many bits are required
+		 * to express the affinity level.
+		 */
+		ls = fls(affinity);
+		fs[i] = affinity ? ffs(affinity) - 1 : 0;
+		bits[i] = ls - fs[i];
+	}
+	/*
+	 * An index can be created from the MPIDR_EL1 by isolating the
+	 * significant bits at each affinity level and by shifting
+	 * them in order to compress the 32 bits values space to a
+	 * compressed set of values. This is equivalent to hashing
+	 * the MPIDR_EL1 through shifting and ORing. It is a collision free
+	 * hash though not minimal since some levels might contain a number
+	 * of CPUs that is not an exact power of 2 and their bit
+	 * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}.
+	 */
+	mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0];
+	mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0];
+	mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] -
+						(bits[1] + bits[0]);
+	mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) +
+				  fs[3] - (bits[2] + bits[1] + bits[0]);
+	mpidr_hash.mask = mask;
+	mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0];
+	pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n",
+		mpidr_hash.shift_aff[0],
+		mpidr_hash.shift_aff[1],
+		mpidr_hash.shift_aff[2],
+		mpidr_hash.shift_aff[3],
+		mpidr_hash.mask,
+		mpidr_hash.bits);
+	/*
+	 * 4x is an arbitrary value used to warn on a hash table much bigger
+	 * than expected on most systems.
+	 */
+	if (mpidr_hash_size() > 4 * num_possible_cpus())
+		pr_warn("Large number of MPIDR hash buckets detected\n");
+	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
+}
+#endif
+
+static void __init hyp_mode_check(void)
+{
+	if (is_hyp_mode_available())
+		pr_info("CPU: All CPU(s) started at EL2\n");
+	else if (is_hyp_mode_mismatched())
+		WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC,
+			   "CPU: CPUs started in inconsistent modes");
+	else
+		pr_info("CPU: All CPU(s) started at EL1\n");
+}
+
+void __init do_post_cpus_up_work(void)
+{
+	hyp_mode_check();
+	apply_alternatives_all();
+}
+
+#ifdef CONFIG_UP_LATE_INIT
+void __init up_late_init(void)
+{
+	do_post_cpus_up_work();
+}
+#endif /* CONFIG_UP_LATE_INIT */
+
+static void __init setup_processor(void)
+{
+	u64 features, block;
+	u32 cwg;
+	int cls;
+
+	printk("CPU: AArch64 Processor [%08x] revision %d\n",
+	       read_cpuid_id(), read_cpuid_id() & 15);
+
+	sprintf(init_utsname()->machine, ELF_PLATFORM);
+	elf_hwcap = 0;
+
+	cpuinfo_store_boot_cpu();
+
+	/*
+	 * Check for sane CTR_EL0.CWG value.
+	 */
+	cwg = cache_type_cwg();
+	cls = cache_line_size();
+	if (!cwg)
+		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
+			cls);
+	if (L1_CACHE_BYTES < cls)
+		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
+			L1_CACHE_BYTES, cls);
+
+	/*
+	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
+	 * The blocks we test below represent incremental functionality
+	 * for non-negative values. Negative values are reserved.
+	 */
+	features = read_cpuid(ID_AA64ISAR0_EL1);
+	block = (features >> 4) & 0xf;
+	if (!(block & 0x8)) {
+		switch (block) {
+		default:
+		case 2:
+			elf_hwcap |= HWCAP_PMULL;
+		case 1:
+			elf_hwcap |= HWCAP_AES;
+		case 0:
+			break;
+		}
+	}
+
+	block = (features >> 8) & 0xf;
+	if (block && !(block & 0x8))
+		elf_hwcap |= HWCAP_SHA1;
+
+	block = (features >> 12) & 0xf;
+	if (block && !(block & 0x8))
+		elf_hwcap |= HWCAP_SHA2;
+
+	block = (features >> 16) & 0xf;
+	if (block && !(block & 0x8))
+		elf_hwcap |= HWCAP_CRC32;
+
+#ifdef CONFIG_COMPAT
+	/*
+	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to
+	 * the Aarch32 32-bit execution state.
+	 */
+	features = read_cpuid(ID_ISAR5_EL1);
+	block = (features >> 4) & 0xf;
+	if (!(block & 0x8)) {
+		switch (block) {
+		default:
+		case 2:
+			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
+		case 1:
+			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
+		case 0:
+			break;
+		}
+	}
+
+	block = (features >> 8) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
+
+	block = (features >> 12) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
+
+	block = (features >> 16) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
+#endif
+}
+
+static void __init setup_machine_fdt(phys_addr_t dt_phys)
+{
+	if (!dt_phys || !early_init_dt_scan(phys_to_virt(dt_phys))) {
+		early_print("\n"
+			"Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n"
+			"The dtb must be 8-byte aligned and passed in the first 512MB of memory\n"
+			"\nPlease check your bootloader.\n",
+			dt_phys, phys_to_virt(dt_phys));
+
+		while (true)
+			cpu_relax();
+	}
+
+	dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name());
+}
+
+static void __init request_standard_resources(void)
+{
+	struct memblock_region *region;
+	struct resource *res;
+
+	kernel_code.start   = virt_to_phys(_text);
+	kernel_code.end     = virt_to_phys(_etext - 1);
+	kernel_data.start   = virt_to_phys(_sdata);
+	kernel_data.end     = virt_to_phys(_end - 1);
+
+	for_each_memblock(memory, region) {
+		res = alloc_bootmem_low(sizeof(*res));
+		res->name  = "System RAM";
+		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+		request_resource(&iomem_resource, res);
+
+		if (kernel_code.start >= res->start &&
+		    kernel_code.end <= res->end)
+			request_resource(res, &kernel_code);
+		if (kernel_data.start >= res->start &&
+		    kernel_data.end <= res->end)
+			request_resource(res, &kernel_data);
+	}
+}
+
+u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
+
+void __init setup_arch(char **cmdline_p)
+{
+	setup_processor();
+
+	setup_machine_fdt(__fdt_pointer);
+
+	init_mm.start_code = (unsigned long) _text;
+	init_mm.end_code   = (unsigned long) _etext;
+	init_mm.end_data   = (unsigned long) _edata;
+	init_mm.brk	   = (unsigned long) _end;
+
+	*cmdline_p = boot_command_line;
+
+	early_fixmap_init();
+	early_ioremap_init();
+
+	parse_early_param();
+
+	/*
+	 *  Unmask asynchronous aborts after bringing up possible earlycon.
+	 * (Report possible System Errors once we can report this occurred)
+	 */
+	local_async_enable();
+
+	efi_init();
+	arm64_memblock_init();
+
+	/* Parse the ACPI tables for possible boot-time configuration */
+	acpi_boot_table_init();
+
+	paging_init();
+	request_standard_resources();
+
+	early_ioremap_reset();
+
+	if (acpi_disabled) {
+		unflatten_device_tree();
+		psci_dt_init();
+		cpu_read_bootcpu_ops();
+#ifdef CONFIG_SMP
+		of_smp_init_cpus();
+#endif
+	} else {
+		psci_acpi_init();
+		acpi_init_cpus();
+	}
+
+#ifdef CONFIG_SMP
+	smp_build_mpidr_hash();
+#endif
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+	conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+#endif
+	if (boot_args[1] || boot_args[2] || boot_args[3]) {
+		pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
+			"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
+			"This indicates a broken bootloader or old kernel\n",
+			boot_args[1], boot_args[2], boot_args[3]);
+	}
+}
+
+static int __init arm64_device_init(void)
+{
+	of_iommu_init();
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+	return 0;
+}
+arch_initcall_sync(arm64_device_init);
+
+static int __init topology_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
+		cpu->hotpluggable = 1;
+		register_cpu(cpu, i);
+	}
+
+	return 0;
+}
+subsys_initcall(topology_init);
+
+static const char *hwcap_str[] = {
+	"fp",
+	"asimd",
+	"evtstrm",
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	NULL
+};
+
+#ifdef CONFIG_COMPAT
+static const char *compat_hwcap_str[] = {
+	"swp",
+	"half",
+	"thumb",
+	"26bit",
+	"fastmult",
+	"fpa",
+	"vfp",
+	"edsp",
+	"java",
+	"iwmmxt",
+	"crunch",
+	"thumbee",
+	"neon",
+	"vfpv3",
+	"vfpv3d16",
+	"tls",
+	"vfpv4",
+	"idiva",
+	"idivt",
+	"vfpd32",
+	"lpae",
+	"evtstrm"
+};
+
+static const char *compat_hwcap2_str[] = {
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	NULL
+};
+#endif /* CONFIG_COMPAT */
+
+static int c_show(struct seq_file *m, void *v)
+{
+	int i, j;
+
+	for_each_online_cpu(i) {
+		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
+		u32 midr = cpuinfo->reg_midr;
+
+		/*
+		 * glibc reads /proc/cpuinfo to determine the number of
+		 * online processors, looking for lines beginning with
+		 * "processor".  Give glibc what it expects.
+		 */
+#ifdef CONFIG_SMP
+		seq_printf(m, "processor\t: %d\n", i);
+#endif
+
+		/*
+		 * Dump out the common processor features in a single line.
+		 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+		 * rather than attempting to parse this, but there's a body of
+		 * software which does already (at least for 32-bit).
+		 */
+		seq_puts(m, "Features\t:");
+		if (personality(current->personality) == PER_LINUX32) {
+#ifdef CONFIG_COMPAT
+			for (j = 0; compat_hwcap_str[j]; j++)
+				if (compat_elf_hwcap & (1 << j))
+					seq_printf(m, " %s", compat_hwcap_str[j]);
+
+			for (j = 0; compat_hwcap2_str[j]; j++)
+				if (compat_elf_hwcap2 & (1 << j))
+					seq_printf(m, " %s", compat_hwcap2_str[j]);
+#endif /* CONFIG_COMPAT */
+		} else {
+			for (j = 0; hwcap_str[j]; j++)
+				if (elf_hwcap & (1 << j))
+					seq_printf(m, " %s", hwcap_str[j]);
+		}
+		seq_puts(m, "\n");
+
+		seq_printf(m, "CPU implementer\t: 0x%02x\n",
+			   MIDR_IMPLEMENTOR(midr));
+		seq_printf(m, "CPU architecture: 8\n");
+		seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+		seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
+	}
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < 1 ? (void *)1 : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= c_show
+};
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
new file mode 100644
index 000000000..e18c48cb6
--- /dev/null
+++ b/arch/arm64/kernel/signal.c
@@ -0,0 +1,416 @@
+/*
+ * Based on arch/arm/kernel/signal.c
+ *
+ * Copyright (C) 1995-2009 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compat.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/personality.h>
+#include <linux/freezer.h>
+#include <linux/uaccess.h>
+#include <linux/tracehook.h>
+#include <linux/ratelimit.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/elf.h>
+#include <asm/cacheflush.h>
+#include <asm/ucontext.h>
+#include <asm/unistd.h>
+#include <asm/fpsimd.h>
+#include <asm/signal32.h>
+#include <asm/vdso.h>
+
+/*
+ * Do a signal return; undo the signal stack. These are aligned to 128-bit.
+ */
+struct rt_sigframe {
+	struct siginfo info;
+	struct ucontext uc;
+	u64 fp;
+	u64 lr;
+};
+
+static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
+{
+	struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
+	int err;
+
+	/* dump the hardware registers to the fpsimd_state structure */
+	fpsimd_preserve_current_state();
+
+	/* copy the FP and status/control registers */
+	err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
+	__put_user_error(fpsimd->fpsr, &ctx->fpsr, err);
+	__put_user_error(fpsimd->fpcr, &ctx->fpcr, err);
+
+	/* copy the magic/size information */
+	__put_user_error(FPSIMD_MAGIC, &ctx->head.magic, err);
+	__put_user_error(sizeof(struct fpsimd_context), &ctx->head.size, err);
+
+	return err ? -EFAULT : 0;
+}
+
+static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
+{
+	struct fpsimd_state fpsimd;
+	__u32 magic, size;
+	int err = 0;
+
+	/* check the magic/size information */
+	__get_user_error(magic, &ctx->head.magic, err);
+	__get_user_error(size, &ctx->head.size, err);
+	if (err)
+		return -EFAULT;
+	if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context))
+		return -EINVAL;
+
+	/* copy the FP and status/control registers */
+	err = __copy_from_user(fpsimd.vregs, ctx->vregs,
+			       sizeof(fpsimd.vregs));
+	__get_user_error(fpsimd.fpsr, &ctx->fpsr, err);
+	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
+
+	/* load the hardware registers from the fpsimd_state structure */
+	if (!err)
+		fpsimd_update_current_state(&fpsimd);
+
+	return err ? -EFAULT : 0;
+}
+
+static int restore_sigframe(struct pt_regs *regs,
+			    struct rt_sigframe __user *sf)
+{
+	sigset_t set;
+	int i, err;
+	void *aux = sf->uc.uc_mcontext.__reserved;
+
+	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
+	if (err == 0)
+		set_current_blocked(&set);
+
+	for (i = 0; i < 31; i++)
+		__get_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i],
+				 err);
+	__get_user_error(regs->sp, &sf->uc.uc_mcontext.sp, err);
+	__get_user_error(regs->pc, &sf->uc.uc_mcontext.pc, err);
+	__get_user_error(regs->pstate, &sf->uc.uc_mcontext.pstate, err);
+
+	/*
+	 * Avoid sys_rt_sigreturn() restarting.
+	 */
+	regs->syscallno = ~0UL;
+
+	err |= !valid_user_regs(&regs->user_regs);
+
+	if (err == 0) {
+		struct fpsimd_context *fpsimd_ctx =
+			container_of(aux, struct fpsimd_context, head);
+		err |= restore_fpsimd_context(fpsimd_ctx);
+	}
+
+	return err;
+}
+
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	/*
+	 * Since we stacked the signal on a 128-bit boundary, then 'sp' should
+	 * be word aligned here.
+	 */
+	if (regs->sp & 15)
+		goto badframe;
+
+	frame = (struct rt_sigframe __user *)regs->sp;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+		goto badframe;
+
+	if (restore_sigframe(regs, frame))
+		goto badframe;
+
+	if (restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+
+	return regs->regs[0];
+
+badframe:
+	if (show_unhandled_signals)
+		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n",
+				    current->comm, task_pid_nr(current), __func__,
+				    regs->pc, regs->sp);
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+static int setup_sigframe(struct rt_sigframe __user *sf,
+			  struct pt_regs *regs, sigset_t *set)
+{
+	int i, err = 0;
+	void *aux = sf->uc.uc_mcontext.__reserved;
+	struct _aarch64_ctx *end;
+
+	/* set up the stack frame for unwinding */
+	__put_user_error(regs->regs[29], &sf->fp, err);
+	__put_user_error(regs->regs[30], &sf->lr, err);
+
+	for (i = 0; i < 31; i++)
+		__put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i],
+				 err);
+	__put_user_error(regs->sp, &sf->uc.uc_mcontext.sp, err);
+	__put_user_error(regs->pc, &sf->uc.uc_mcontext.pc, err);
+	__put_user_error(regs->pstate, &sf->uc.uc_mcontext.pstate, err);
+
+	__put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err);
+
+	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
+
+	if (err == 0) {
+		struct fpsimd_context *fpsimd_ctx =
+			container_of(aux, struct fpsimd_context, head);
+		err |= preserve_fpsimd_context(fpsimd_ctx);
+		aux += sizeof(*fpsimd_ctx);
+	}
+
+	/* fault information, if valid */
+	if (current->thread.fault_code) {
+		struct esr_context *esr_ctx =
+			container_of(aux, struct esr_context, head);
+		__put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err);
+		__put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err);
+		__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
+		aux += sizeof(*esr_ctx);
+	}
+
+	/* set the "end" magic */
+	end = aux;
+	__put_user_error(0, &end->magic, err);
+	__put_user_error(0, &end->size, err);
+
+	return err;
+}
+
+static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig,
+					       struct pt_regs *regs)
+{
+	unsigned long sp, sp_top;
+	struct rt_sigframe __user *frame;
+
+	sp = sp_top = sigsp(regs->sp, ksig);
+
+	sp = (sp - sizeof(struct rt_sigframe)) & ~15;
+	frame = (struct rt_sigframe __user *)sp;
+
+	/*
+	 * Check that we can actually write to the signal frame.
+	 */
+	if (!access_ok(VERIFY_WRITE, frame, sp_top - sp))
+		frame = NULL;
+
+	return frame;
+}
+
+static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+			 void __user *frame, int usig)
+{
+	__sigrestore_t sigtramp;
+
+	regs->regs[0] = usig;
+	regs->sp = (unsigned long)frame;
+	regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp);
+	regs->pc = (unsigned long)ka->sa.sa_handler;
+
+	if (ka->sa.sa_flags & SA_RESTORER)
+		sigtramp = ka->sa.sa_restorer;
+	else
+		sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
+
+	regs->regs[30] = (unsigned long)sigtramp;
+}
+
+static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
+			  struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	int err = 0;
+
+	frame = get_sigframe(ksig, regs);
+	if (!frame)
+		return 1;
+
+	__put_user_error(0, &frame->uc.uc_flags, err);
+	__put_user_error(NULL, &frame->uc.uc_link, err);
+
+	err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
+	err |= setup_sigframe(frame, regs, set);
+	if (err == 0) {
+		setup_return(regs, &ksig->ka, frame, usig);
+		if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+			err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+			regs->regs[1] = (unsigned long)&frame->info;
+			regs->regs[2] = (unsigned long)&frame->uc;
+		}
+	}
+
+	return err;
+}
+
+static void setup_restart_syscall(struct pt_regs *regs)
+{
+	if (is_compat_task())
+		compat_setup_restart_syscall(regs);
+	else
+		regs->regs[8] = __NR_restart_syscall;
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+	sigset_t *oldset = sigmask_to_save();
+	int usig = ksig->sig;
+	int ret;
+
+	/*
+	 * Set up the stack frame
+	 */
+	if (is_compat_task()) {
+		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+			ret = compat_setup_rt_frame(usig, ksig, oldset, regs);
+		else
+			ret = compat_setup_frame(usig, ksig, oldset, regs);
+	} else {
+		ret = setup_rt_frame(usig, ksig, oldset, regs);
+	}
+
+	/*
+	 * Check that the resulting registers are actually sane.
+	 */
+	ret |= !valid_user_regs(&regs->user_regs);
+
+	/*
+	 * Fast forward the stepping logic so we step into the signal
+	 * handler.
+	 */
+	if (!ret)
+		user_fastforward_single_step(tsk);
+
+	signal_setup_done(ret, ksig, 0);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+static void do_signal(struct pt_regs *regs)
+{
+	unsigned long continue_addr = 0, restart_addr = 0;
+	int retval = 0;
+	int syscall = (int)regs->syscallno;
+	struct ksignal ksig;
+
+	/*
+	 * If we were from a system call, check for system call restarting...
+	 */
+	if (syscall >= 0) {
+		continue_addr = regs->pc;
+		restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4);
+		retval = regs->regs[0];
+
+		/*
+		 * Avoid additional syscall restarting via ret_to_user.
+		 */
+		regs->syscallno = ~0UL;
+
+		/*
+		 * Prepare for system call restart. We do this here so that a
+		 * debugger will see the already changed PC.
+		 */
+		switch (retval) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+		case -ERESTART_RESTARTBLOCK:
+			regs->regs[0] = regs->orig_x0;
+			regs->pc = restart_addr;
+			break;
+		}
+	}
+
+	/*
+	 * Get the signal to deliver. When running under ptrace, at this point
+	 * the debugger may change all of our registers.
+	 */
+	if (get_signal(&ksig)) {
+		/*
+		 * Depending on the signal settings, we may need to revert the
+		 * decision to restart the system call, but skip this if a
+		 * debugger has chosen to restart at a different PC.
+		 */
+		if (regs->pc == restart_addr &&
+		    (retval == -ERESTARTNOHAND ||
+		     retval == -ERESTART_RESTARTBLOCK ||
+		     (retval == -ERESTARTSYS &&
+		      !(ksig.ka.sa.sa_flags & SA_RESTART)))) {
+			regs->regs[0] = -EINTR;
+			regs->pc = continue_addr;
+		}
+
+		handle_signal(&ksig, regs);
+		return;
+	}
+
+	/*
+	 * Handle restarting a different system call. As above, if a debugger
+	 * has chosen to restart at a different PC, ignore the restart.
+	 */
+	if (syscall >= 0 && regs->pc == restart_addr) {
+		if (retval == -ERESTART_RESTARTBLOCK)
+			setup_restart_syscall(regs);
+		user_rewind_single_step(current);
+	}
+
+	restore_saved_sigmask();
+}
+
+asmlinkage void do_notify_resume(struct pt_regs *regs,
+				 unsigned int thread_flags)
+{
+	if (thread_flags & _TIF_SIGPENDING)
+		do_signal(regs);
+
+	if (thread_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
+
+	if (thread_flags & _TIF_FOREIGN_FPSTATE)
+		fpsimd_restore_current_state();
+
+}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
new file mode 100644
index 000000000..d26fcd4cd
--- /dev/null
+++ b/arch/arm64/kernel/signal32.c
@@ -0,0 +1,577 @@
+/*
+ * Based on arch/arm/kernel/signal.c
+ *
+ * Copyright (C) 1995-2009 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ * Modified by Will Deacon <will.deacon@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compat.h>
+#include <linux/signal.h>
+#include <linux/syscalls.h>
+#include <linux/ratelimit.h>
+
+#include <asm/esr.h>
+#include <asm/fpsimd.h>
+#include <asm/signal32.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+struct compat_sigcontext {
+	/* We always set these two fields to 0 */
+	compat_ulong_t			trap_no;
+	compat_ulong_t			error_code;
+
+	compat_ulong_t			oldmask;
+	compat_ulong_t			arm_r0;
+	compat_ulong_t			arm_r1;
+	compat_ulong_t			arm_r2;
+	compat_ulong_t			arm_r3;
+	compat_ulong_t			arm_r4;
+	compat_ulong_t			arm_r5;
+	compat_ulong_t			arm_r6;
+	compat_ulong_t			arm_r7;
+	compat_ulong_t			arm_r8;
+	compat_ulong_t			arm_r9;
+	compat_ulong_t			arm_r10;
+	compat_ulong_t			arm_fp;
+	compat_ulong_t			arm_ip;
+	compat_ulong_t			arm_sp;
+	compat_ulong_t			arm_lr;
+	compat_ulong_t			arm_pc;
+	compat_ulong_t			arm_cpsr;
+	compat_ulong_t			fault_address;
+};
+
+struct compat_ucontext {
+	compat_ulong_t			uc_flags;
+	compat_uptr_t			uc_link;
+	compat_stack_t			uc_stack;
+	struct compat_sigcontext	uc_mcontext;
+	compat_sigset_t			uc_sigmask;
+	int		__unused[32 - (sizeof (compat_sigset_t) / sizeof (int))];
+	compat_ulong_t	uc_regspace[128] __attribute__((__aligned__(8)));
+};
+
+struct compat_vfp_sigframe {
+	compat_ulong_t	magic;
+	compat_ulong_t	size;
+	struct compat_user_vfp {
+		compat_u64	fpregs[32];
+		compat_ulong_t	fpscr;
+	} ufp;
+	struct compat_user_vfp_exc {
+		compat_ulong_t	fpexc;
+		compat_ulong_t	fpinst;
+		compat_ulong_t	fpinst2;
+	} ufp_exc;
+} __attribute__((__aligned__(8)));
+
+#define VFP_MAGIC		0x56465001
+#define VFP_STORAGE_SIZE	sizeof(struct compat_vfp_sigframe)
+
+#define FSR_WRITE_SHIFT		(11)
+
+struct compat_aux_sigframe {
+	struct compat_vfp_sigframe	vfp;
+
+	/* Something that isn't a valid magic number for any coprocessor.  */
+	unsigned long			end_magic;
+} __attribute__((__aligned__(8)));
+
+struct compat_sigframe {
+	struct compat_ucontext	uc;
+	compat_ulong_t		retcode[2];
+};
+
+struct compat_rt_sigframe {
+	struct compat_siginfo info;
+	struct compat_sigframe sig;
+};
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
+{
+	compat_sigset_t	cset;
+
+	cset.sig[0] = set->sig[0] & 0xffffffffull;
+	cset.sig[1] = set->sig[0] >> 32;
+
+	return copy_to_user(uset, &cset, sizeof(*uset));
+}
+
+static inline int get_sigset_t(sigset_t *set,
+			       const compat_sigset_t __user *uset)
+{
+	compat_sigset_t s32;
+
+	if (copy_from_user(&s32, uset, sizeof(*uset)))
+		return -EFAULT;
+
+	set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+	return 0;
+}
+
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
+{
+	int err;
+
+	if (!access_ok(VERIFY_WRITE, to, sizeof(*to)))
+		return -EFAULT;
+
+	/* If you change siginfo_t structure, please be sure
+	 * this code is fixed accordingly.
+	 * It should never copy any pad contained in the structure
+	 * to avoid security leaks, but must copy the generic
+	 * 3 ints plus the relevant union member.
+	 * This routine must convert siginfo from 64bit to 32bit as well
+	 * at the same time.
+	 */
+	err = __put_user(from->si_signo, &to->si_signo);
+	err |= __put_user(from->si_errno, &to->si_errno);
+	err |= __put_user((short)from->si_code, &to->si_code);
+	if (from->si_code < 0)
+		err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad,
+				      SI_PAD_SIZE);
+	else switch (from->si_code & __SI_MASK) {
+	case __SI_KILL:
+		err |= __put_user(from->si_pid, &to->si_pid);
+		err |= __put_user(from->si_uid, &to->si_uid);
+		break;
+	case __SI_TIMER:
+		 err |= __put_user(from->si_tid, &to->si_tid);
+		 err |= __put_user(from->si_overrun, &to->si_overrun);
+		 err |= __put_user(from->si_int, &to->si_int);
+		break;
+	case __SI_POLL:
+		err |= __put_user(from->si_band, &to->si_band);
+		err |= __put_user(from->si_fd, &to->si_fd);
+		break;
+	case __SI_FAULT:
+		err |= __put_user((compat_uptr_t)(unsigned long)from->si_addr,
+				  &to->si_addr);
+#ifdef BUS_MCEERR_AO
+		/*
+		 * Other callers might not initialize the si_lsb field,
+		 * so check explicitely for the right codes here.
+		 */
+		if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
+			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
+#endif
+		break;
+	case __SI_CHLD:
+		err |= __put_user(from->si_pid, &to->si_pid);
+		err |= __put_user(from->si_uid, &to->si_uid);
+		err |= __put_user(from->si_status, &to->si_status);
+		err |= __put_user(from->si_utime, &to->si_utime);
+		err |= __put_user(from->si_stime, &to->si_stime);
+		break;
+	case __SI_RT: /* This is not generated by the kernel as of now. */
+	case __SI_MESGQ: /* But this is */
+		err |= __put_user(from->si_pid, &to->si_pid);
+		err |= __put_user(from->si_uid, &to->si_uid);
+		err |= __put_user(from->si_int, &to->si_int);
+		break;
+	case __SI_SYS:
+		err |= __put_user((compat_uptr_t)(unsigned long)
+				from->si_call_addr, &to->si_call_addr);
+		err |= __put_user(from->si_syscall, &to->si_syscall);
+		err |= __put_user(from->si_arch, &to->si_arch);
+		break;
+	default: /* this is just in case for now ... */
+		err |= __put_user(from->si_pid, &to->si_pid);
+		err |= __put_user(from->si_uid, &to->si_uid);
+		break;
+	}
+	return err;
+}
+
+int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+{
+	memset(to, 0, sizeof *to);
+
+	if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) ||
+	    copy_from_user(to->_sifields._pad,
+			   from->_sifields._pad, SI_PAD_SIZE))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * VFP save/restore code.
+ */
+static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
+{
+	struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
+	compat_ulong_t magic = VFP_MAGIC;
+	compat_ulong_t size = VFP_STORAGE_SIZE;
+	compat_ulong_t fpscr, fpexc;
+	int err = 0;
+
+	/*
+	 * Save the hardware registers to the fpsimd_state structure.
+	 * Note that this also saves V16-31, which aren't visible
+	 * in AArch32.
+	 */
+	fpsimd_preserve_current_state();
+
+	/* Place structure header on the stack */
+	__put_user_error(magic, &frame->magic, err);
+	__put_user_error(size, &frame->size, err);
+
+	/*
+	 * Now copy the FP registers. Since the registers are packed,
+	 * we can copy the prefix we want (V0-V15) as it is.
+	 * FIXME: Won't work if big endian.
+	 */
+	err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs,
+			      sizeof(frame->ufp.fpregs));
+
+	/* Create an AArch32 fpscr from the fpsr and the fpcr. */
+	fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) |
+		(fpsimd->fpcr & VFP_FPSCR_CTRL_MASK);
+	__put_user_error(fpscr, &frame->ufp.fpscr, err);
+
+	/*
+	 * The exception register aren't available so we fake up a
+	 * basic FPEXC and zero everything else.
+	 */
+	fpexc = (1 << 30);
+	__put_user_error(fpexc, &frame->ufp_exc.fpexc, err);
+	__put_user_error(0, &frame->ufp_exc.fpinst, err);
+	__put_user_error(0, &frame->ufp_exc.fpinst2, err);
+
+	return err ? -EFAULT : 0;
+}
+
+static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
+{
+	struct fpsimd_state fpsimd;
+	compat_ulong_t magic = VFP_MAGIC;
+	compat_ulong_t size = VFP_STORAGE_SIZE;
+	compat_ulong_t fpscr;
+	int err = 0;
+
+	__get_user_error(magic, &frame->magic, err);
+	__get_user_error(size, &frame->size, err);
+
+	if (err)
+		return -EFAULT;
+	if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE)
+		return -EINVAL;
+
+	/*
+	 * Copy the FP registers into the start of the fpsimd_state.
+	 * FIXME: Won't work if big endian.
+	 */
+	err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs,
+				sizeof(frame->ufp.fpregs));
+
+	/* Extract the fpsr and the fpcr from the fpscr */
+	__get_user_error(fpscr, &frame->ufp.fpscr, err);
+	fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK;
+	fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+
+	/*
+	 * We don't need to touch the exception register, so
+	 * reload the hardware state.
+	 */
+	if (!err)
+		fpsimd_update_current_state(&fpsimd);
+
+	return err ? -EFAULT : 0;
+}
+
+static int compat_restore_sigframe(struct pt_regs *regs,
+				   struct compat_sigframe __user *sf)
+{
+	int err;
+	sigset_t set;
+	struct compat_aux_sigframe __user *aux;
+
+	err = get_sigset_t(&set, &sf->uc.uc_sigmask);
+	if (err == 0) {
+		sigdelsetmask(&set, ~_BLOCKABLE);
+		set_current_blocked(&set);
+	}
+
+	__get_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err);
+	__get_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err);
+	__get_user_error(regs->regs[2], &sf->uc.uc_mcontext.arm_r2, err);
+	__get_user_error(regs->regs[3], &sf->uc.uc_mcontext.arm_r3, err);
+	__get_user_error(regs->regs[4], &sf->uc.uc_mcontext.arm_r4, err);
+	__get_user_error(regs->regs[5], &sf->uc.uc_mcontext.arm_r5, err);
+	__get_user_error(regs->regs[6], &sf->uc.uc_mcontext.arm_r6, err);
+	__get_user_error(regs->regs[7], &sf->uc.uc_mcontext.arm_r7, err);
+	__get_user_error(regs->regs[8], &sf->uc.uc_mcontext.arm_r8, err);
+	__get_user_error(regs->regs[9], &sf->uc.uc_mcontext.arm_r9, err);
+	__get_user_error(regs->regs[10], &sf->uc.uc_mcontext.arm_r10, err);
+	__get_user_error(regs->regs[11], &sf->uc.uc_mcontext.arm_fp, err);
+	__get_user_error(regs->regs[12], &sf->uc.uc_mcontext.arm_ip, err);
+	__get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err);
+	__get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err);
+	__get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err);
+	__get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err);
+
+	/*
+	 * Avoid compat_sys_sigreturn() restarting.
+	 */
+	regs->syscallno = ~0UL;
+
+	err |= !valid_user_regs(&regs->user_regs);
+
+	aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
+	if (err == 0)
+		err |= compat_restore_vfp_context(&aux->vfp);
+
+	return err;
+}
+
+asmlinkage int compat_sys_sigreturn(struct pt_regs *regs)
+{
+	struct compat_sigframe __user *frame;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	/*
+	 * Since we stacked the signal on a 64-bit boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->compat_sp & 7)
+		goto badframe;
+
+	frame = (struct compat_sigframe __user *)regs->compat_sp;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+		goto badframe;
+
+	if (compat_restore_sigframe(regs, frame))
+		goto badframe;
+
+	return regs->regs[0];
+
+badframe:
+	if (show_unhandled_signals)
+		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n",
+				    current->comm, task_pid_nr(current), __func__,
+				    regs->pc, regs->sp);
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct compat_rt_sigframe __user *frame;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	/*
+	 * Since we stacked the signal on a 64-bit boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->compat_sp & 7)
+		goto badframe;
+
+	frame = (struct compat_rt_sigframe __user *)regs->compat_sp;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+		goto badframe;
+
+	if (compat_restore_sigframe(regs, &frame->sig))
+		goto badframe;
+
+	if (compat_restore_altstack(&frame->sig.uc.uc_stack))
+		goto badframe;
+
+	return regs->regs[0];
+
+badframe:
+	if (show_unhandled_signals)
+		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n",
+				    current->comm, task_pid_nr(current), __func__,
+				    regs->pc, regs->sp);
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+static void __user *compat_get_sigframe(struct ksignal *ksig,
+					struct pt_regs *regs,
+					int framesize)
+{
+	compat_ulong_t sp = sigsp(regs->compat_sp, ksig);
+	void __user *frame;
+
+	/*
+	 * ATPCS B01 mandates 8-byte alignment
+	 */
+	frame = compat_ptr((compat_uptr_t)((sp - framesize) & ~7));
+
+	/*
+	 * Check that we can actually write to the signal frame.
+	 */
+	if (!access_ok(VERIFY_WRITE, frame, framesize))
+		frame = NULL;
+
+	return frame;
+}
+
+static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+				compat_ulong_t __user *rc, void __user *frame,
+				int usig)
+{
+	compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler);
+	compat_ulong_t retcode;
+	compat_ulong_t spsr = regs->pstate & ~(PSR_f | COMPAT_PSR_E_BIT);
+	int thumb;
+
+	/* Check if the handler is written for ARM or Thumb */
+	thumb = handler & 1;
+
+	if (thumb)
+		spsr |= COMPAT_PSR_T_BIT;
+	else
+		spsr &= ~COMPAT_PSR_T_BIT;
+
+	/* The IT state must be cleared for both ARM and Thumb-2 */
+	spsr &= ~COMPAT_PSR_IT_MASK;
+
+	/* Restore the original endianness */
+	spsr |= COMPAT_PSR_ENDSTATE;
+
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		retcode = ptr_to_compat(ka->sa.sa_restorer);
+	} else {
+		/* Set up sigreturn pointer */
+		unsigned int idx = thumb << 1;
+
+		if (ka->sa.sa_flags & SA_SIGINFO)
+			idx += 3;
+
+		retcode = AARCH32_VECTORS_BASE +
+			  AARCH32_KERN_SIGRET_CODE_OFFSET +
+			  (idx << 2) + thumb;
+	}
+
+	regs->regs[0]	= usig;
+	regs->compat_sp	= ptr_to_compat(frame);
+	regs->compat_lr	= retcode;
+	regs->pc	= handler;
+	regs->pstate	= spsr;
+}
+
+static int compat_setup_sigframe(struct compat_sigframe __user *sf,
+				 struct pt_regs *regs, sigset_t *set)
+{
+	struct compat_aux_sigframe __user *aux;
+	int err = 0;
+
+	__put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err);
+	__put_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err);
+	__put_user_error(regs->regs[2], &sf->uc.uc_mcontext.arm_r2, err);
+	__put_user_error(regs->regs[3], &sf->uc.uc_mcontext.arm_r3, err);
+	__put_user_error(regs->regs[4], &sf->uc.uc_mcontext.arm_r4, err);
+	__put_user_error(regs->regs[5], &sf->uc.uc_mcontext.arm_r5, err);
+	__put_user_error(regs->regs[6], &sf->uc.uc_mcontext.arm_r6, err);
+	__put_user_error(regs->regs[7], &sf->uc.uc_mcontext.arm_r7, err);
+	__put_user_error(regs->regs[8], &sf->uc.uc_mcontext.arm_r8, err);
+	__put_user_error(regs->regs[9], &sf->uc.uc_mcontext.arm_r9, err);
+	__put_user_error(regs->regs[10], &sf->uc.uc_mcontext.arm_r10, err);
+	__put_user_error(regs->regs[11], &sf->uc.uc_mcontext.arm_fp, err);
+	__put_user_error(regs->regs[12], &sf->uc.uc_mcontext.arm_ip, err);
+	__put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err);
+	__put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err);
+	__put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err);
+	__put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err);
+
+	__put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err);
+	/* set the compat FSR WnR */
+	__put_user_error(!!(current->thread.fault_code & ESR_ELx_WNR) <<
+			 FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err);
+	__put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err);
+	__put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err);
+
+	err |= put_sigset_t(&sf->uc.uc_sigmask, set);
+
+	aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
+
+	if (err == 0)
+		err |= compat_preserve_vfp_context(&aux->vfp);
+	__put_user_error(0, &aux->end_magic, err);
+
+	return err;
+}
+
+/*
+ * 32-bit signal handling routines called from signal.c
+ */
+int compat_setup_rt_frame(int usig, struct ksignal *ksig,
+			  sigset_t *set, struct pt_regs *regs)
+{
+	struct compat_rt_sigframe __user *frame;
+	int err = 0;
+
+	frame = compat_get_sigframe(ksig, regs, sizeof(*frame));
+
+	if (!frame)
+		return 1;
+
+	err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
+
+	__put_user_error(0, &frame->sig.uc.uc_flags, err);
+	__put_user_error(0, &frame->sig.uc.uc_link, err);
+
+	err |= __compat_save_altstack(&frame->sig.uc.uc_stack, regs->compat_sp);
+
+	err |= compat_setup_sigframe(&frame->sig, regs, set);
+
+	if (err == 0) {
+		compat_setup_return(regs, &ksig->ka, frame->sig.retcode, frame, usig);
+		regs->regs[1] = (compat_ulong_t)(unsigned long)&frame->info;
+		regs->regs[2] = (compat_ulong_t)(unsigned long)&frame->sig.uc;
+	}
+
+	return err;
+}
+
+int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
+		       struct pt_regs *regs)
+{
+	struct compat_sigframe __user *frame;
+	int err = 0;
+
+	frame = compat_get_sigframe(ksig, regs, sizeof(*frame));
+
+	if (!frame)
+		return 1;
+
+	__put_user_error(0x5ac3c35a, &frame->uc.uc_flags, err);
+
+	err |= compat_setup_sigframe(frame, regs, set);
+	if (err == 0)
+		compat_setup_return(regs, &ksig->ka, frame->retcode, frame, usig);
+
+	return err;
+}
+
+void compat_setup_restart_syscall(struct pt_regs *regs)
+{
+       regs->regs[7] = __NR_compat_restart_syscall;
+}
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
new file mode 100644
index 000000000..ede186cdd
--- /dev/null
+++ b/arch/arm64/kernel/sleep.S
@@ -0,0 +1,181 @@
+#include <linux/errno.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+
+	.text
+/*
+ * Implementation of MPIDR_EL1 hash algorithm through shifting
+ * and OR'ing.
+ *
+ * @dst: register containing hash result
+ * @rs0: register containing affinity level 0 bit shift
+ * @rs1: register containing affinity level 1 bit shift
+ * @rs2: register containing affinity level 2 bit shift
+ * @rs3: register containing affinity level 3 bit shift
+ * @mpidr: register containing MPIDR_EL1 value
+ * @mask: register containing MPIDR mask
+ *
+ * Pseudo C-code:
+ *
+ *u32 dst;
+ *
+ *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) {
+ *	u32 aff0, aff1, aff2, aff3;
+ *	u64 mpidr_masked = mpidr & mask;
+ *	aff0 = mpidr_masked & 0xff;
+ *	aff1 = mpidr_masked & 0xff00;
+ *	aff2 = mpidr_masked & 0xff0000;
+ *	aff2 = mpidr_masked & 0xff00000000;
+ *	dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3);
+ *}
+ * Input registers: rs0, rs1, rs2, rs3, mpidr, mask
+ * Output register: dst
+ * Note: input and output registers must be disjoint register sets
+         (eg: a macro instance with mpidr = x1 and dst = x1 is invalid)
+ */
+	.macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask
+	and	\mpidr, \mpidr, \mask		// mask out MPIDR bits
+	and	\dst, \mpidr, #0xff		// mask=aff0
+	lsr	\dst ,\dst, \rs0		// dst=aff0>>rs0
+	and	\mask, \mpidr, #0xff00		// mask = aff1
+	lsr	\mask ,\mask, \rs1
+	orr	\dst, \dst, \mask		// dst|=(aff1>>rs1)
+	and	\mask, \mpidr, #0xff0000	// mask = aff2
+	lsr	\mask ,\mask, \rs2
+	orr	\dst, \dst, \mask		// dst|=(aff2>>rs2)
+	and	\mask, \mpidr, #0xff00000000	// mask = aff3
+	lsr	\mask ,\mask, \rs3
+	orr	\dst, \dst, \mask		// dst|=(aff3>>rs3)
+	.endm
+/*
+ * Save CPU state for a suspend and execute the suspend finisher.
+ * On success it will return 0 through cpu_resume - ie through a CPU
+ * soft/hard reboot from the reset vector.
+ * On failure it returns the suspend finisher return value or force
+ * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
+ * is not allowed to return, if it does this must be considered failure).
+ * It saves callee registers, and allocates space on the kernel stack
+ * to save the CPU specific registers + some other data for resume.
+ *
+ *  x0 = suspend finisher argument
+ *  x1 = suspend finisher function pointer
+ */
+ENTRY(__cpu_suspend_enter)
+	stp	x29, lr, [sp, #-96]!
+	stp	x19, x20, [sp,#16]
+	stp	x21, x22, [sp,#32]
+	stp	x23, x24, [sp,#48]
+	stp	x25, x26, [sp,#64]
+	stp	x27, x28, [sp,#80]
+	/*
+	 * Stash suspend finisher and its argument in x20 and x19
+	 */
+	mov	x19, x0
+	mov	x20, x1
+	mov	x2, sp
+	sub	sp, sp, #CPU_SUSPEND_SZ	// allocate cpu_suspend_ctx
+	mov	x0, sp
+	/*
+	 * x0 now points to struct cpu_suspend_ctx allocated on the stack
+	 */
+	str	x2, [x0, #CPU_CTX_SP]
+	ldr	x1, =sleep_save_sp
+	ldr	x1, [x1, #SLEEP_SAVE_SP_VIRT]
+#ifdef CONFIG_SMP
+	mrs	x7, mpidr_el1
+	ldr	x9, =mpidr_hash
+	ldr	x10, [x9, #MPIDR_HASH_MASK]
+	/*
+	 * Following code relies on the struct mpidr_hash
+	 * members size.
+	 */
+	ldp	w3, w4, [x9, #MPIDR_HASH_SHIFTS]
+	ldp	w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
+	compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
+	add	x1, x1, x8, lsl #3
+#endif
+	bl	__cpu_suspend_save
+	/*
+	 * Grab suspend finisher in x20 and its argument in x19
+	 */
+	mov	x0, x19
+	mov	x1, x20
+	/*
+	 * We are ready for power down, fire off the suspend finisher
+	 * in x1, with argument in x0
+	 */
+	blr	x1
+        /*
+	 * Never gets here, unless suspend finisher fails.
+	 * Successful cpu_suspend should return from cpu_resume, returning
+	 * through this code path is considered an error
+	 * If the return value is set to 0 force x0 = -EOPNOTSUPP
+	 * to make sure a proper error condition is propagated
+	 */
+	cmp	x0, #0
+	mov	x3, #-EOPNOTSUPP
+	csel	x0, x3, x0, eq
+	add	sp, sp, #CPU_SUSPEND_SZ	// rewind stack pointer
+	ldp	x19, x20, [sp, #16]
+	ldp	x21, x22, [sp, #32]
+	ldp	x23, x24, [sp, #48]
+	ldp	x25, x26, [sp, #64]
+	ldp	x27, x28, [sp, #80]
+	ldp	x29, lr, [sp], #96
+	ret
+ENDPROC(__cpu_suspend_enter)
+	.ltorg
+
+/*
+ * x0 must contain the sctlr value retrieved from restored context
+ */
+ENTRY(cpu_resume_mmu)
+	ldr	x3, =cpu_resume_after_mmu
+	msr	sctlr_el1, x0		// restore sctlr_el1
+	isb
+	br	x3			// global jump to virtual address
+ENDPROC(cpu_resume_mmu)
+cpu_resume_after_mmu:
+	mov	x0, #0			// return zero on success
+	ldp	x19, x20, [sp, #16]
+	ldp	x21, x22, [sp, #32]
+	ldp	x23, x24, [sp, #48]
+	ldp	x25, x26, [sp, #64]
+	ldp	x27, x28, [sp, #80]
+	ldp	x29, lr, [sp], #96
+	ret
+ENDPROC(cpu_resume_after_mmu)
+
+ENTRY(cpu_resume)
+	bl	el2_setup		// if in EL2 drop to EL1 cleanly
+#ifdef CONFIG_SMP
+	mrs	x1, mpidr_el1
+	adrp	x8, mpidr_hash
+	add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
+        /* retrieve mpidr_hash members to compute the hash */
+	ldr	x2, [x8, #MPIDR_HASH_MASK]
+	ldp	w3, w4, [x8, #MPIDR_HASH_SHIFTS]
+	ldp	w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
+	compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
+        /* x7 contains hash index, let's use it to grab context pointer */
+#else
+	mov	x7, xzr
+#endif
+	adrp	x0, sleep_save_sp
+	add	x0, x0, #:lo12:sleep_save_sp
+	ldr	x0, [x0, #SLEEP_SAVE_SP_PHYS]
+	ldr	x0, [x0, x7, lsl #3]
+	/* load sp from context */
+	ldr	x2, [x0, #CPU_CTX_SP]
+	adrp	x1, sleep_idmap_phys
+	/* load physical address of identity map page table in x1 */
+	ldr	x1, [x1, #:lo12:sleep_idmap_phys]
+	mov	sp, x2
+	/*
+	 * cpu_do_resume expects x0 to contain context physical address
+	 * pointer and x1 to contain physical address of 1:1 page tables
+	 */
+	bl	cpu_do_resume		// PC relative jump, MMU off
+	b	cpu_resume_mmu		// Resume MMU, never returns
+ENDPROC(cpu_resume)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
new file mode 100644
index 000000000..d3a202b85
--- /dev/null
+++ b/arch/arm64/kernel/smp.c
@@ -0,0 +1,659 @@
+/*
+ * SMP initialisation and IPI support
+ * Based on arch/arm/kernel/smp.c
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cache.h>
+#include <linux/profile.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/seq_file.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/clockchips.h>
+#include <linux/completion.h>
+#include <linux/of.h>
+#include <linux/irq_work.h>
+
+#include <asm/alternative.h>
+#include <asm/atomic.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/cpu_ops.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/smp_plat.h>
+#include <asm/sections.h>
+#include <asm/tlbflush.h>
+#include <asm/ptrace.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/ipi.h>
+
+/*
+ * as from 2.5, kernels no longer have an init_tasks structure
+ * so we need some other way of telling a new secondary core
+ * where to place its SVC stack
+ */
+struct secondary_data secondary_data;
+
+enum ipi_msg_type {
+	IPI_RESCHEDULE,
+	IPI_CALL_FUNC,
+	IPI_CPU_STOP,
+	IPI_TIMER,
+	IPI_IRQ_WORK,
+};
+
+/*
+ * Boot a secondary CPU, and assign it the specified idle task.
+ * This also gives us the initial stack to use for this CPU.
+ */
+static int boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	if (cpu_ops[cpu]->cpu_boot)
+		return cpu_ops[cpu]->cpu_boot(cpu);
+
+	return -EOPNOTSUPP;
+}
+
+static DECLARE_COMPLETION(cpu_running);
+
+int __cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+	int ret;
+
+	/*
+	 * We need to tell the secondary core where to find its stack and the
+	 * page tables.
+	 */
+	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
+
+	/*
+	 * Now bring the CPU into our world.
+	 */
+	ret = boot_secondary(cpu, idle);
+	if (ret == 0) {
+		/*
+		 * CPU was successfully started, wait for it to come online or
+		 * time out.
+		 */
+		wait_for_completion_timeout(&cpu_running,
+					    msecs_to_jiffies(1000));
+
+		if (!cpu_online(cpu)) {
+			pr_crit("CPU%u: failed to come online\n", cpu);
+			ret = -EIO;
+		}
+	} else {
+		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+	}
+
+	secondary_data.stack = NULL;
+
+	return ret;
+}
+
+static void smp_store_cpu_info(unsigned int cpuid)
+{
+	store_cpu_topology(cpuid);
+}
+
+/*
+ * This is the secondary CPU boot entry.  We're using this CPUs
+ * idle thread stack, but a set of temporary page tables.
+ */
+asmlinkage void secondary_start_kernel(void)
+{
+	struct mm_struct *mm = &init_mm;
+	unsigned int cpu = smp_processor_id();
+
+	/*
+	 * All kernel threads share the same mm context; grab a
+	 * reference and switch to it.
+	 */
+	atomic_inc(&mm->mm_count);
+	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
+
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+	printk("CPU%u: Booted secondary processor\n", cpu);
+
+	/*
+	 * TTBR0 is only used for the identity mapping at this stage. Make it
+	 * point to zero page to avoid speculatively fetching new entries.
+	 */
+	cpu_set_reserved_ttbr0();
+	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
+
+	preempt_disable();
+	trace_hardirqs_off();
+
+	if (cpu_ops[cpu]->cpu_postboot)
+		cpu_ops[cpu]->cpu_postboot();
+
+	/*
+	 * Log the CPU info before it is marked online and might get read.
+	 */
+	cpuinfo_store_cpu();
+
+	/*
+	 * Enable GIC and timers.
+	 */
+	notify_cpu_starting(cpu);
+
+	smp_store_cpu_info(cpu);
+
+	/*
+	 * OK, now it's safe to let the boot CPU continue.  Wait for
+	 * the CPU migration code to notice that the CPU is online
+	 * before we continue.
+	 */
+	set_cpu_online(cpu, true);
+	complete(&cpu_running);
+
+	local_dbg_enable();
+	local_irq_enable();
+	local_async_enable();
+
+	/*
+	 * OK, it's off to the idle thread for us
+	 */
+	cpu_startup_entry(CPUHP_ONLINE);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_disable(unsigned int cpu)
+{
+	/*
+	 * If we don't have a cpu_die method, abort before we reach the point
+	 * of no return. CPU0 may not have an cpu_ops, so test for it.
+	 */
+	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
+		return -EOPNOTSUPP;
+
+	/*
+	 * We may need to abort a hot unplug for some other mechanism-specific
+	 * reason.
+	 */
+	if (cpu_ops[cpu]->cpu_disable)
+		return cpu_ops[cpu]->cpu_disable(cpu);
+
+	return 0;
+}
+
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ */
+int __cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+	int ret;
+
+	ret = op_cpu_disable(cpu);
+	if (ret)
+		return ret;
+
+	/*
+	 * Take this CPU offline.  Once we clear this, we can't return,
+	 * and we must not schedule until we're ready to give up the cpu.
+	 */
+	set_cpu_online(cpu, false);
+
+	/*
+	 * OK - migrate IRQs away from this CPU
+	 */
+	migrate_irqs();
+
+	/*
+	 * Remove this CPU from the vm mask set of all processes.
+	 */
+	clear_tasks_mm_cpumask(cpu);
+
+	return 0;
+}
+
+static int op_cpu_kill(unsigned int cpu)
+{
+	/*
+	 * If we have no means of synchronising with the dying CPU, then assume
+	 * that it is really dead. We can only wait for an arbitrary length of
+	 * time and hope that it's dead, so let's skip the wait and just hope.
+	 */
+	if (!cpu_ops[cpu]->cpu_kill)
+		return 1;
+
+	return cpu_ops[cpu]->cpu_kill(cpu);
+}
+
+static DECLARE_COMPLETION(cpu_died);
+
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ */
+void __cpu_die(unsigned int cpu)
+{
+	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+		pr_crit("CPU%u: cpu didn't die\n", cpu);
+		return;
+	}
+	pr_notice("CPU%u: shutdown\n", cpu);
+
+	/*
+	 * Now that the dying CPU is beyond the point of no return w.r.t.
+	 * in-kernel synchronisation, try to get the firwmare to help us to
+	 * verify that it has really left the kernel before we consider
+	 * clobbering anything it might still be using.
+	 */
+	if (!op_cpu_kill(cpu))
+		pr_warn("CPU%d may not have shut down cleanly\n", cpu);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shutdown.
+ *
+ * Note that we disable IRQs here, but do not re-enable them
+ * before returning to the caller. This is also the behaviour
+ * of the other hotplug-cpu capable cores, so presumably coming
+ * out of idle fixes this.
+ */
+void cpu_die(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	idle_task_exit();
+
+	local_irq_disable();
+
+	/* Tell __cpu_die() that this CPU is now safe to dispose of */
+	complete(&cpu_died);
+
+	/*
+	 * Actually shutdown the CPU. This must never fail. The specific hotplug
+	 * mechanism must perform all required cache maintenance to ensure that
+	 * no dirty lines are lost in the process of shutting down the CPU.
+	 */
+	cpu_ops[cpu]->cpu_die(cpu);
+
+	BUG();
+}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
+	do_post_cpus_up_work();
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+}
+
+/*
+ * Enumerate the possible CPU set from the device tree and build the
+ * cpu logical map array containing MPIDR values related to logical
+ * cpus. Assumes that cpu_logical_map(0) has already been initialized.
+ */
+void __init of_smp_init_cpus(void)
+{
+	struct device_node *dn = NULL;
+	unsigned int i, cpu = 1;
+	bool bootcpu_valid = false;
+
+	while ((dn = of_find_node_by_type(dn, "cpu"))) {
+		const u32 *cell;
+		u64 hwid;
+
+		/*
+		 * A cpu node with missing "reg" property is
+		 * considered invalid to build a cpu_logical_map
+		 * entry.
+		 */
+		cell = of_get_property(dn, "reg", NULL);
+		if (!cell) {
+			pr_err("%s: missing reg property\n", dn->full_name);
+			goto next;
+		}
+		hwid = of_read_number(cell, of_n_addr_cells(dn));
+
+		/*
+		 * Non affinity bits must be set to 0 in the DT
+		 */
+		if (hwid & ~MPIDR_HWID_BITMASK) {
+			pr_err("%s: invalid reg property\n", dn->full_name);
+			goto next;
+		}
+
+		/*
+		 * Duplicate MPIDRs are a recipe for disaster. Scan
+		 * all initialized entries and check for
+		 * duplicates. If any is found just ignore the cpu.
+		 * cpu_logical_map was initialized to INVALID_HWID to
+		 * avoid matching valid MPIDR values.
+		 */
+		for (i = 1; (i < cpu) && (i < NR_CPUS); i++) {
+			if (cpu_logical_map(i) == hwid) {
+				pr_err("%s: duplicate cpu reg properties in the DT\n",
+					dn->full_name);
+				goto next;
+			}
+		}
+
+		/*
+		 * The numbering scheme requires that the boot CPU
+		 * must be assigned logical id 0. Record it so that
+		 * the logical map built from DT is validated and can
+		 * be used.
+		 */
+		if (hwid == cpu_logical_map(0)) {
+			if (bootcpu_valid) {
+				pr_err("%s: duplicate boot cpu reg property in DT\n",
+					dn->full_name);
+				goto next;
+			}
+
+			bootcpu_valid = true;
+
+			/*
+			 * cpu_logical_map has already been
+			 * initialized and the boot cpu doesn't need
+			 * the enable-method so continue without
+			 * incrementing cpu.
+			 */
+			continue;
+		}
+
+		if (cpu >= NR_CPUS)
+			goto next;
+
+		if (cpu_read_ops(dn, cpu) != 0)
+			goto next;
+
+		if (cpu_ops[cpu]->cpu_init(dn, cpu))
+			goto next;
+
+		pr_debug("cpu logical map 0x%llx\n", hwid);
+		cpu_logical_map(cpu) = hwid;
+next:
+		cpu++;
+	}
+
+	/* sanity check */
+	if (cpu > NR_CPUS)
+		pr_warning("no. of cores (%d) greater than configured maximum of %d - clipping\n",
+			   cpu, NR_CPUS);
+
+	if (!bootcpu_valid) {
+		pr_err("DT missing boot CPU MPIDR, not enabling secondaries\n");
+		return;
+	}
+
+	/*
+	 * All the cpus that made it to the cpu_logical_map have been
+	 * validated so set them as possible cpus.
+	 */
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_logical_map(i) != INVALID_HWID)
+			set_cpu_possible(i, true);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	int err;
+	unsigned int cpu, ncores = num_possible_cpus();
+
+	init_cpu_topology();
+
+	smp_store_cpu_info(smp_processor_id());
+
+	/*
+	 * are we trying to boot more cores than exist?
+	 */
+	if (max_cpus > ncores)
+		max_cpus = ncores;
+
+	/* Don't bother if we're effectively UP */
+	if (max_cpus <= 1)
+		return;
+
+	/*
+	 * Initialise the present map (which describes the set of CPUs
+	 * actually populated at the present time) and release the
+	 * secondaries from the bootloader.
+	 *
+	 * Make sure we online at most (max_cpus - 1) additional CPUs.
+	 */
+	max_cpus--;
+	for_each_possible_cpu(cpu) {
+		if (max_cpus == 0)
+			break;
+
+		if (cpu == smp_processor_id())
+			continue;
+
+		if (!cpu_ops[cpu])
+			continue;
+
+		err = cpu_ops[cpu]->cpu_prepare(cpu);
+		if (err)
+			continue;
+
+		set_cpu_present(cpu, true);
+		max_cpus--;
+	}
+}
+
+void (*__smp_cross_call)(const struct cpumask *, unsigned int);
+
+void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
+{
+	__smp_cross_call = fn;
+}
+
+static const char *ipi_types[NR_IPI] __tracepoint_string = {
+#define S(x,s)	[x] = s
+	S(IPI_RESCHEDULE, "Rescheduling interrupts"),
+	S(IPI_CALL_FUNC, "Function call interrupts"),
+	S(IPI_CPU_STOP, "CPU stop interrupts"),
+	S(IPI_TIMER, "Timer broadcast interrupts"),
+	S(IPI_IRQ_WORK, "IRQ work interrupts"),
+};
+
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
+{
+	trace_ipi_raise(target, ipi_types[ipinr]);
+	__smp_cross_call(target, ipinr);
+}
+
+void show_ipi_list(struct seq_file *p, int prec)
+{
+	unsigned int cpu, i;
+
+	for (i = 0; i < NR_IPI; i++) {
+		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
+			   prec >= 4 ? " " : "");
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10u ",
+				   __get_irq_stat(cpu, ipi_irqs[i]));
+		seq_printf(p, "      %s\n", ipi_types[i]);
+	}
+}
+
+u64 smp_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = 0;
+	int i;
+
+	for (i = 0; i < NR_IPI; i++)
+		sum += __get_irq_stat(cpu, ipi_irqs[i]);
+
+	return sum;
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_CALL_FUNC);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
+}
+
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+	if (__smp_cross_call)
+		smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+}
+#endif
+
+static DEFINE_RAW_SPINLOCK(stop_lock);
+
+/*
+ * ipi_cpu_stop - handle IPI from smp_send_stop()
+ */
+static void ipi_cpu_stop(unsigned int cpu)
+{
+	if (system_state == SYSTEM_BOOTING ||
+	    system_state == SYSTEM_RUNNING) {
+		raw_spin_lock(&stop_lock);
+		pr_crit("CPU%u: stopping\n", cpu);
+		dump_stack();
+		raw_spin_unlock(&stop_lock);
+	}
+
+	set_cpu_online(cpu, false);
+
+	local_irq_disable();
+
+	while (1)
+		cpu_relax();
+}
+
+/*
+ * Main handler for inter-processor interrupts
+ */
+void handle_IPI(int ipinr, struct pt_regs *regs)
+{
+	unsigned int cpu = smp_processor_id();
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	if ((unsigned)ipinr < NR_IPI) {
+		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
+		__inc_irq_stat(cpu, ipi_irqs[ipinr]);
+	}
+
+	switch (ipinr) {
+	case IPI_RESCHEDULE:
+		scheduler_ipi();
+		break;
+
+	case IPI_CALL_FUNC:
+		irq_enter();
+		generic_smp_call_function_interrupt();
+		irq_exit();
+		break;
+
+	case IPI_CPU_STOP:
+		irq_enter();
+		ipi_cpu_stop(cpu);
+		irq_exit();
+		break;
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	case IPI_TIMER:
+		irq_enter();
+		tick_receive_broadcast();
+		irq_exit();
+		break;
+#endif
+
+#ifdef CONFIG_IRQ_WORK
+	case IPI_IRQ_WORK:
+		irq_enter();
+		irq_work_run();
+		irq_exit();
+		break;
+#endif
+
+	default:
+		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
+		break;
+	}
+
+	if ((unsigned)ipinr < NR_IPI)
+		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+	set_irq_regs(old_regs);
+}
+
+void smp_send_reschedule(int cpu)
+{
+	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
+}
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_TIMER);
+}
+#endif
+
+void smp_send_stop(void)
+{
+	unsigned long timeout;
+
+	if (num_online_cpus() > 1) {
+		cpumask_t mask;
+
+		cpumask_copy(&mask, cpu_online_mask);
+		cpumask_clear_cpu(smp_processor_id(), &mask);
+
+		smp_cross_call(&mask, IPI_CPU_STOP);
+	}
+
+	/* Wait up to one second for other CPUs to stop */
+	timeout = USEC_PER_SEC;
+	while (num_online_cpus() > 1 && timeout--)
+		udelay(1);
+
+	if (num_online_cpus() > 1)
+		pr_warning("SMP: failed to stop secondary CPUs\n");
+}
+
+/*
+ * not supported here
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
new file mode 100644
index 000000000..14944e5b2
--- /dev/null
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -0,0 +1,127 @@
+/*
+ * Spin Table SMP initialisation
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/cputype.h>
+#include <asm/io.h>
+#include <asm/smp_plat.h>
+
+extern void secondary_holding_pen(void);
+volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
+
+static phys_addr_t cpu_release_addr[NR_CPUS];
+
+/*
+ * Write secondary_holding_pen_release in a way that is guaranteed to be
+ * visible to all observers, irrespective of whether they're taking part
+ * in coherency or not.  This is necessary for the hotplug code to work
+ * reliably.
+ */
+static void write_pen_release(u64 val)
+{
+	void *start = (void *)&secondary_holding_pen_release;
+	unsigned long size = sizeof(secondary_holding_pen_release);
+
+	secondary_holding_pen_release = val;
+	__flush_dcache_area(start, size);
+}
+
+
+static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu)
+{
+	/*
+	 * Determine the address from which the CPU is polling.
+	 */
+	if (of_property_read_u64(dn, "cpu-release-addr",
+				 &cpu_release_addr[cpu])) {
+		pr_err("CPU %d: missing or invalid cpu-release-addr property\n",
+		       cpu);
+
+		return -1;
+	}
+
+	return 0;
+}
+
+static int smp_spin_table_cpu_prepare(unsigned int cpu)
+{
+	__le64 __iomem *release_addr;
+
+	if (!cpu_release_addr[cpu])
+		return -ENODEV;
+
+	/*
+	 * The cpu-release-addr may or may not be inside the linear mapping.
+	 * As ioremap_cache will either give us a new mapping or reuse the
+	 * existing linear mapping, we can use it to cover both cases. In
+	 * either case the memory will be MT_NORMAL.
+	 */
+	release_addr = ioremap_cache(cpu_release_addr[cpu],
+				     sizeof(*release_addr));
+	if (!release_addr)
+		return -ENOMEM;
+
+	/*
+	 * We write the release address as LE regardless of the native
+	 * endianess of the kernel. Therefore, any boot-loaders that
+	 * read this address need to convert this address to the
+	 * boot-loader's endianess before jumping. This is mandated by
+	 * the boot protocol.
+	 */
+	writeq_relaxed(__pa(secondary_holding_pen), release_addr);
+	__flush_dcache_area((__force void *)release_addr,
+			    sizeof(*release_addr));
+
+	/*
+	 * Send an event to wake up the secondary CPU.
+	 */
+	sev();
+
+	iounmap(release_addr);
+
+	return 0;
+}
+
+static int smp_spin_table_cpu_boot(unsigned int cpu)
+{
+	/*
+	 * Update the pen release flag.
+	 */
+	write_pen_release(cpu_logical_map(cpu));
+
+	/*
+	 * Send an event, causing the secondaries to read pen_release.
+	 */
+	sev();
+
+	return 0;
+}
+
+const struct cpu_operations smp_spin_table_ops = {
+	.name		= "spin-table",
+	.cpu_init	= smp_spin_table_cpu_init,
+	.cpu_prepare	= smp_spin_table_cpu_prepare,
+	.cpu_boot	= smp_spin_table_cpu_boot,
+};
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
new file mode 100644
index 000000000..407991bf7
--- /dev/null
+++ b/arch/arm64/kernel/stacktrace.c
@@ -0,0 +1,130 @@
+/*
+ * Stack tracing support
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/stacktrace.h>
+
+/*
+ * AArch64 PCS assigns the frame pointer to x29.
+ *
+ * A simple function prologue looks like this:
+ * 	sub	sp, sp, #0x10
+ *   	stp	x29, x30, [sp]
+ *	mov	x29, sp
+ *
+ * A simple function epilogue looks like this:
+ *	mov	sp, x29
+ *	ldp	x29, x30, [sp]
+ *	add	sp, sp, #0x10
+ */
+int notrace unwind_frame(struct stackframe *frame)
+{
+	unsigned long high, low;
+	unsigned long fp = frame->fp;
+
+	low  = frame->sp;
+	high = ALIGN(low, THREAD_SIZE);
+
+	if (fp < low || fp > high - 0x18 || fp & 0xf)
+		return -EINVAL;
+
+	frame->sp = fp + 0x10;
+	frame->fp = *(unsigned long *)(fp);
+	/*
+	 * -4 here because we care about the PC at time of bl,
+	 * not where the return will go.
+	 */
+	frame->pc = *(unsigned long *)(fp + 8) - 4;
+
+	return 0;
+}
+
+void notrace walk_stackframe(struct stackframe *frame,
+		     int (*fn)(struct stackframe *, void *), void *data)
+{
+	while (1) {
+		int ret;
+
+		if (fn(frame, data))
+			break;
+		ret = unwind_frame(frame);
+		if (ret < 0)
+			break;
+	}
+}
+EXPORT_SYMBOL(walk_stackframe);
+
+#ifdef CONFIG_STACKTRACE
+struct stack_trace_data {
+	struct stack_trace *trace;
+	unsigned int no_sched_functions;
+	unsigned int skip;
+};
+
+static int save_trace(struct stackframe *frame, void *d)
+{
+	struct stack_trace_data *data = d;
+	struct stack_trace *trace = data->trace;
+	unsigned long addr = frame->pc;
+
+	if (data->no_sched_functions && in_sched_functions(addr))
+		return 0;
+	if (data->skip) {
+		data->skip--;
+		return 0;
+	}
+
+	trace->entries[trace->nr_entries++] = addr;
+
+	return trace->nr_entries >= trace->max_entries;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	struct stack_trace_data data;
+	struct stackframe frame;
+
+	data.trace = trace;
+	data.skip = trace->skip;
+
+	if (tsk != current) {
+		data.no_sched_functions = 1;
+		frame.fp = thread_saved_fp(tsk);
+		frame.sp = thread_saved_sp(tsk);
+		frame.pc = thread_saved_pc(tsk);
+	} else {
+		data.no_sched_functions = 0;
+		frame.fp = (unsigned long)__builtin_frame_address(0);
+		frame.sp = current_stack_pointer;
+		frame.pc = (unsigned long)save_stack_trace_tsk;
+	}
+
+	walk_stackframe(&frame, save_trace, &data);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	save_stack_trace_tsk(current, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
new file mode 100644
index 000000000..d7daf45ae
--- /dev/null
+++ b/arch/arm64/kernel/suspend.c
@@ -0,0 +1,141 @@
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/mmu_context.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
+/*
+ * This is called by __cpu_suspend_enter() to save the state, and do whatever
+ * flushing is required to ensure that when the CPU goes to sleep we have
+ * the necessary data available when the caches are not searched.
+ *
+ * ptr: CPU context virtual address
+ * save_ptr: address of the location where the context physical address
+ *           must be saved
+ */
+void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
+				phys_addr_t *save_ptr)
+{
+	*save_ptr = virt_to_phys(ptr);
+
+	cpu_do_suspend(ptr);
+	/*
+	 * Only flush the context that must be retrieved with the MMU
+	 * off. VA primitives ensure the flush is applied to all
+	 * cache levels so context is pushed to DRAM.
+	 */
+	__flush_dcache_area(ptr, sizeof(*ptr));
+	__flush_dcache_area(save_ptr, sizeof(*save_ptr));
+}
+
+/*
+ * This hook is provided so that cpu_suspend code can restore HW
+ * breakpoints as early as possible in the resume path, before reenabling
+ * debug exceptions. Code cannot be run from a CPU PM notifier since by the
+ * time the notifier runs debug exceptions might have been enabled already,
+ * with HW breakpoints registers content still in an unknown state.
+ */
+void (*hw_breakpoint_restore)(void *);
+void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+{
+	/* Prevent multiple restore hook initializations */
+	if (WARN_ON(hw_breakpoint_restore))
+		return;
+	hw_breakpoint_restore = hw_bp_restore;
+}
+
+/*
+ * __cpu_suspend
+ *
+ * arg: argument to pass to the finisher function
+ * fn: finisher function pointer
+ *
+ */
+int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	struct mm_struct *mm = current->active_mm;
+	int ret;
+	unsigned long flags;
+
+	/*
+	 * From this point debug exceptions are disabled to prevent
+	 * updates to mdscr register (saved and restored along with
+	 * general purpose registers) from kernel debuggers.
+	 */
+	local_dbg_save(flags);
+
+	/*
+	 * mm context saved on the stack, it will be restored when
+	 * the cpu comes out of reset through the identity mapped
+	 * page tables, so that the thread address space is properly
+	 * set-up on function return.
+	 */
+	ret = __cpu_suspend_enter(arg, fn);
+	if (ret == 0) {
+		/*
+		 * We are resuming from reset with TTBR0_EL1 set to the
+		 * idmap to enable the MMU; restore the active_mm mappings in
+		 * TTBR0_EL1 unless the active_mm == &init_mm, in which case
+		 * the thread entered __cpu_suspend with TTBR0_EL1 set to
+		 * reserved TTBR0 page tables and should be restored as such.
+		 */
+		if (mm == &init_mm)
+			cpu_set_reserved_ttbr0();
+		else
+			cpu_switch_mm(mm->pgd, mm);
+
+		flush_tlb_all();
+
+		/*
+		 * Restore per-cpu offset before any kernel
+		 * subsystem relying on it has a chance to run.
+		 */
+		set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+
+		/*
+		 * Restore HW breakpoint registers to sane values
+		 * before debug exceptions are possibly reenabled
+		 * through local_dbg_restore.
+		 */
+		if (hw_breakpoint_restore)
+			hw_breakpoint_restore(NULL);
+	}
+
+	/*
+	 * Restore pstate flags. OS lock and mdscr have been already
+	 * restored, so from this point onwards, debugging is fully
+	 * renabled if it was enabled when core started shutdown.
+	 */
+	local_dbg_restore(flags);
+
+	return ret;
+}
+
+struct sleep_save_sp sleep_save_sp;
+phys_addr_t sleep_idmap_phys;
+
+static int __init cpu_suspend_init(void)
+{
+	void *ctx_ptr;
+
+	/* ctx_ptr is an array of physical addresses */
+	ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
+
+	if (WARN_ON(!ctx_ptr))
+		return -ENOMEM;
+
+	sleep_save_sp.save_ptr_stash = ctx_ptr;
+	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
+	sleep_idmap_phys = virt_to_phys(idmap_pg_dir);
+	__flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
+	__flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys));
+
+	return 0;
+}
+early_initcall(cpu_suspend_init);
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
new file mode 100644
index 000000000..75151aaf1
--- /dev/null
+++ b/arch/arm64/kernel/sys.c
@@ -0,0 +1,55 @@
+/*
+ * AArch64-specific system calls implementation
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+			 unsigned long prot, unsigned long flags,
+			 unsigned long fd, off_t off)
+{
+	if (offset_in_page(off) != 0)
+		return -EINVAL;
+
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+}
+
+/*
+ * Wrappers to pass the pt_regs argument.
+ */
+asmlinkage long sys_rt_sigreturn_wrapper(void);
+#define sys_rt_sigreturn	sys_rt_sigreturn_wrapper
+
+#undef __SYSCALL
+#define __SYSCALL(nr, sym)	[nr] = sym,
+
+/*
+ * The sys_call_table array must be 4K aligned to be accessible from
+ * kernel/entry.S.
+ */
+void * const sys_call_table[__NR_syscalls] __aligned(4096) = {
+	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
new file mode 100644
index 000000000..a40b1343b
--- /dev/null
+++ b/arch/arm64/kernel/sys32.c
@@ -0,0 +1,52 @@
+/*
+ * arch/arm64/kernel/sys32.c
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software(void); you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http(void);//www.gnu.org/licenses/>.
+ */
+
+/*
+ * Needed to avoid conflicting __NR_* macros between uapi/asm/unistd.h and
+ * asm/unistd32.h.
+ */
+#define __COMPAT_SYSCALL_NR
+
+#include <linux/compiler.h>
+#include <linux/syscalls.h>
+
+asmlinkage long compat_sys_sigreturn_wrapper(void);
+asmlinkage long compat_sys_rt_sigreturn_wrapper(void);
+asmlinkage long compat_sys_statfs64_wrapper(void);
+asmlinkage long compat_sys_fstatfs64_wrapper(void);
+asmlinkage long compat_sys_pread64_wrapper(void);
+asmlinkage long compat_sys_pwrite64_wrapper(void);
+asmlinkage long compat_sys_truncate64_wrapper(void);
+asmlinkage long compat_sys_ftruncate64_wrapper(void);
+asmlinkage long compat_sys_readahead_wrapper(void);
+asmlinkage long compat_sys_fadvise64_64_wrapper(void);
+asmlinkage long compat_sys_sync_file_range2_wrapper(void);
+asmlinkage long compat_sys_fallocate_wrapper(void);
+asmlinkage long compat_sys_mmap2_wrapper(void);
+
+#undef __SYSCALL
+#define __SYSCALL(nr, sym)	[nr] = sym,
+
+/*
+ * The sys_call_table array must be 4K aligned to be accessible from
+ * kernel/entry.S.
+ */
+void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = {
+	[0 ... __NR_compat_syscalls - 1] = sys_ni_syscall,
+#include <asm/unistd32.h>
+};
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
new file mode 100644
index 000000000..28c511b06
--- /dev/null
+++ b/arch/arm64/kernel/sys_compat.c
@@ -0,0 +1,103 @@
+/*
+ * Based on arch/arm/kernel/sys_arm.c
+ *
+ * Copyright (C) People who wrote linux/arch/i386/kernel/sys_i386.c
+ * Copyright (C) 1995, 1996 Russell King.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compat.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/unistd.h>
+
+static long
+__do_compat_cache_op(unsigned long start, unsigned long end)
+{
+	long ret;
+
+	do {
+		unsigned long chunk = min(PAGE_SIZE, end - start);
+
+		if (fatal_signal_pending(current))
+			return 0;
+
+		ret = __flush_cache_user_range(start, start + chunk);
+		if (ret)
+			return ret;
+
+		cond_resched();
+		start += chunk;
+	} while (start < end);
+
+	return 0;
+}
+
+static inline long
+do_compat_cache_op(unsigned long start, unsigned long end, int flags)
+{
+	if (end < start || flags)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_READ, start, end - start))
+		return -EFAULT;
+
+	return __do_compat_cache_op(start, end);
+}
+/*
+ * Handle all unrecognised system calls.
+ */
+long compat_arm_syscall(struct pt_regs *regs)
+{
+	unsigned int no = regs->regs[7];
+
+	switch (no) {
+	/*
+	 * Flush a region from virtual address 'r0' to virtual address 'r1'
+	 * _exclusive_.  There is no alignment requirement on either address;
+	 * user space does not need to know the hardware cache layout.
+	 *
+	 * r2 contains flags.  It should ALWAYS be passed as ZERO until it
+	 * is defined to be something else.  For now we ignore it, but may
+	 * the fires of hell burn in your belly if you break this rule. ;)
+	 *
+	 * (at a later date, we may want to allow this call to not flush
+	 * various aspects of the cache.  Passing '0' will guarantee that
+	 * everything necessary gets flushed to maintain consistency in
+	 * the specified region).
+	 */
+	case __ARM_NR_compat_cacheflush:
+		return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]);
+
+	case __ARM_NR_compat_set_tls:
+		current->thread.tp_value = regs->regs[0];
+
+		/*
+		 * Protect against register corruption from context switch.
+		 * See comment in tls_thread_flush.
+		 */
+		barrier();
+		asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0]));
+		return 0;
+
+	default:
+		return -ENOSYS;
+	}
+}
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
new file mode 100644
index 000000000..42f9195cf
--- /dev/null
+++ b/arch/arm64/kernel/time.c
@@ -0,0 +1,88 @@
+/*
+ * Based on arch/arm/kernel/time.c
+ *
+ * Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ * Modifications for ARM (C) 1994-2001 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clockchips.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/timex.h>
+#include <linux/errno.h>
+#include <linux/profile.h>
+#include <linux/syscore_ops.h>
+#include <linux/timer.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/clocksource.h>
+#include <linux/clk-provider.h>
+#include <linux/acpi.h>
+
+#include <clocksource/arm_arch_timer.h>
+
+#include <asm/thread_info.h>
+#include <asm/stacktrace.h>
+
+#ifdef CONFIG_SMP
+unsigned long profile_pc(struct pt_regs *regs)
+{
+	struct stackframe frame;
+
+	if (!in_lock_functions(regs->pc))
+		return regs->pc;
+
+	frame.fp = regs->regs[29];
+	frame.sp = regs->sp;
+	frame.pc = regs->pc;
+	do {
+		int ret = unwind_frame(&frame);
+		if (ret < 0)
+			return 0;
+	} while (in_lock_functions(frame.pc));
+
+	return frame.pc;
+}
+EXPORT_SYMBOL(profile_pc);
+#endif
+
+void __init time_init(void)
+{
+	u32 arch_timer_rate;
+
+	of_clk_init(NULL);
+	clocksource_of_init();
+
+	tick_setup_hrtimer_broadcast();
+
+	/*
+	 * Since ACPI or FDT will only one be available in the system,
+	 * we can use acpi_generic_timer_init() here safely
+	 */
+	acpi_generic_timer_init();
+
+	arch_timer_rate = arch_timer_get_rate();
+	if (!arch_timer_rate)
+		panic("Unable to initialise architected timer.\n");
+
+	/* Calibrate the delay loop directly */
+	lpj_fine = arch_timer_rate / HZ;
+}
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 000000000..fcb8f7b42
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,305 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2011,2013,2014 Linaro Limited.
+ *
+ * Based on the arm32 version written by Vincent Guittot in turn based on
+ * arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+
+#include <asm/cputype.h>
+#include <asm/topology.h>
+
+static int __init get_cpu_for_node(struct device_node *node)
+{
+	struct device_node *cpu_node;
+	int cpu;
+
+	cpu_node = of_parse_phandle(node, "cpu", 0);
+	if (!cpu_node)
+		return -1;
+
+	for_each_possible_cpu(cpu) {
+		if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+			of_node_put(cpu_node);
+			return cpu;
+		}
+	}
+
+	pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+
+	of_node_put(cpu_node);
+	return -1;
+}
+
+static int __init parse_core(struct device_node *core, int cluster_id,
+			     int core_id)
+{
+	char name[10];
+	bool leaf = true;
+	int i = 0;
+	int cpu;
+	struct device_node *t;
+
+	do {
+		snprintf(name, sizeof(name), "thread%d", i);
+		t = of_get_child_by_name(core, name);
+		if (t) {
+			leaf = false;
+			cpu = get_cpu_for_node(t);
+			if (cpu >= 0) {
+				cpu_topology[cpu].cluster_id = cluster_id;
+				cpu_topology[cpu].core_id = core_id;
+				cpu_topology[cpu].thread_id = i;
+			} else {
+				pr_err("%s: Can't get CPU for thread\n",
+				       t->full_name);
+				of_node_put(t);
+				return -EINVAL;
+			}
+			of_node_put(t);
+		}
+		i++;
+	} while (t);
+
+	cpu = get_cpu_for_node(core);
+	if (cpu >= 0) {
+		if (!leaf) {
+			pr_err("%s: Core has both threads and CPU\n",
+			       core->full_name);
+			return -EINVAL;
+		}
+
+		cpu_topology[cpu].cluster_id = cluster_id;
+		cpu_topology[cpu].core_id = core_id;
+	} else if (leaf) {
+		pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init parse_cluster(struct device_node *cluster, int depth)
+{
+	char name[10];
+	bool leaf = true;
+	bool has_cores = false;
+	struct device_node *c;
+	static int cluster_id __initdata;
+	int core_id = 0;
+	int i, ret;
+
+	/*
+	 * First check for child clusters; we currently ignore any
+	 * information about the nesting of clusters and present the
+	 * scheduler with a flat list of them.
+	 */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "cluster%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			leaf = false;
+			ret = parse_cluster(c, depth + 1);
+			of_node_put(c);
+			if (ret != 0)
+				return ret;
+		}
+		i++;
+	} while (c);
+
+	/* Now check for cores */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "core%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			has_cores = true;
+
+			if (depth == 0) {
+				pr_err("%s: cpu-map children should be clusters\n",
+				       c->full_name);
+				of_node_put(c);
+				return -EINVAL;
+			}
+
+			if (leaf) {
+				ret = parse_core(c, cluster_id, core_id++);
+			} else {
+				pr_err("%s: Non-leaf cluster with core %s\n",
+				       cluster->full_name, name);
+				ret = -EINVAL;
+			}
+
+			of_node_put(c);
+			if (ret != 0)
+				return ret;
+		}
+		i++;
+	} while (c);
+
+	if (leaf && !has_cores)
+		pr_warn("%s: empty cluster\n", cluster->full_name);
+
+	if (leaf)
+		cluster_id++;
+
+	return 0;
+}
+
+static int __init parse_dt_topology(void)
+{
+	struct device_node *cn, *map;
+	int ret = 0;
+	int cpu;
+
+	cn = of_find_node_by_path("/cpus");
+	if (!cn) {
+		pr_err("No CPU information found in DT\n");
+		return 0;
+	}
+
+	/*
+	 * When topology is provided cpu-map is essentially a root
+	 * cluster with restricted subnodes.
+	 */
+	map = of_get_child_by_name(cn, "cpu-map");
+	if (!map)
+		goto out;
+
+	ret = parse_cluster(map, 0);
+	if (ret != 0)
+		goto out_map;
+
+	/*
+	 * Check that all cores are in the topology; the SMP code will
+	 * only mark cores described in the DT as possible.
+	 */
+	for_each_possible_cpu(cpu)
+		if (cpu_topology[cpu].cluster_id == -1)
+			ret = -EINVAL;
+
+out_map:
+	of_node_put(map);
+out:
+	of_node_put(cn);
+	return ret;
+}
+
+/*
+ * cpu topology table
+ */
+struct cpu_topology cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_topology[cpu].core_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+	int cpu;
+
+	/* update core and thread sibling masks */
+	for_each_possible_cpu(cpu) {
+		cpu_topo = &cpu_topology[cpu];
+
+		if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != cpu_topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+}
+
+void store_cpu_topology(unsigned int cpuid)
+{
+	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
+	u64 mpidr;
+
+	if (cpuid_topo->cluster_id != -1)
+		goto topology_populated;
+
+	mpidr = read_cpuid_mpidr();
+
+	/* Uniprocessor systems can rely on default topology values */
+	if (mpidr & MPIDR_UP_BITMASK)
+		return;
+
+	/* Create cpu topology mapping based on MPIDR. */
+	if (mpidr & MPIDR_MT_BITMASK) {
+		/* Multiprocessor system : Multi-threads per core */
+		cpuid_topo->thread_id  = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+		cpuid_topo->core_id    = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+		cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
+					 MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8;
+	} else {
+		/* Multiprocessor system : Single-thread per core */
+		cpuid_topo->thread_id  = -1;
+		cpuid_topo->core_id    = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+		cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) |
+					 MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 |
+					 MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16;
+	}
+
+	pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
+		 cpuid, cpuid_topo->cluster_id, cpuid_topo->core_id,
+		 cpuid_topo->thread_id, mpidr);
+
+topology_populated:
+	update_siblings_masks(cpuid);
+}
+
+static void __init reset_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+		cpu_topo->thread_id = -1;
+		cpu_topo->core_id = 0;
+		cpu_topo->cluster_id = -1;
+
+		cpumask_clear(&cpu_topo->core_sibling);
+		cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
+		cpumask_clear(&cpu_topo->thread_sibling);
+		cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+	}
+}
+
+void __init init_cpu_topology(void)
+{
+	reset_cpu_topology();
+
+	/*
+	 * Discard anything that was parsed if we hit an error so we
+	 * don't use partial information.
+	 */
+	if (parse_dt_topology())
+		reset_cpu_topology();
+}
diff --git a/arch/arm64/kernel/trace-events-emulation.h b/arch/arm64/kernel/trace-events-emulation.h
new file mode 100644
index 000000000..ae1dd598e
--- /dev/null
+++ b/arch/arm64/kernel/trace-events-emulation.h
@@ -0,0 +1,35 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM emulation
+
+#if !defined(_TRACE_EMULATION_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EMULATION_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(instruction_emulation,
+
+	TP_PROTO(const char *instr, u64 addr),
+	TP_ARGS(instr, addr),
+
+	TP_STRUCT__entry(
+		__string(instr, instr)
+		__field(u64, addr)
+	),
+
+	TP_fast_assign(
+		__assign_str(instr, instr);
+		__entry->addr = addr;
+	),
+
+	TP_printk("instr=\"%s\" addr=0x%llx", __get_str(instr), __entry->addr)
+);
+
+#endif /* _TRACE_EMULATION_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+
+#define TRACE_INCLUDE_FILE trace-events-emulation
+#include <trace/define_trace.h>
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
new file mode 100644
index 000000000..1ef2940df
--- /dev/null
+++ b/arch/arm64/kernel/traps.c
@@ -0,0 +1,466 @@
+/*
+ * Based on arch/arm/kernel/traps.c
+ *
+ * Copyright (C) 1995-2009 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/signal.h>
+#include <linux/personality.h>
+#include <linux/kallsyms.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/hardirq.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+
+#include <asm/atomic.h>
+#include <asm/debug-monitors.h>
+#include <asm/esr.h>
+#include <asm/traps.h>
+#include <asm/stacktrace.h>
+#include <asm/exception.h>
+#include <asm/system_misc.h>
+
+static const char *handler[]= {
+	"Synchronous Abort",
+	"IRQ",
+	"FIQ",
+	"Error"
+};
+
+int show_unhandled_signals = 1;
+
+/*
+ * Dump out the contents of some memory nicely...
+ */
+static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
+		     unsigned long top)
+{
+	unsigned long first;
+	mm_segment_t fs;
+	int i;
+
+	/*
+	 * We need to switch to kernel mode so that we can use __get_user
+	 * to safely read from kernel space.  Note that we now dump the
+	 * code first, just in case the backtrace kills us.
+	 */
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	printk("%s%s(0x%016lx to 0x%016lx)\n", lvl, str, bottom, top);
+
+	for (first = bottom & ~31; first < top; first += 32) {
+		unsigned long p;
+		char str[sizeof(" 12345678") * 8 + 1];
+
+		memset(str, ' ', sizeof(str));
+		str[sizeof(str) - 1] = '\0';
+
+		for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
+			if (p >= bottom && p < top) {
+				unsigned int val;
+				if (__get_user(val, (unsigned int *)p) == 0)
+					sprintf(str + i * 9, " %08x", val);
+				else
+					sprintf(str + i * 9, " ????????");
+			}
+		}
+		printk("%s%04lx:%s\n", lvl, first & 0xffff, str);
+	}
+
+	set_fs(fs);
+}
+
+static void dump_backtrace_entry(unsigned long where, unsigned long stack)
+{
+	print_ip_sym(where);
+	if (in_exception_text(where))
+		dump_mem("", "Exception stack", stack,
+			 stack + sizeof(struct pt_regs));
+}
+
+static void dump_instr(const char *lvl, struct pt_regs *regs)
+{
+	unsigned long addr = instruction_pointer(regs);
+	mm_segment_t fs;
+	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
+	int i;
+
+	/*
+	 * We need to switch to kernel mode so that we can use __get_user
+	 * to safely read from kernel space.  Note that we now dump the
+	 * code first, just in case the backtrace kills us.
+	 */
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	for (i = -4; i < 1; i++) {
+		unsigned int val, bad;
+
+		bad = __get_user(val, &((u32 *)addr)[i]);
+
+		if (!bad)
+			p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
+		else {
+			p += sprintf(p, "bad PC value");
+			break;
+		}
+	}
+	printk("%sCode: %s\n", lvl, str);
+
+	set_fs(fs);
+}
+
+static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+{
+	struct stackframe frame;
+
+	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+
+	if (!tsk)
+		tsk = current;
+
+	if (regs) {
+		frame.fp = regs->regs[29];
+		frame.sp = regs->sp;
+		frame.pc = regs->pc;
+	} else if (tsk == current) {
+		frame.fp = (unsigned long)__builtin_frame_address(0);
+		frame.sp = current_stack_pointer;
+		frame.pc = (unsigned long)dump_backtrace;
+	} else {
+		/*
+		 * task blocked in __switch_to
+		 */
+		frame.fp = thread_saved_fp(tsk);
+		frame.sp = thread_saved_sp(tsk);
+		frame.pc = thread_saved_pc(tsk);
+	}
+
+	pr_emerg("Call trace:\n");
+	while (1) {
+		unsigned long where = frame.pc;
+		int ret;
+
+		ret = unwind_frame(&frame);
+		if (ret < 0)
+			break;
+		dump_backtrace_entry(where, frame.sp);
+	}
+}
+
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+	dump_backtrace(NULL, tsk);
+	barrier();
+}
+
+#ifdef CONFIG_PREEMPT
+#define S_PREEMPT " PREEMPT"
+#else
+#define S_PREEMPT ""
+#endif
+#ifdef CONFIG_SMP
+#define S_SMP " SMP"
+#else
+#define S_SMP ""
+#endif
+
+static int __die(const char *str, int err, struct thread_info *thread,
+		 struct pt_regs *regs)
+{
+	struct task_struct *tsk = thread->task;
+	static int die_counter;
+	int ret;
+
+	pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
+		 str, err, ++die_counter);
+
+	/* trap and error numbers are mostly meaningless on ARM */
+	ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV);
+	if (ret == NOTIFY_STOP)
+		return ret;
+
+	print_modules();
+	__show_regs(regs);
+	pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
+		 TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
+
+	if (!user_mode(regs) || in_interrupt()) {
+		dump_mem(KERN_EMERG, "Stack: ", regs->sp,
+			 THREAD_SIZE + (unsigned long)task_stack_page(tsk));
+		dump_backtrace(regs, tsk);
+		dump_instr(KERN_EMERG, regs);
+	}
+
+	return ret;
+}
+
+static DEFINE_RAW_SPINLOCK(die_lock);
+
+/*
+ * This function is protected against re-entrancy.
+ */
+void die(const char *str, struct pt_regs *regs, int err)
+{
+	struct thread_info *thread = current_thread_info();
+	int ret;
+
+	oops_enter();
+
+	raw_spin_lock_irq(&die_lock);
+	console_verbose();
+	bust_spinlocks(1);
+	ret = __die(str, err, thread, regs);
+
+	if (regs && kexec_should_crash(thread->task))
+		crash_kexec(regs);
+
+	bust_spinlocks(0);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	raw_spin_unlock_irq(&die_lock);
+	oops_exit();
+
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+	if (panic_on_oops)
+		panic("Fatal exception");
+	if (ret != NOTIFY_STOP)
+		do_exit(SIGSEGV);
+}
+
+void arm64_notify_die(const char *str, struct pt_regs *regs,
+		      struct siginfo *info, int err)
+{
+	if (user_mode(regs)) {
+		current->thread.fault_address = 0;
+		current->thread.fault_code = err;
+		force_sig_info(info->si_signo, info, current);
+	} else {
+		die(str, regs, err);
+	}
+}
+
+static LIST_HEAD(undef_hook);
+static DEFINE_RAW_SPINLOCK(undef_lock);
+
+void register_undef_hook(struct undef_hook *hook)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_add(&hook->node, &undef_hook);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+}
+
+void unregister_undef_hook(struct undef_hook *hook)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_del(&hook->node);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+}
+
+static int call_undef_hook(struct pt_regs *regs)
+{
+	struct undef_hook *hook;
+	unsigned long flags;
+	u32 instr;
+	int (*fn)(struct pt_regs *regs, u32 instr) = NULL;
+	void __user *pc = (void __user *)instruction_pointer(regs);
+
+	if (!user_mode(regs))
+		return 1;
+
+	if (compat_thumb_mode(regs)) {
+		/* 16-bit Thumb instruction */
+		if (get_user(instr, (u16 __user *)pc))
+			goto exit;
+		instr = le16_to_cpu(instr);
+		if (aarch32_insn_is_wide(instr)) {
+			u32 instr2;
+
+			if (get_user(instr2, (u16 __user *)(pc + 2)))
+				goto exit;
+			instr2 = le16_to_cpu(instr2);
+			instr = (instr << 16) | instr2;
+		}
+	} else {
+		/* 32-bit ARM instruction */
+		if (get_user(instr, (u32 __user *)pc))
+			goto exit;
+		instr = le32_to_cpu(instr);
+	}
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_for_each_entry(hook, &undef_hook, node)
+		if ((instr & hook->instr_mask) == hook->instr_val &&
+			(regs->pstate & hook->pstate_mask) == hook->pstate_val)
+			fn = hook->fn;
+
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+exit:
+	return fn ? fn(regs, instr) : 1;
+}
+
+asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+{
+	siginfo_t info;
+	void __user *pc = (void __user *)instruction_pointer(regs);
+
+	/* check for AArch32 breakpoint instructions */
+	if (!aarch32_break_handler(regs))
+		return;
+
+	if (call_undef_hook(regs) == 0)
+		return;
+
+	if (show_unhandled_signals && unhandled_signal(current, SIGILL) &&
+	    printk_ratelimit()) {
+		pr_info("%s[%d]: undefined instruction: pc=%p\n",
+			current->comm, task_pid_nr(current), pc);
+		dump_instr(KERN_INFO, regs);
+	}
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLOPC;
+	info.si_addr  = pc;
+
+	arm64_notify_die("Oops - undefined instruction", regs, &info, 0);
+}
+
+long compat_arm_syscall(struct pt_regs *regs);
+
+asmlinkage long do_ni_syscall(struct pt_regs *regs)
+{
+#ifdef CONFIG_COMPAT
+	long ret;
+	if (is_compat_task()) {
+		ret = compat_arm_syscall(regs);
+		if (ret != -ENOSYS)
+			return ret;
+	}
+#endif
+
+	if (show_unhandled_signals && printk_ratelimit()) {
+		pr_info("%s[%d]: syscall %d\n", current->comm,
+			task_pid_nr(current), (int)regs->syscallno);
+		dump_instr("", regs);
+		if (user_mode(regs))
+			__show_regs(regs);
+	}
+
+	return sys_ni_syscall();
+}
+
+static const char *esr_class_str[] = {
+	[0 ... ESR_ELx_EC_MAX]		= "UNRECOGNIZED EC",
+	[ESR_ELx_EC_UNKNOWN]		= "Unknown/Uncategorized",
+	[ESR_ELx_EC_WFx]		= "WFI/WFE",
+	[ESR_ELx_EC_CP15_32]		= "CP15 MCR/MRC",
+	[ESR_ELx_EC_CP15_64]		= "CP15 MCRR/MRRC",
+	[ESR_ELx_EC_CP14_MR]		= "CP14 MCR/MRC",
+	[ESR_ELx_EC_CP14_LS]		= "CP14 LDC/STC",
+	[ESR_ELx_EC_FP_ASIMD]		= "ASIMD",
+	[ESR_ELx_EC_CP10_ID]		= "CP10 MRC/VMRS",
+	[ESR_ELx_EC_CP14_64]		= "CP14 MCRR/MRRC",
+	[ESR_ELx_EC_ILL]		= "PSTATE.IL",
+	[ESR_ELx_EC_SVC32]		= "SVC (AArch32)",
+	[ESR_ELx_EC_HVC32]		= "HVC (AArch32)",
+	[ESR_ELx_EC_SMC32]		= "SMC (AArch32)",
+	[ESR_ELx_EC_SVC64]		= "SVC (AArch64)",
+	[ESR_ELx_EC_HVC64]		= "HVC (AArch64)",
+	[ESR_ELx_EC_SMC64]		= "SMC (AArch64)",
+	[ESR_ELx_EC_SYS64]		= "MSR/MRS (AArch64)",
+	[ESR_ELx_EC_IMP_DEF]		= "EL3 IMP DEF",
+	[ESR_ELx_EC_IABT_LOW]		= "IABT (lower EL)",
+	[ESR_ELx_EC_IABT_CUR]		= "IABT (current EL)",
+	[ESR_ELx_EC_PC_ALIGN]		= "PC Alignment",
+	[ESR_ELx_EC_DABT_LOW]		= "DABT (lower EL)",
+	[ESR_ELx_EC_DABT_CUR]		= "DABT (current EL)",
+	[ESR_ELx_EC_SP_ALIGN]		= "SP Alignment",
+	[ESR_ELx_EC_FP_EXC32]		= "FP (AArch32)",
+	[ESR_ELx_EC_FP_EXC64]		= "FP (AArch64)",
+	[ESR_ELx_EC_SERROR]		= "SError",
+	[ESR_ELx_EC_BREAKPT_LOW]	= "Breakpoint (lower EL)",
+	[ESR_ELx_EC_BREAKPT_CUR]	= "Breakpoint (current EL)",
+	[ESR_ELx_EC_SOFTSTP_LOW]	= "Software Step (lower EL)",
+	[ESR_ELx_EC_SOFTSTP_CUR]	= "Software Step (current EL)",
+	[ESR_ELx_EC_WATCHPT_LOW]	= "Watchpoint (lower EL)",
+	[ESR_ELx_EC_WATCHPT_CUR]	= "Watchpoint (current EL)",
+	[ESR_ELx_EC_BKPT32]		= "BKPT (AArch32)",
+	[ESR_ELx_EC_VECTOR32]		= "Vector catch (AArch32)",
+	[ESR_ELx_EC_BRK64]		= "BRK (AArch64)",
+};
+
+const char *esr_get_class_string(u32 esr)
+{
+	return esr_class_str[esr >> ESR_ELx_EC_SHIFT];
+}
+
+/*
+ * bad_mode handles the impossible case in the exception vector.
+ */
+asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
+{
+	siginfo_t info;
+	void __user *pc = (void __user *)instruction_pointer(regs);
+	console_verbose();
+
+	pr_crit("Bad mode in %s handler detected, code 0x%08x -- %s\n",
+		handler[reason], esr, esr_get_class_string(esr));
+	__show_regs(regs);
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLOPC;
+	info.si_addr  = pc;
+
+	arm64_notify_die("Oops - bad mode", regs, &info, 0);
+}
+
+void __pte_error(const char *file, int line, unsigned long val)
+{
+	pr_crit("%s:%d: bad pte %016lx.\n", file, line, val);
+}
+
+void __pmd_error(const char *file, int line, unsigned long val)
+{
+	pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val);
+}
+
+void __pud_error(const char *file, int line, unsigned long val)
+{
+	pr_crit("%s:%d: bad pud %016lx.\n", file, line, val);
+}
+
+void __pgd_error(const char *file, int line, unsigned long val)
+{
+	pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val);
+}
+
+void __init trap_init(void)
+{
+	return;
+}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
new file mode 100644
index 000000000..ec37ab3f5
--- /dev/null
+++ b/arch/arm64/kernel/vdso.c
@@ -0,0 +1,231 @@
+/*
+ * VDSO implementation for AArch64 and vector page setup for AArch32.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/clocksource.h>
+#include <linux/elf.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/vmalloc.h>
+
+#include <asm/cacheflush.h>
+#include <asm/signal32.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+
+extern char vdso_start, vdso_end;
+static unsigned long vdso_pages;
+static struct page **vdso_pagelist;
+
+/*
+ * The vDSO data page.
+ */
+static union {
+	struct vdso_data	data;
+	u8			page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+#ifdef CONFIG_COMPAT
+/*
+ * Create and map the vectors page for AArch32 tasks.
+ */
+static struct page *vectors_page[1];
+
+static int alloc_vectors_page(void)
+{
+	extern char __kuser_helper_start[], __kuser_helper_end[];
+	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
+
+	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
+	unsigned long vpage;
+
+	vpage = get_zeroed_page(GFP_ATOMIC);
+
+	if (!vpage)
+		return -ENOMEM;
+
+	/* kuser helpers */
+	memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start,
+		kuser_sz);
+
+	/* sigreturn code */
+	memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET,
+               __aarch32_sigret_code_start, sigret_sz);
+
+	flush_icache_range(vpage, vpage + PAGE_SIZE);
+	vectors_page[0] = virt_to_page(vpage);
+
+	return 0;
+}
+arch_initcall(alloc_vectors_page);
+
+int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = AARCH32_VECTORS_BASE;
+	static struct vm_special_mapping spec = {
+		.name	= "[vectors]",
+		.pages	= vectors_page,
+
+	};
+	void *ret;
+
+	down_write(&mm->mmap_sem);
+	current->mm->context.vdso = (void *)addr;
+
+	/* Map vectors page at the high address. */
+	ret = _install_special_mapping(mm, addr, PAGE_SIZE,
+				       VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC,
+				       &spec);
+
+	up_write(&mm->mmap_sem);
+
+	return PTR_ERR_OR_ZERO(ret);
+}
+#endif /* CONFIG_COMPAT */
+
+static struct vm_special_mapping vdso_spec[2];
+
+static int __init vdso_init(void)
+{
+	int i;
+
+	if (memcmp(&vdso_start, "\177ELF", 4)) {
+		pr_err("vDSO is not a valid ELF object!\n");
+		return -EINVAL;
+	}
+
+	vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
+	pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
+		vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data);
+
+	/* Allocate the vDSO pagelist, plus a page for the data. */
+	vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
+				GFP_KERNEL);
+	if (vdso_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the vDSO data page. */
+	vdso_pagelist[0] = virt_to_page(vdso_data);
+
+	/* Grab the vDSO code pages. */
+	for (i = 0; i < vdso_pages; i++)
+		vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE);
+
+	/* Populate the special mapping structures */
+	vdso_spec[0] = (struct vm_special_mapping) {
+		.name	= "[vvar]",
+		.pages	= vdso_pagelist,
+	};
+
+	vdso_spec[1] = (struct vm_special_mapping) {
+		.name	= "[vdso]",
+		.pages	= &vdso_pagelist[1],
+	};
+
+	return 0;
+}
+arch_initcall(vdso_init);
+
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+				int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+	void *ret;
+
+	vdso_text_len = vdso_pages << PAGE_SHIFT;
+	/* Be sure to map the data page */
+	vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+
+	down_write(&mm->mmap_sem);
+	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+	if (IS_ERR_VALUE(vdso_base)) {
+		ret = ERR_PTR(vdso_base);
+		goto up_fail;
+	}
+	ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
+				       VM_READ|VM_MAYREAD,
+				       &vdso_spec[0]);
+	if (IS_ERR(ret))
+		goto up_fail;
+
+	vdso_base += PAGE_SIZE;
+	mm->context.vdso = (void *)vdso_base;
+	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
+				       VM_READ|VM_EXEC|
+				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				       &vdso_spec[1]);
+	if (IS_ERR(ret))
+		goto up_fail;
+
+
+	up_write(&mm->mmap_sem);
+	return 0;
+
+up_fail:
+	mm->context.vdso = NULL;
+	up_write(&mm->mmap_sem);
+	return PTR_ERR(ret);
+}
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+void update_vsyscall(struct timekeeper *tk)
+{
+	struct timespec xtime_coarse;
+	u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
+
+	++vdso_data->tb_seq_count;
+	smp_wmb();
+
+	xtime_coarse = __current_kernel_time();
+	vdso_data->use_syscall			= use_syscall;
+	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
+	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->wtm_clock_sec		= tk->wall_to_monotonic.tv_sec;
+	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;
+
+	if (!use_syscall) {
+		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
+		vdso_data->xtime_clock_sec	= tk->xtime_sec;
+		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
+		vdso_data->cs_mult		= tk->tkr_mono.mult;
+		vdso_data->cs_shift		= tk->tkr_mono.shift;
+	}
+
+	smp_wmb();
+	++vdso_data->tb_seq_count;
+}
+
+void update_vsyscall_tz(void)
+{
+	vdso_data->tz_minuteswest	= sys_tz.tz_minuteswest;
+	vdso_data->tz_dsttime		= sys_tz.tz_dsttime;
+}
diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore
new file mode 100644
index 000000000..b8cc94e96
--- /dev/null
+++ b/arch/arm64/kernel/vdso/.gitignore
@@ -0,0 +1,2 @@
+vdso.lds
+vdso-offsets.h
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
new file mode 100644
index 000000000..f6fe17d88
--- /dev/null
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -0,0 +1,67 @@
+#
+# Building a vDSO image for AArch64.
+#
+# Author: Will Deacon <will.deacon@arm.com>
+# Heavily based on the vDSO Makefiles for other archs.
+#
+
+obj-vdso := gettimeofday.o note.o sigreturn.o
+
+# Build rules
+targets := $(obj-vdso) vdso.so vdso.so.dbg
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
+		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
+# down to collect2, resulting in silent corruption of the vDSO image.
+ccflags-y += -Wl,-shared
+
+obj-y += vdso.o
+extra-y += vdso.lds vdso-offsets.h
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Force dependency (incbin is bad)
+$(obj)/vdso.o : $(obj)/vdso.so
+
+# Link rule for the .so file, .lds has to be first
+$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso)
+	$(call if_changed,vdsold)
+
+# Strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+define cmd_vdsosym
+	$(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \
+	cp $@ include/generated/
+endef
+
+$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+	$(call if_changed,vdsosym)
+
+# Assembly rules for the .S files
+$(obj-vdso): %.o: %.S FORCE
+	$(call if_changed_dep,vdsoas)
+
+# Actual build commands
+quiet_cmd_vdsold = VDSOL   $@
+      cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
+quiet_cmd_vdsoas = VDSOA   $@
+      cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+
+# Install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso.so: $(obj)/vdso.so.dbg
+	@mkdir -p $(MODLIB)/vdso
+	$(call cmd,vdso_install)
+
+vdso_install: vdso.so
diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
new file mode 100755
index 000000000..01924ff07
--- /dev/null
+++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Author: Will Deacon <will.deacon@arm.com
+#
+
+LC_ALL=C
+sed -n -e 's/^00*/0/' -e \
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p'
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
new file mode 100644
index 000000000..efa79e8d4
--- /dev/null
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -0,0 +1,248 @@
+/*
+ * Userspace implementations of gettimeofday() and friends.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+#define NSEC_PER_SEC_LO16	0xca00
+#define NSEC_PER_SEC_HI16	0x3b9a
+
+vdso_data	.req	x6
+use_syscall	.req	w7
+seqcnt		.req	w8
+
+	.macro	seqcnt_acquire
+9999:	ldr	seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
+	tbnz	seqcnt, #0, 9999b
+	dmb	ishld
+	ldr	use_syscall, [vdso_data, #VDSO_USE_SYSCALL]
+	.endm
+
+	.macro	seqcnt_read, cnt
+	dmb	ishld
+	ldr	\cnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
+	.endm
+
+	.macro	seqcnt_check, cnt, fail
+	cmp	\cnt, seqcnt
+	b.ne	\fail
+	.endm
+
+	.text
+
+/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
+ENTRY(__kernel_gettimeofday)
+	.cfi_startproc
+	mov	x2, x30
+	.cfi_register x30, x2
+
+	/* Acquire the sequence counter and get the timespec. */
+	adr	vdso_data, _vdso_data
+1:	seqcnt_acquire
+	cbnz	use_syscall, 4f
+
+	/* If tv is NULL, skip to the timezone code. */
+	cbz	x0, 2f
+	bl	__do_get_tspec
+	seqcnt_check w9, 1b
+
+	/* Convert ns to us. */
+	mov	x13, #1000
+	lsl	x13, x13, x12
+	udiv	x11, x11, x13
+	stp	x10, x11, [x0, #TVAL_TV_SEC]
+2:
+	/* If tz is NULL, return 0. */
+	cbz	x1, 3f
+	ldp	w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
+	stp	w4, w5, [x1, #TZ_MINWEST]
+3:
+	mov	x0, xzr
+	ret	x2
+4:
+	/* Syscall fallback. */
+	mov	x8, #__NR_gettimeofday
+	svc	#0
+	ret	x2
+	.cfi_endproc
+ENDPROC(__kernel_gettimeofday)
+
+/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
+ENTRY(__kernel_clock_gettime)
+	.cfi_startproc
+	cmp	w0, #CLOCK_REALTIME
+	ccmp	w0, #CLOCK_MONOTONIC, #0x4, ne
+	b.ne	2f
+
+	mov	x2, x30
+	.cfi_register x30, x2
+
+	/* Get kernel timespec. */
+	adr	vdso_data, _vdso_data
+1:	seqcnt_acquire
+	cbnz	use_syscall, 7f
+
+	bl	__do_get_tspec
+	seqcnt_check w9, 1b
+
+	mov	x30, x2
+
+	cmp	w0, #CLOCK_MONOTONIC
+	b.ne	6f
+
+	/* Get wtm timespec. */
+	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
+
+	/* Check the sequence counter. */
+	seqcnt_read w9
+	seqcnt_check w9, 1b
+	b	4f
+2:
+	cmp	w0, #CLOCK_REALTIME_COARSE
+	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
+	b.ne	8f
+
+	/* xtime_coarse_nsec is already right-shifted */
+	mov	x12, #0
+
+	/* Get coarse timespec. */
+	adr	vdso_data, _vdso_data
+3:	seqcnt_acquire
+	ldp	x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
+
+	/* Get wtm timespec. */
+	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
+
+	/* Check the sequence counter. */
+	seqcnt_read w9
+	seqcnt_check w9, 3b
+
+	cmp	w0, #CLOCK_MONOTONIC_COARSE
+	b.ne	6f
+4:
+	/* Add on wtm timespec. */
+	add	x10, x10, x13
+	lsl	x14, x14, x12
+	add	x11, x11, x14
+
+	/* Normalise the new timespec. */
+	mov	x15, #NSEC_PER_SEC_LO16
+	movk	x15, #NSEC_PER_SEC_HI16, lsl #16
+	lsl	x15, x15, x12
+	cmp	x11, x15
+	b.lt	5f
+	sub	x11, x11, x15
+	add	x10, x10, #1
+5:
+	cmp	x11, #0
+	b.ge	6f
+	add	x11, x11, x15
+	sub	x10, x10, #1
+
+6:	/* Store to the user timespec. */
+	lsr	x11, x11, x12
+	stp	x10, x11, [x1, #TSPEC_TV_SEC]
+	mov	x0, xzr
+	ret
+7:
+	mov	x30, x2
+8:	/* Syscall fallback. */
+	mov	x8, #__NR_clock_gettime
+	svc	#0
+	ret
+	.cfi_endproc
+ENDPROC(__kernel_clock_gettime)
+
+/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
+ENTRY(__kernel_clock_getres)
+	.cfi_startproc
+	cmp	w0, #CLOCK_REALTIME
+	ccmp	w0, #CLOCK_MONOTONIC, #0x4, ne
+	b.ne	1f
+
+	ldr	x2, 5f
+	b	2f
+1:
+	cmp	w0, #CLOCK_REALTIME_COARSE
+	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
+	b.ne	4f
+	ldr	x2, 6f
+2:
+	cbz	w1, 3f
+	stp	xzr, x2, [x1]
+
+3:	/* res == NULL. */
+	mov	w0, wzr
+	ret
+
+4:	/* Syscall fallback. */
+	mov	x8, #__NR_clock_getres
+	svc	#0
+	ret
+5:
+	.quad	CLOCK_REALTIME_RES
+6:
+	.quad	CLOCK_COARSE_RES
+	.cfi_endproc
+ENDPROC(__kernel_clock_getres)
+
+/*
+ * Read the current time from the architected counter.
+ * Expects vdso_data to be initialised.
+ * Clobbers the temporary registers (x9 - x15).
+ * Returns:
+ *  - w9		= vDSO sequence counter
+ *  - (x10, x11)	= (ts->tv_sec, shifted ts->tv_nsec)
+ *  - w12		= cs_shift
+ */
+ENTRY(__do_get_tspec)
+	.cfi_startproc
+
+	/* Read from the vDSO data page. */
+	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
+	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
+	seqcnt_read w9
+
+	/* Read the virtual counter. */
+	isb
+	mrs	x15, cntvct_el0
+
+	/* Calculate cycle delta and convert to ns. */
+	sub	x10, x15, x10
+	/* We can only guarantee 56 bits of precision. */
+	movn	x15, #0xff00, lsl #48
+	and	x10, x15, x10
+	mul	x10, x10, x11
+
+	/* Use the kernel time to calculate the new timespec. */
+	mov	x11, #NSEC_PER_SEC_LO16
+	movk	x11, #NSEC_PER_SEC_HI16, lsl #16
+	lsl	x11, x11, x12
+	add	x15, x10, x14
+	udiv	x14, x15, x11
+	add	x10, x13, x14
+	mul	x13, x14, x11
+	sub	x11, x15, x13
+
+	ret
+	.cfi_endproc
+ENDPROC(__do_get_tspec)
diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S
new file mode 100644
index 000000000..b82c85e5d
--- /dev/null
+++ b/arch/arm64/kernel/vdso/note.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ *
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+	.long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S
new file mode 100644
index 000000000..20d98effa
--- /dev/null
+++ b/arch/arm64/kernel/vdso/sigreturn.S
@@ -0,0 +1,37 @@
+/*
+ * Sigreturn trampoline for returning from a signal when the SA_RESTORER
+ * flag is not set.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+	.text
+
+	nop
+ENTRY(__kernel_rt_sigreturn)
+	.cfi_startproc
+	.cfi_signal_frame
+	.cfi_def_cfa	x29, 0
+	.cfi_offset	x29, 0 * 8
+	.cfi_offset	x30, 1 * 8
+	mov	x8, #__NR_rt_sigreturn
+	svc	#0
+	.cfi_endproc
+ENDPROC(__kernel_rt_sigreturn)
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S
new file mode 100644
index 000000000..60c1db54b
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vdso.S
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso_start, vdso_end
+	.balign PAGE_SIZE
+vdso_start:
+	.incbin "arch/arm64/kernel/vdso/vdso.so"
+	.balign PAGE_SIZE
+vdso_end:
+
+	.previous
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
new file mode 100644
index 000000000..beca249bc
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -0,0 +1,98 @@
+/*
+ * GNU linker script for the VDSO library.
+*
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ * Heavily based on the vDSO linker scripts for other archs.
+ */
+
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+
+SECTIONS
+{
+	PROVIDE(_vdso_data = . - PAGE_SIZE);
+	. = VDSO_LBASE + SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+
+	.text		: { *(.text*) }			:text	=0xd503201f
+	PROVIDE (__etext = .);
+	PROVIDE (_etext = .);
+	PROVIDE (etext = .);
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.rodata		: { *(.rodata*) }		:text
+
+	_end = .;
+	PROVIDE(end = .);
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
+	note		PT_NOTE		FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	LINUX_2.6.39 {
+	global:
+		__kernel_rt_sigreturn;
+		__kernel_gettimeofday;
+		__kernel_clock_gettime;
+		__kernel_clock_getres;
+	local: *;
+	};
+}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_sigtramp		= __kernel_rt_sigreturn;
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
new file mode 100644
index 000000000..a2c29865c
--- /dev/null
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -0,0 +1,179 @@
+/*
+ * ld script to make ARM Linux kernel
+ * taken from the i386 version by Russell King
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ */
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#include "image.h"
+
+/* .exit.text needed in case of alternative patching */
+#define ARM_EXIT_KEEP(x)	x
+#define ARM_EXIT_DISCARD(x)
+
+OUTPUT_ARCH(aarch64)
+ENTRY(_text)
+
+jiffies = jiffies_64;
+
+#define HYPERVISOR_TEXT					\
+	/*						\
+	 * Align to 4 KB so that			\
+	 * a) the HYP vector table is at its minimum	\
+	 *    alignment of 2048 bytes			\
+	 * b) the HYP init code will not cross a page	\
+	 *    boundary if its size does not exceed	\
+	 *    4 KB (see related ASSERT() below)		\
+	 */						\
+	. = ALIGN(SZ_4K);				\
+	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;	\
+	*(.hyp.idmap.text)				\
+	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;	\
+	VMLINUX_SYMBOL(__hyp_text_start) = .;		\
+	*(.hyp.text)					\
+	VMLINUX_SYMBOL(__hyp_text_end) = .;
+
+/*
+ * The size of the PE/COFF section that covers the kernel image, which
+ * runs from stext to _edata, must be a round multiple of the PE/COFF
+ * FileAlignment, which we set to its minimum value of 0x200. 'stext'
+ * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned
+ * boundary should be sufficient.
+ */
+PECOFF_FILE_ALIGNMENT = 0x200;
+
+#ifdef CONFIG_EFI
+#define PECOFF_EDATA_PADDING	\
+	.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
+#else
+#define PECOFF_EDATA_PADDING
+#endif
+
+#ifdef CONFIG_DEBUG_ALIGN_RODATA
+#define ALIGN_DEBUG_RO			. = ALIGN(1<<SECTION_SHIFT);
+#define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
+#else
+#define ALIGN_DEBUG_RO
+#define ALIGN_DEBUG_RO_MIN(min)		. = ALIGN(min);
+#endif
+
+SECTIONS
+{
+	/*
+	 * XXX: The linker does not define how output sections are
+	 * assigned to input sections when there are multiple statements
+	 * matching the same input section name.  There is no documented
+	 * order of matching.
+	 */
+	/DISCARD/ : {
+		ARM_EXIT_DISCARD(EXIT_TEXT)
+		ARM_EXIT_DISCARD(EXIT_DATA)
+		EXIT_CALL
+		*(.discard)
+		*(.discard.*)
+	}
+
+	. = PAGE_OFFSET + TEXT_OFFSET;
+
+	.head.text : {
+		_text = .;
+		HEAD_TEXT
+	}
+	ALIGN_DEBUG_RO
+	.text : {			/* Real text segment		*/
+		_stext = .;		/* Text and read-only data	*/
+			__exception_text_start = .;
+			*(.exception.text)
+			__exception_text_end = .;
+			IRQENTRY_TEXT
+			TEXT_TEXT
+			SCHED_TEXT
+			LOCK_TEXT
+			HYPERVISOR_TEXT
+			*(.fixup)
+			*(.gnu.warning)
+		. = ALIGN(16);
+		*(.got)			/* Global offset table		*/
+	}
+
+	ALIGN_DEBUG_RO
+	RO_DATA(PAGE_SIZE)
+	EXCEPTION_TABLE(8)
+	NOTES
+	ALIGN_DEBUG_RO
+	_etext = .;			/* End of text and rodata section */
+
+	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+	__init_begin = .;
+
+	INIT_TEXT_SECTION(8)
+	.exit.text : {
+		ARM_EXIT_KEEP(EXIT_TEXT)
+	}
+
+	ALIGN_DEBUG_RO_MIN(16)
+	.init.data : {
+		INIT_DATA
+		INIT_SETUP(16)
+		INIT_CALLS
+		CON_INITCALL
+		SECURITY_INITCALL
+		INIT_RAM_FS
+	}
+	.exit.data : {
+		ARM_EXIT_KEEP(EXIT_DATA)
+	}
+
+	PERCPU_SECTION(64)
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+	. = ALIGN(4);
+	.altinstructions : {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+	.altinstr_replacement : {
+		*(.altinstr_replacement)
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	_data = .;
+	_sdata = .;
+	RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
+	PECOFF_EDATA_PADDING
+	_edata = .;
+
+	BSS_SECTION(0, 0, 0)
+
+	. = ALIGN(PAGE_SIZE);
+	idmap_pg_dir = .;
+	. += IDMAP_DIR_SIZE;
+	swapper_pg_dir = .;
+	. += SWAPPER_DIR_SIZE;
+
+	_end = .;
+
+	STABS_DEBUG
+
+	HEAD_SYMBOLS
+}
+
+/*
+ * The HYP init code can't be more than a page long,
+ * and should not cross a page boundary.
+ */
+ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
+	"HYP init code too big or misaligned")
+
+/*
+ * If padding is applied before .head.text, virt<->phys conversions will fail.
+ */
+ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned")
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
new file mode 100644
index 000000000..5105e297e
--- /dev/null
+++ b/arch/arm64/kvm/Kconfig
@@ -0,0 +1,54 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	---help---
+	  Say Y here to get to see options for using your Linux host to run
+	  other operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and
+	  disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	bool "Kernel-based Virtual Machine (KVM) support"
+	depends on OF
+	select MMU_NOTIFIER
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
+	select KVM_MMIO
+	select KVM_ARM_HOST
+	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	select SRCU
+	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_IRQFD
+	---help---
+	  Support hosting virtualized guest machines.
+
+	  If unsure, say N.
+
+config KVM_ARM_HOST
+	bool
+	---help---
+	  Provides host support for ARM processors.
+
+config KVM_ARM_MAX_VCPUS
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
+	help
+	  Static number of max supported virtual CPUs per VM.
+
+	  If you choose a high number, the vcpu structures will be quite
+	  large, so only choose a reasonable number that you expect to
+	  actually use.
+
+endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
new file mode 100644
index 000000000..d5904f876
--- /dev/null
+++ b/arch/arm64/kvm/Makefile
@@ -0,0 +1,29 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Iarch/arm64/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+KVM=../../../virt/kvm
+ARM=../../../arch/arm/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c
new file mode 100644
index 000000000..f87d8fbaa
--- /dev/null
+++ b/arch/arm64/kvm/emulate.c
@@ -0,0 +1,159 @@
+/*
+ * (not much of an) Emulation layer for 32bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/esr.h>
+#include <asm/kvm_emulate.h>
+
+/*
+ * stolen from arch/arm/kernel/opcodes.c
+ *
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+	0xF0F0,			/* EQ == Z set            */
+	0x0F0F,			/* NE                     */
+	0xCCCC,			/* CS == C set            */
+	0x3333,			/* CC                     */
+	0xFF00,			/* MI == N set            */
+	0x00FF,			/* PL                     */
+	0xAAAA,			/* VS == V set            */
+	0x5555,			/* VC                     */
+	0x0C0C,			/* HI == C set && Z clear */
+	0xF3F3,			/* LS == C clear || Z set */
+	0xAA55,			/* GE == (N==V)           */
+	0x55AA,			/* LT == (N!=V)           */
+	0x0A05,			/* GT == (!Z && (N==V))   */
+	0xF5FA,			/* LE == (Z || (N!=V))    */
+	0xFFFF,			/* AL always              */
+	0			/* NV                     */
+};
+
+static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+{
+	u32 esr = kvm_vcpu_get_hsr(vcpu);
+
+	if (esr & ESR_ELx_CV)
+		return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
+
+	return -1;
+}
+
+/*
+ * Check if a trapped instruction should have been executed or not.
+ */
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long cpsr;
+	u32 cpsr_cond;
+	int cond;
+
+	/* Top two bits non-zero?  Unconditional. */
+	if (kvm_vcpu_get_hsr(vcpu) >> 30)
+		return true;
+
+	/* Is condition field valid? */
+	cond = kvm_vcpu_get_condition(vcpu);
+	if (cond == 0xE)
+		return true;
+
+	cpsr = *vcpu_cpsr(vcpu);
+
+	if (cond < 0) {
+		/* This can happen in Thumb mode: examine IT state. */
+		unsigned long it;
+
+		it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+		/* it == 0 => unconditional. */
+		if (it == 0)
+			return true;
+
+		/* The cond for this insn works out as the top 4 bits. */
+		cond = (it >> 4);
+	}
+
+	cpsr_cond = cpsr >> 28;
+
+	if (!((cc_map[cond] >> cpsr_cond) & 1))
+		return false;
+
+	return true;
+}
+
+/**
+ * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * @vcpu:	The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+	unsigned long itbits, cond;
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_arm = !(cpsr & COMPAT_PSR_T_BIT);
+
+	BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK));
+
+	if (!(cpsr & COMPAT_PSR_IT_MASK))
+		return;
+
+	cond = (cpsr & 0xe000) >> 13;
+	itbits = (cpsr & 0x1c00) >> (10 - 2);
+	itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+	/* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */
+	if ((itbits & 0x7) == 0)
+		itbits = cond = 0;
+	else
+		itbits = (itbits << 1) & 0x1f;
+
+	cpsr &= ~COMPAT_PSR_IT_MASK;
+	cpsr |= cond << 13;
+	cpsr |= (itbits & 0x1c) << (10 - 2);
+	cpsr |= (itbits & 0x3) << 25;
+	*vcpu_cpsr(vcpu) = cpsr;
+}
+
+/**
+ * kvm_skip_instr - skip a trapped instruction and proceed to the next
+ * @vcpu: The vcpu pointer
+ */
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	bool is_thumb;
+
+	is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT);
+	if (is_thumb && !is_wide_instr)
+		*vcpu_pc(vcpu) += 2;
+	else
+		*vcpu_pc(vcpu) += 4;
+	kvm_adjust_itstate(vcpu);
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
new file mode 100644
index 000000000..9535bd555
--- /dev/null
+++ b/arch/arm64/kvm/guest.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/guest.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputype.h>
+#include <asm/uaccess.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+static u64 core_reg_offset_from_id(u64 id)
+{
+	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
+}
+
+static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/*
+	 * Because the kvm_regs structure is a mix of 32, 64 and
+	 * 128bit fields, we index it as if it was a 32bit
+	 * array. Hence below, nr_regs is the number of entries, and
+	 * off the index in the "array".
+	 */
+	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+	struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+	int nr_regs = sizeof(*regs) / sizeof(__u32);
+	u32 off;
+
+	/* Our ID is an index into the kvm_regs struct. */
+	off = core_reg_offset_from_id(reg->id);
+	if (off >= nr_regs ||
+	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+		return -ENOENT;
+
+	if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+	struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+	int nr_regs = sizeof(*regs) / sizeof(__u32);
+	__uint128_t tmp;
+	void *valp = &tmp;
+	u64 off;
+	int err = 0;
+
+	/* Our ID is an index into the kvm_regs struct. */
+	off = core_reg_offset_from_id(reg->id);
+	if (off >= nr_regs ||
+	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+		return -ENOENT;
+
+	if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
+		return -EINVAL;
+
+	if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
+		u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK;
+		switch (mode) {
+		case COMPAT_PSR_MODE_USR:
+		case COMPAT_PSR_MODE_FIQ:
+		case COMPAT_PSR_MODE_IRQ:
+		case COMPAT_PSR_MODE_SVC:
+		case COMPAT_PSR_MODE_ABT:
+		case COMPAT_PSR_MODE_UND:
+		case PSR_MODE_EL0t:
+		case PSR_MODE_EL1t:
+		case PSR_MODE_EL1h:
+			break;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+out:
+	return err;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	return -EINVAL;
+}
+
+static unsigned long num_core_regs(void)
+{
+	return sizeof(struct kvm_regs) / sizeof(__u32);
+}
+
+/**
+ * ARM64 versions of the TIMER registers, always available on arm64
+ */
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return -EFAULT;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
+/**
+ * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
+{
+	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
+                + NUM_TIMER_REGS;
+}
+
+/**
+ * kvm_arm_copy_reg_indices - get indices of all registers.
+ *
+ * We do core registers right here, then we apppend system regs.
+ */
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	unsigned int i;
+	const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+	int ret;
+
+	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
+		if (put_user(core_reg | i, uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
+	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
+}
+
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/* We currently use nothing arch-specific in upper 32 bits */
+	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+		return -EINVAL;
+
+	/* Register group 16 means we want a core register. */
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+		return get_core_reg(vcpu, reg);
+
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
+	return kvm_arm_sys_reg_get_reg(vcpu, reg);
+}
+
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/* We currently use nothing arch-specific in upper 32 bits */
+	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+		return -EINVAL;
+
+	/* Register group 16 means we set a core register. */
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+		return set_core_reg(vcpu, reg);
+
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
+	return kvm_arm_sys_reg_set_reg(vcpu, reg);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+	unsigned long implementor = read_cpuid_implementor();
+	unsigned long part_number = read_cpuid_part_number();
+
+	switch (implementor) {
+	case ARM_CPU_IMP_ARM:
+		switch (part_number) {
+		case ARM_CPU_PART_AEM_V8:
+			return KVM_ARM_TARGET_AEM_V8;
+		case ARM_CPU_PART_FOUNDATION:
+			return KVM_ARM_TARGET_FOUNDATION_V8;
+		case ARM_CPU_PART_CORTEX_A53:
+			return KVM_ARM_TARGET_CORTEX_A53;
+		case ARM_CPU_PART_CORTEX_A57:
+			return KVM_ARM_TARGET_CORTEX_A57;
+		};
+		break;
+	case ARM_CPU_IMP_APM:
+		switch (part_number) {
+		case APM_CPU_PART_POTENZA:
+			return KVM_ARM_TARGET_XGENE_POTENZA;
+		};
+		break;
+	};
+
+	return -EINVAL;
+}
+
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+	int target = kvm_target_cpu();
+
+	if (target < 0)
+		return -ENODEV;
+
+	memset(init, 0, sizeof(*init));
+
+	/*
+	 * For now, we don't return any features.
+	 * In future, we might use features to return target
+	 * specific features available for the preferred
+	 * target type.
+	 */
+	init->target = (__u32)target;
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
new file mode 100644
index 000000000..524fa2567
--- /dev/null
+++ b/arch/arm64/kvm/handle_exit.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/handle_exit.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+#include <asm/esr.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_psci.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	int ret;
+
+	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
+			    kvm_vcpu_hvc_get_imm(vcpu));
+
+	ret = kvm_psci_call(vcpu);
+	if (ret < 0) {
+		kvm_inject_undefined(vcpu);
+		return 1;
+	}
+
+	return ret;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/**
+ * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
+ *		    instruction executed by a guest
+ *
+ * @vcpu:	the vcpu pointer
+ *
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
+		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
+		kvm_vcpu_on_spin(vcpu);
+	} else {
+		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
+		kvm_vcpu_block(vcpu);
+	}
+
+	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+
+	return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
+	[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,
+	[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,
+	[ESR_ELx_EC_CP14_MR]	= kvm_handle_cp14_32,
+	[ESR_ELx_EC_CP14_LS]	= kvm_handle_cp14_load_store,
+	[ESR_ELx_EC_CP14_64]	= kvm_handle_cp14_64,
+	[ESR_ELx_EC_HVC32]	= handle_hvc,
+	[ESR_ELx_EC_SMC32]	= handle_smc,
+	[ESR_ELx_EC_HVC64]	= handle_hvc,
+	[ESR_ELx_EC_SMC64]	= handle_smc,
+	[ESR_ELx_EC_SYS64]	= kvm_handle_sys_reg,
+	[ESR_ELx_EC_IABT_LOW]	= kvm_handle_guest_abort,
+	[ESR_ELx_EC_DABT_LOW]	= kvm_handle_guest_abort,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT;
+
+	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
+	    !arm_exit_handlers[hsr_ec]) {
+		kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
+			hsr, esr_get_class_string(hsr));
+		BUG();
+	}
+
+	return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		       int exception_index)
+{
+	exit_handle_fn exit_handler;
+
+	switch (exception_index) {
+	case ARM_EXCEPTION_IRQ:
+		return 1;
+	case ARM_EXCEPTION_TRAP:
+		/*
+		 * See ARM ARM B1.14.1: "Hyp traps on instructions
+		 * that fail their condition code check"
+		 */
+		if (!kvm_condition_valid(vcpu)) {
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return 1;
+		}
+
+		exit_handler = kvm_get_exit_handler(vcpu);
+
+		return exit_handler(vcpu, run);
+	default:
+		kvm_pr_unimpl("Unsupported exception type: %d",
+			      exception_index);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return 0;
+	}
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
new file mode 100644
index 000000000..178ba2248
--- /dev/null
+++ b/arch/arm64/kvm/hyp-init.S
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/pgtable-hwdef.h>
+
+	.text
+	.pushsection	.hyp.idmap.text, "ax"
+
+	.align	11
+
+ENTRY(__kvm_hyp_init)
+	ventry	__invalid		// Synchronous EL2t
+	ventry	__invalid		// IRQ EL2t
+	ventry	__invalid		// FIQ EL2t
+	ventry	__invalid		// Error EL2t
+
+	ventry	__invalid		// Synchronous EL2h
+	ventry	__invalid		// IRQ EL2h
+	ventry	__invalid		// FIQ EL2h
+	ventry	__invalid		// Error EL2h
+
+	ventry	__do_hyp_init		// Synchronous 64-bit EL1
+	ventry	__invalid		// IRQ 64-bit EL1
+	ventry	__invalid		// FIQ 64-bit EL1
+	ventry	__invalid		// Error 64-bit EL1
+
+	ventry	__invalid		// Synchronous 32-bit EL1
+	ventry	__invalid		// IRQ 32-bit EL1
+	ventry	__invalid		// FIQ 32-bit EL1
+	ventry	__invalid		// Error 32-bit EL1
+
+__invalid:
+	b	.
+
+	/*
+	 * x0: HYP boot pgd
+	 * x1: HYP pgd
+	 * x2: HYP stack
+	 * x3: HYP vectors
+	 */
+__do_hyp_init:
+
+	msr	ttbr0_el2, x0
+
+	mrs	x4, tcr_el1
+	ldr	x5, =TCR_EL2_MASK
+	and	x4, x4, x5
+	ldr	x5, =TCR_EL2_FLAGS
+	orr	x4, x4, x5
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+	/*
+	 * If we are running with VA_BITS < 48, we may be running with an extra
+	 * level of translation in the ID map. This is only the case if system
+	 * RAM is out of range for the currently configured page size and number
+	 * of translation levels, in which case we will also need the extra
+	 * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
+	 *
+	 * However, at EL2, there is only one TTBR register, and we can't switch
+	 * between translation tables *and* update TCR_EL2.T0SZ at the same
+	 * time. Bottom line: we need the extra level in *both* our translation
+	 * tables.
+	 *
+	 * So use the same T0SZ value we use for the ID map.
+	 */
+	ldr_l	x5, idmap_t0sz
+	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+#endif
+	msr	tcr_el2, x4
+
+	ldr	x4, =VTCR_EL2_FLAGS
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+	 * VTCR_EL2.
+	 */
+	mrs	x5, ID_AA64MMFR0_EL1
+	bfi	x4, x5, #16, #3
+	msr	vtcr_el2, x4
+
+	mrs	x4, mair_el1
+	msr	mair_el2, x4
+	isb
+
+	/* Invalidate the stale TLBs from Bootloader */
+	tlbi	alle2
+	dsb	sy
+
+	mrs	x4, sctlr_el2
+	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
+	ldr	x5, =SCTLR_EL2_FLAGS
+	orr	x4, x4, x5
+	msr	sctlr_el2, x4
+	isb
+
+	/* Skip the trampoline dance if we merged the boot and runtime PGDs */
+	cmp	x0, x1
+	b.eq	merged
+
+	/* MMU is now enabled. Get ready for the trampoline dance */
+	ldr	x4, =TRAMPOLINE_VA
+	adr	x5, target
+	bfi	x4, x5, #0, #PAGE_SHIFT
+	br	x4
+
+target: /* We're now in the trampoline code, switch page tables */
+	msr	ttbr0_el2, x1
+	isb
+
+	/* Invalidate the old TLBs */
+	tlbi	alle2
+	dsb	sy
+
+merged:
+	/* Set the stack and new vectors */
+	kern_hyp_va	x2
+	mov	sp, x2
+	kern_hyp_va	x3
+	msr	vbar_el2, x3
+
+	/* Hello, World! */
+	eret
+ENDPROC(__kvm_hyp_init)
+
+	.ltorg
+
+	.popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
new file mode 100644
index 000000000..5befd010e
--- /dev/null
+++ b/arch/arm64/kvm/hyp.S
@@ -0,0 +1,1298 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/debug-monitors.h>
+#include <asm/esr.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/memory.h>
+
+#define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SPSR_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
+#define CPU_SYSREG_OFFSET(x)	(CPU_SYSREGS + 8*x)
+
+	.text
+	.pushsection	.hyp.text, "ax"
+	.align	PAGE_SHIFT
+
+.macro save_common_regs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_XREG_OFFSET(19)
+	stp	x19, x20, [x3]
+	stp	x21, x22, [x3, #16]
+	stp	x23, x24, [x3, #32]
+	stp	x25, x26, [x3, #48]
+	stp	x27, x28, [x3, #64]
+	stp	x29, lr, [x3, #80]
+
+	mrs	x19, sp_el0
+	mrs	x20, elr_el2		// EL1 PC
+	mrs	x21, spsr_el2		// EL1 pstate
+
+	stp	x19, x20, [x3, #96]
+	str	x21, [x3, #112]
+
+	mrs	x22, sp_el1
+	mrs	x23, elr_el1
+	mrs	x24, spsr_el1
+
+	str	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	str	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+	str	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+.endm
+
+.macro restore_common_regs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	ldr	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+	ldr	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+
+	msr	sp_el1, x22
+	msr	elr_el1, x23
+	msr	spsr_el1, x24
+
+	add	x3, x2, #CPU_XREG_OFFSET(31)    // SP_EL0
+	ldp	x19, x20, [x3]
+	ldr	x21, [x3, #16]
+
+	msr	sp_el0, x19
+	msr	elr_el2, x20 				// EL1 PC
+	msr	spsr_el2, x21 				// EL1 pstate
+
+	add	x3, x2, #CPU_XREG_OFFSET(19)
+	ldp	x19, x20, [x3]
+	ldp	x21, x22, [x3, #16]
+	ldp	x23, x24, [x3, #32]
+	ldp	x25, x26, [x3, #48]
+	ldp	x27, x28, [x3, #64]
+	ldp	x29, lr, [x3, #80]
+.endm
+
+.macro save_host_regs
+	save_common_regs
+.endm
+
+.macro restore_host_regs
+	restore_common_regs
+.endm
+
+.macro save_fpsimd
+	// x2: cpu context address
+	// x3, x4: tmp regs
+	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	fpsimd_save x3, 4
+.endm
+
+.macro restore_fpsimd
+	// x2: cpu context address
+	// x3, x4: tmp regs
+	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	fpsimd_restore x3, 4
+.endm
+
+.macro save_guest_regs
+	// x0 is the vcpu address
+	// x1 is the return code, do not corrupt!
+	// x2 is the cpu context
+	// x3 is a tmp register
+	// Guest's x0-x3 are on the stack
+
+	// Compute base to save registers
+	add	x3, x2, #CPU_XREG_OFFSET(4)
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+	stp	x8, x9, [x3, #32]
+	stp	x10, x11, [x3, #48]
+	stp	x12, x13, [x3, #64]
+	stp	x14, x15, [x3, #80]
+	stp	x16, x17, [x3, #96]
+	str	x18, [x3, #112]
+
+	pop	x6, x7			// x2, x3
+	pop	x4, x5			// x0, x1
+
+	add	x3, x2, #CPU_XREG_OFFSET(0)
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	save_common_regs
+.endm
+
+.macro restore_guest_regs
+	// x0 is the vcpu address.
+	// x2 is the cpu context
+	// x3 is a tmp register
+
+	// Prepare x0-x3 for later restore
+	add	x3, x2, #CPU_XREG_OFFSET(0)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	push	x4, x5		// Push x0-x3 on the stack
+	push	x6, x7
+
+	// x4-x18
+	ldp	x4, x5, [x3, #32]
+	ldp	x6, x7, [x3, #48]
+	ldp	x8, x9, [x3, #64]
+	ldp	x10, x11, [x3, #80]
+	ldp	x12, x13, [x3, #96]
+	ldp	x14, x15, [x3, #112]
+	ldp	x16, x17, [x3, #128]
+	ldr	x18, [x3, #144]
+
+	// x19-x29, lr, sp*, elr*, spsr*
+	restore_common_regs
+
+	// Last bits of the 64bit state
+	pop	x2, x3
+	pop	x0, x1
+
+	// Do not touch any register after this!
+.endm
+
+/*
+ * Macros to perform system register save/restore.
+ *
+ * Ordering here is absolutely critical, and must be kept consistent
+ * in {save,restore}_sysregs, {save,restore}_guest_32bit_state,
+ * and in kvm_asm.h.
+ *
+ * In other words, don't touch any of these unless you know what
+ * you are doing.
+ */
+.macro save_sysregs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+	mrs	x4,	vmpidr_el2
+	mrs	x5,	csselr_el1
+	mrs	x6,	sctlr_el1
+	mrs	x7,	actlr_el1
+	mrs	x8,	cpacr_el1
+	mrs	x9,	ttbr0_el1
+	mrs	x10,	ttbr1_el1
+	mrs	x11,	tcr_el1
+	mrs	x12,	esr_el1
+	mrs	x13, 	afsr0_el1
+	mrs	x14,	afsr1_el1
+	mrs	x15,	far_el1
+	mrs	x16,	mair_el1
+	mrs	x17,	vbar_el1
+	mrs	x18,	contextidr_el1
+	mrs	x19,	tpidr_el0
+	mrs	x20,	tpidrro_el0
+	mrs	x21,	tpidr_el1
+	mrs	x22, 	amair_el1
+	mrs	x23, 	cntkctl_el1
+	mrs	x24,	par_el1
+	mrs	x25,	mdscr_el1
+
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+	stp	x8, x9, [x3, #32]
+	stp	x10, x11, [x3, #48]
+	stp	x12, x13, [x3, #64]
+	stp	x14, x15, [x3, #80]
+	stp	x16, x17, [x3, #96]
+	stp	x18, x19, [x3, #112]
+	stp	x20, x21, [x3, #128]
+	stp	x22, x23, [x3, #144]
+	stp	x24, x25, [x3, #160]
+.endm
+
+.macro save_debug
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgbcr15_el1
+	mrs	x19, dbgbcr14_el1
+	mrs	x18, dbgbcr13_el1
+	mrs	x17, dbgbcr12_el1
+	mrs	x16, dbgbcr11_el1
+	mrs	x15, dbgbcr10_el1
+	mrs	x14, dbgbcr9_el1
+	mrs	x13, dbgbcr8_el1
+	mrs	x12, dbgbcr7_el1
+	mrs	x11, dbgbcr6_el1
+	mrs	x10, dbgbcr5_el1
+	mrs	x9, dbgbcr4_el1
+	mrs	x8, dbgbcr3_el1
+	mrs	x7, dbgbcr2_el1
+	mrs	x6, dbgbcr1_el1
+	mrs	x5, dbgbcr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgbvr15_el1
+	mrs	x19, dbgbvr14_el1
+	mrs	x18, dbgbvr13_el1
+	mrs	x17, dbgbvr12_el1
+	mrs	x16, dbgbvr11_el1
+	mrs	x15, dbgbvr10_el1
+	mrs	x14, dbgbvr9_el1
+	mrs	x13, dbgbvr8_el1
+	mrs	x12, dbgbvr7_el1
+	mrs	x11, dbgbvr6_el1
+	mrs	x10, dbgbvr5_el1
+	mrs	x9, dbgbvr4_el1
+	mrs	x8, dbgbvr3_el1
+	mrs	x7, dbgbvr2_el1
+	mrs	x6, dbgbvr1_el1
+	mrs	x5, dbgbvr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgwcr15_el1
+	mrs	x19, dbgwcr14_el1
+	mrs	x18, dbgwcr13_el1
+	mrs	x17, dbgwcr12_el1
+	mrs	x16, dbgwcr11_el1
+	mrs	x15, dbgwcr10_el1
+	mrs	x14, dbgwcr9_el1
+	mrs	x13, dbgwcr8_el1
+	mrs	x12, dbgwcr7_el1
+	mrs	x11, dbgwcr6_el1
+	mrs	x10, dbgwcr5_el1
+	mrs	x9, dbgwcr4_el1
+	mrs	x8, dbgwcr3_el1
+	mrs	x7, dbgwcr2_el1
+	mrs	x6, dbgwcr1_el1
+	mrs	x5, dbgwcr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgwvr15_el1
+	mrs	x19, dbgwvr14_el1
+	mrs	x18, dbgwvr13_el1
+	mrs	x17, dbgwvr12_el1
+	mrs	x16, dbgwvr11_el1
+	mrs	x15, dbgwvr10_el1
+	mrs	x14, dbgwvr9_el1
+	mrs	x13, dbgwvr8_el1
+	mrs	x12, dbgwvr7_el1
+	mrs	x11, dbgwvr6_el1
+	mrs	x10, dbgwvr5_el1
+	mrs	x9, dbgwvr4_el1
+	mrs	x8, dbgwvr3_el1
+	mrs	x7, dbgwvr2_el1
+	mrs	x6, dbgwvr1_el1
+	mrs	x5, dbgwvr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	mrs	x21, mdccint_el1
+	str	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+.endm
+
+.macro restore_sysregs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	ldp	x8, x9, [x3, #32]
+	ldp	x10, x11, [x3, #48]
+	ldp	x12, x13, [x3, #64]
+	ldp	x14, x15, [x3, #80]
+	ldp	x16, x17, [x3, #96]
+	ldp	x18, x19, [x3, #112]
+	ldp	x20, x21, [x3, #128]
+	ldp	x22, x23, [x3, #144]
+	ldp	x24, x25, [x3, #160]
+
+	msr	vmpidr_el2,	x4
+	msr	csselr_el1,	x5
+	msr	sctlr_el1,	x6
+	msr	actlr_el1,	x7
+	msr	cpacr_el1,	x8
+	msr	ttbr0_el1,	x9
+	msr	ttbr1_el1,	x10
+	msr	tcr_el1,	x11
+	msr	esr_el1,	x12
+	msr	afsr0_el1,	x13
+	msr	afsr1_el1,	x14
+	msr	far_el1,	x15
+	msr	mair_el1,	x16
+	msr	vbar_el1,	x17
+	msr	contextidr_el1,	x18
+	msr	tpidr_el0,	x19
+	msr	tpidrro_el0,	x20
+	msr	tpidr_el1,	x21
+	msr	amair_el1,	x22
+	msr	cntkctl_el1,	x23
+	msr	par_el1,	x24
+	msr	mdscr_el1,	x25
+.endm
+
+.macro restore_debug
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	msr	dbgbcr15_el1, x20
+	msr	dbgbcr14_el1, x19
+	msr	dbgbcr13_el1, x18
+	msr	dbgbcr12_el1, x17
+	msr	dbgbcr11_el1, x16
+	msr	dbgbcr10_el1, x15
+	msr	dbgbcr9_el1, x14
+	msr	dbgbcr8_el1, x13
+	msr	dbgbcr7_el1, x12
+	msr	dbgbcr6_el1, x11
+	msr	dbgbcr5_el1, x10
+	msr	dbgbcr4_el1, x9
+	msr	dbgbcr3_el1, x8
+	msr	dbgbcr2_el1, x7
+	msr	dbgbcr1_el1, x6
+	msr	dbgbcr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	msr	dbgbvr15_el1, x20
+	msr	dbgbvr14_el1, x19
+	msr	dbgbvr13_el1, x18
+	msr	dbgbvr12_el1, x17
+	msr	dbgbvr11_el1, x16
+	msr	dbgbvr10_el1, x15
+	msr	dbgbvr9_el1, x14
+	msr	dbgbvr8_el1, x13
+	msr	dbgbvr7_el1, x12
+	msr	dbgbvr6_el1, x11
+	msr	dbgbvr5_el1, x10
+	msr	dbgbvr4_el1, x9
+	msr	dbgbvr3_el1, x8
+	msr	dbgbvr2_el1, x7
+	msr	dbgbvr1_el1, x6
+	msr	dbgbvr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	msr	dbgwcr15_el1, x20
+	msr	dbgwcr14_el1, x19
+	msr	dbgwcr13_el1, x18
+	msr	dbgwcr12_el1, x17
+	msr	dbgwcr11_el1, x16
+	msr	dbgwcr10_el1, x15
+	msr	dbgwcr9_el1, x14
+	msr	dbgwcr8_el1, x13
+	msr	dbgwcr7_el1, x12
+	msr	dbgwcr6_el1, x11
+	msr	dbgwcr5_el1, x10
+	msr	dbgwcr4_el1, x9
+	msr	dbgwcr3_el1, x8
+	msr	dbgwcr2_el1, x7
+	msr	dbgwcr1_el1, x6
+	msr	dbgwcr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	msr	dbgwvr15_el1, x20
+	msr	dbgwvr14_el1, x19
+	msr	dbgwvr13_el1, x18
+	msr	dbgwvr12_el1, x17
+	msr	dbgwvr11_el1, x16
+	msr	dbgwvr10_el1, x15
+	msr	dbgwvr9_el1, x14
+	msr	dbgwvr8_el1, x13
+	msr	dbgwvr7_el1, x12
+	msr	dbgwvr6_el1, x11
+	msr	dbgwvr5_el1, x10
+	msr	dbgwvr4_el1, x9
+	msr	dbgwvr3_el1, x8
+	msr	dbgwvr2_el1, x7
+	msr	dbgwvr1_el1, x6
+	msr	dbgwvr0_el1, x5
+
+	ldr	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+	msr	mdccint_el1, x21
+.endm
+
+.macro skip_32bit_state tmp, target
+	// Skip 32bit state if not needed
+	mrs	\tmp, hcr_el2
+	tbnz	\tmp, #HCR_RW_SHIFT, \target
+.endm
+
+.macro skip_tee_state tmp, target
+	// Skip ThumbEE state if not needed
+	mrs	\tmp, id_pfr0_el1
+	tbz	\tmp, #12, \target
+.endm
+
+.macro skip_debug_state tmp, target
+	ldr	\tmp, [x0, #VCPU_DEBUG_FLAGS]
+	tbz	\tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
+.endm
+
+.macro compute_debug_state target
+	// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
+	// is set, we do a full save/restore cycle and disable trapping.
+	add	x25, x0, #VCPU_CONTEXT
+
+	// Check the state of MDSCR_EL1
+	ldr	x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
+	and	x26, x25, #DBG_MDSCR_KDE
+	and	x25, x25, #DBG_MDSCR_MDE
+	adds	xzr, x25, x26
+	b.eq	9998f		// Nothing to see there
+
+	// If any interesting bits was set, we must set the flag
+	mov	x26, #KVM_ARM64_DEBUG_DIRTY
+	str	x26, [x0, #VCPU_DEBUG_FLAGS]
+	b	9999f		// Don't skip restore
+
+9998:
+	// Otherwise load the flags from memory in case we recently
+	// trapped
+	skip_debug_state x25, \target
+9999:
+.endm
+
+.macro save_guest_32bit_state
+	skip_32bit_state x3, 1f
+
+	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+	mrs	x4, spsr_abt
+	mrs	x5, spsr_und
+	mrs	x6, spsr_irq
+	mrs	x7, spsr_fiq
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+	mrs	x4, dacr32_el2
+	mrs	x5, ifsr32_el2
+	mrs	x6, fpexc32_el2
+	stp	x4, x5, [x3]
+	str	x6, [x3, #16]
+
+	skip_debug_state x8, 2f
+	mrs	x7, dbgvcr32_el2
+	str	x7, [x3, #24]
+2:
+	skip_tee_state x8, 1f
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+	mrs	x4, teecr32_el1
+	mrs	x5, teehbr32_el1
+	stp	x4, x5, [x3]
+1:
+.endm
+
+.macro restore_guest_32bit_state
+	skip_32bit_state x3, 1f
+
+	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	msr	spsr_abt, x4
+	msr	spsr_und, x5
+	msr	spsr_irq, x6
+	msr	spsr_fiq, x7
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+	ldp	x4, x5, [x3]
+	ldr	x6, [x3, #16]
+	msr	dacr32_el2, x4
+	msr	ifsr32_el2, x5
+	msr	fpexc32_el2, x6
+
+	skip_debug_state x8, 2f
+	ldr	x7, [x3, #24]
+	msr	dbgvcr32_el2, x7
+2:
+	skip_tee_state x8, 1f
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+	ldp	x4, x5, [x3]
+	msr	teecr32_el1, x4
+	msr	teehbr32_el1, x5
+1:
+.endm
+
+.macro activate_traps
+	ldr     x2, [x0, #VCPU_HCR_EL2]
+	msr     hcr_el2, x2
+	mov	x2, #CPTR_EL2_TTA
+	msr	cptr_el2, x2
+
+	mov	x2, #(1 << 15)	// Trap CP15 Cr=15
+	msr	hstr_el2, x2
+
+	mrs	x2, mdcr_el2
+	and	x2, x2, #MDCR_EL2_HPMN_MASK
+	orr	x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+	orr	x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
+
+	// Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
+	// if not dirty.
+	ldr	x3, [x0, #VCPU_DEBUG_FLAGS]
+	tbnz	x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
+	orr	x2, x2,  #MDCR_EL2_TDA
+1:
+	msr	mdcr_el2, x2
+.endm
+
+.macro deactivate_traps
+	mov	x2, #HCR_RW
+	msr	hcr_el2, x2
+	msr	cptr_el2, xzr
+	msr	hstr_el2, xzr
+
+	mrs	x2, mdcr_el2
+	and	x2, x2, #MDCR_EL2_HPMN_MASK
+	msr	mdcr_el2, x2
+.endm
+
+.macro activate_vm
+	ldr	x1, [x0, #VCPU_KVM]
+	kern_hyp_va	x1
+	ldr	x2, [x1, #KVM_VTTBR]
+	msr	vttbr_el2, x2
+.endm
+
+.macro deactivate_vm
+	msr	vttbr_el2, xzr
+.endm
+
+/*
+ * Call into the vgic backend for state saving
+ */
+.macro save_vgic_state
+	adr	x24, __vgic_sr_vectors
+	ldr	x24, [x24, VGIC_SAVE_FN]
+	kern_hyp_va	x24
+	blr	x24
+	mrs	x24, hcr_el2
+	mov	x25, #HCR_INT_OVERRIDE
+	neg	x25, x25
+	and	x24, x24, x25
+	msr	hcr_el2, x24
+.endm
+
+/*
+ * Call into the vgic backend for state restoring
+ */
+.macro restore_vgic_state
+	mrs	x24, hcr_el2
+	ldr	x25, [x0, #VCPU_IRQ_LINES]
+	orr	x24, x24, #HCR_INT_OVERRIDE
+	orr	x24, x24, x25
+	msr	hcr_el2, x24
+	adr	x24, __vgic_sr_vectors
+	ldr	x24, [x24, #VGIC_RESTORE_FN]
+	kern_hyp_va	x24
+	blr	x24
+.endm
+
+.macro save_timer_state
+	// x0: vcpu pointer
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va x2
+	ldr	w3, [x2, #KVM_TIMER_ENABLED]
+	cbz	w3, 1f
+
+	mrs	x3, cntv_ctl_el0
+	and	x3, x3, #3
+	str	w3, [x0, #VCPU_TIMER_CNTV_CTL]
+	bic	x3, x3, #1		// Clear Enable
+	msr	cntv_ctl_el0, x3
+
+	isb
+
+	mrs	x3, cntv_cval_el0
+	str	x3, [x0, #VCPU_TIMER_CNTV_CVAL]
+
+1:
+	// Allow physical timer/counter access for the host
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #3
+	msr	cnthctl_el2, x2
+
+	// Clear cntvoff for the host
+	msr	cntvoff_el2, xzr
+.endm
+
+.macro restore_timer_state
+	// x0: vcpu pointer
+	// Disallow physical timer access for the guest
+	// Physical counter access is allowed
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #1
+	bic	x2, x2, #2
+	msr	cnthctl_el2, x2
+
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va x2
+	ldr	w3, [x2, #KVM_TIMER_ENABLED]
+	cbz	w3, 1f
+
+	ldr	x3, [x2, #KVM_TIMER_CNTVOFF]
+	msr	cntvoff_el2, x3
+	ldr	x2, [x0, #VCPU_TIMER_CNTV_CVAL]
+	msr	cntv_cval_el0, x2
+	isb
+
+	ldr	w2, [x0, #VCPU_TIMER_CNTV_CTL]
+	and	x2, x2, #3
+	msr	cntv_ctl_el0, x2
+1:
+.endm
+
+__save_sysregs:
+	save_sysregs
+	ret
+
+__restore_sysregs:
+	restore_sysregs
+	ret
+
+__save_debug:
+	save_debug
+	ret
+
+__restore_debug:
+	restore_debug
+	ret
+
+__save_fpsimd:
+	save_fpsimd
+	ret
+
+__restore_fpsimd:
+	restore_fpsimd
+	ret
+
+/*
+ * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+ *
+ * This is the world switch. The first half of the function
+ * deals with entering the guest, and anything from __kvm_vcpu_return
+ * to the end of the function deals with reentering the host.
+ * On the enter path, only x0 (vcpu pointer) must be preserved until
+ * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception
+ * code) must both be preserved until the epilogue.
+ * In both cases, x2 points to the CPU context we're saving/restoring from/to.
+ */
+ENTRY(__kvm_vcpu_run)
+	kern_hyp_va	x0
+	msr	tpidr_el2, x0	// Save the vcpu register
+
+	// Host context
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	save_host_regs
+	bl __save_fpsimd
+	bl __save_sysregs
+
+	compute_debug_state 1f
+	bl	__save_debug
+1:
+	activate_traps
+	activate_vm
+
+	restore_vgic_state
+	restore_timer_state
+
+	// Guest context
+	add	x2, x0, #VCPU_CONTEXT
+
+	bl __restore_sysregs
+	bl __restore_fpsimd
+
+	skip_debug_state x3, 1f
+	bl	__restore_debug
+1:
+	restore_guest_32bit_state
+	restore_guest_regs
+
+	// That's it, no more messing around.
+	eret
+
+__kvm_vcpu_return:
+	// Assume x0 is the vcpu pointer, x1 the return code
+	// Guest's x0-x3 are on the stack
+
+	// Guest context
+	add	x2, x0, #VCPU_CONTEXT
+
+	save_guest_regs
+	bl __save_fpsimd
+	bl __save_sysregs
+
+	skip_debug_state x3, 1f
+	bl	__save_debug
+1:
+	save_guest_32bit_state
+
+	save_timer_state
+	save_vgic_state
+
+	deactivate_traps
+	deactivate_vm
+
+	// Host context
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	bl __restore_sysregs
+	bl __restore_fpsimd
+
+	skip_debug_state x3, 1f
+	// Clear the dirty flag for the next run, as all the state has
+	// already been saved. Note that we nuke the whole 64bit word.
+	// If we ever add more flags, we'll have to be more careful...
+	str	xzr, [x0, #VCPU_DEBUG_FLAGS]
+	bl	__restore_debug
+1:
+	restore_host_regs
+
+	mov	x0, x1
+	ret
+END(__kvm_vcpu_run)
+
+// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+ENTRY(__kvm_tlb_flush_vmid_ipa)
+	dsb	ishst
+
+	kern_hyp_va	x0
+	ldr	x2, [x0, #KVM_VTTBR]
+	msr	vttbr_el2, x2
+	isb
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 */
+	lsr	x1, x1, #12
+	tlbi	ipas2e1is, x1
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb	ish
+	tlbi	vmalle1is
+	dsb	ish
+	isb
+
+	msr	vttbr_el2, xzr
+	ret
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
+
+/**
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
+ * @struct kvm *kvm - pointer to kvm structure
+ *
+ * Invalidates all Stage 1 and 2 TLB entries for current VMID.
+ */
+ENTRY(__kvm_tlb_flush_vmid)
+	dsb     ishst
+
+	kern_hyp_va     x0
+	ldr     x2, [x0, #KVM_VTTBR]
+	msr     vttbr_el2, x2
+	isb
+
+	tlbi    vmalls12e1is
+	dsb     ish
+	isb
+
+	msr     vttbr_el2, xzr
+	ret
+ENDPROC(__kvm_tlb_flush_vmid)
+
+ENTRY(__kvm_flush_vm_context)
+	dsb	ishst
+	tlbi	alle1is
+	ic	ialluis
+	dsb	ish
+	ret
+ENDPROC(__kvm_flush_vm_context)
+
+	// struct vgic_sr_vectors __vgi_sr_vectors;
+	.align 3
+ENTRY(__vgic_sr_vectors)
+	.skip	VGIC_SR_VECTOR_SZ
+ENDPROC(__vgic_sr_vectors)
+
+__kvm_hyp_panic:
+	// Guess the context by looking at VTTBR:
+	// If zero, then we're already a host.
+	// Otherwise restore a minimal host context before panicing.
+	mrs	x0, vttbr_el2
+	cbz	x0, 1f
+
+	mrs	x0, tpidr_el2
+
+	deactivate_traps
+	deactivate_vm
+
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	bl __restore_sysregs
+
+1:	adr	x0, __hyp_panic_str
+	adr	x1, 2f
+	ldp	x2, x3, [x1]
+	sub	x0, x0, x2
+	add	x0, x0, x3
+	mrs	x1, spsr_el2
+	mrs	x2, elr_el2
+	mrs	x3, esr_el2
+	mrs	x4, far_el2
+	mrs	x5, hpfar_el2
+	mrs	x6, par_el1
+	mrs	x7, tpidr_el2
+
+	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+		      PSR_MODE_EL1h)
+	msr	spsr_el2, lr
+	ldr	lr, =panic
+	msr	elr_el2, lr
+	eret
+
+	.align	3
+2:	.quad	HYP_PAGE_OFFSET
+	.quad	PAGE_OFFSET
+ENDPROC(__kvm_hyp_panic)
+
+__hyp_panic_str:
+	.ascii	"HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+
+	.align	2
+
+/*
+ * u64 kvm_call_hyp(void *hypfn, ...);
+ *
+ * This is not really a variadic function in the classic C-way and care must
+ * be taken when calling this to ensure parameters are passed in registers
+ * only, since the stack will change between the caller and the callee.
+ *
+ * Call the function with the first argument containing a pointer to the
+ * function you wish to call in Hyp mode, and subsequent arguments will be
+ * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
+ * function pointer can be passed).  The function being called must be mapped
+ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
+ * passed in r0 and r1.
+ *
+ * A function pointer with a value of 0 has a special meaning, and is
+ * used to implement __hyp_get_vectors in the same way as in
+ * arch/arm64/kernel/hyp_stub.S.
+ */
+ENTRY(kvm_call_hyp)
+	hvc	#0
+	ret
+ENDPROC(kvm_call_hyp)
+
+.macro invalid_vector	label, target
+	.align	2
+\label:
+	b \target
+ENDPROC(\label)
+.endm
+
+	/* None of these should ever happen */
+	invalid_vector	el2t_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_error_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_error_invalid, __kvm_hyp_panic
+	invalid_vector	el1_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el1_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el1_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el1_error_invalid, __kvm_hyp_panic
+
+el1_sync:					// Guest trapped into EL2
+	push	x0, x1
+	push	x2, x3
+
+	mrs	x1, esr_el2
+	lsr	x2, x1, #ESR_ELx_EC_SHIFT
+
+	cmp	x2, #ESR_ELx_EC_HVC64
+	b.ne	el1_trap
+
+	mrs	x3, vttbr_el2			// If vttbr is valid, the 64bit guest
+	cbnz	x3, el1_trap			// called HVC
+
+	/* Here, we're pretty sure the host called HVC. */
+	pop	x2, x3
+	pop	x0, x1
+
+	/* Check for __hyp_get_vectors */
+	cbnz	x0, 1f
+	mrs	x0, vbar_el2
+	b	2f
+
+1:	push	lr, xzr
+
+	/*
+	 * Compute the function address in EL2, and shuffle the parameters.
+	 */
+	kern_hyp_va	x0
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+
+	pop	lr, xzr
+2:	eret
+
+el1_trap:
+	/*
+	 * x1: ESR
+	 * x2: ESR_EC
+	 */
+	cmp	x2, #ESR_ELx_EC_DABT_LOW
+	mov	x0, #ESR_ELx_EC_IABT_LOW
+	ccmp	x2, x0, #4, ne
+	b.ne	1f		// Not an abort we care about
+
+	/* This is an abort. Check for permission fault */
+	and	x2, x1, #ESR_ELx_FSC_TYPE
+	cmp	x2, #FSC_PERM
+	b.ne	1f		// Not a permission fault
+
+	/*
+	 * Check for Stage-1 page table walk, which is guaranteed
+	 * to give a valid HPFAR_EL2.
+	 */
+	tbnz	x1, #7, 1f	// S1PTW is set
+
+	/* Preserve PAR_EL1 */
+	mrs	x3, par_el1
+	push	x3, xzr
+
+	/*
+	 * Permission fault, HPFAR_EL2 is invalid.
+	 * Resolve the IPA the hard way using the guest VA.
+	 * Stage-1 translation already validated the memory access rights.
+	 * As such, we can use the EL1 translation regime, and don't have
+	 * to distinguish between EL0 and EL1 access.
+	 */
+	mrs	x2, far_el2
+	at	s1e1r, x2
+	isb
+
+	/* Read result */
+	mrs	x3, par_el1
+	pop	x0, xzr			// Restore PAR_EL1 from the stack
+	msr	par_el1, x0
+	tbnz	x3, #0, 3f		// Bail out if we failed the translation
+	ubfx	x3, x3, #12, #36	// Extract IPA
+	lsl	x3, x3, #4		// and present it like HPFAR
+	b	2f
+
+1:	mrs	x3, hpfar_el2
+	mrs	x2, far_el2
+
+2:	mrs	x0, tpidr_el2
+	str	w1, [x0, #VCPU_ESR_EL2]
+	str	x2, [x0, #VCPU_FAR_EL2]
+	str	x3, [x0, #VCPU_HPFAR_EL2]
+
+	mov	x1, #ARM_EXCEPTION_TRAP
+	b	__kvm_vcpu_return
+
+	/*
+	 * Translation failed. Just return to the guest and
+	 * let it fault again. Another CPU is probably playing
+	 * behind our back.
+	 */
+3:	pop	x2, x3
+	pop	x0, x1
+
+	eret
+
+el1_irq:
+	push	x0, x1
+	push	x2, x3
+	mrs	x0, tpidr_el2
+	mov	x1, #ARM_EXCEPTION_IRQ
+	b	__kvm_vcpu_return
+
+	.ltorg
+
+	.align 11
+
+ENTRY(__kvm_hyp_vector)
+	ventry	el2t_sync_invalid		// Synchronous EL2t
+	ventry	el2t_irq_invalid		// IRQ EL2t
+	ventry	el2t_fiq_invalid		// FIQ EL2t
+	ventry	el2t_error_invalid		// Error EL2t
+
+	ventry	el2h_sync_invalid		// Synchronous EL2h
+	ventry	el2h_irq_invalid		// IRQ EL2h
+	ventry	el2h_fiq_invalid		// FIQ EL2h
+	ventry	el2h_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq				// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync			// Synchronous 32-bit EL1
+	ventry	el1_irq				// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)
+
+	.popsection
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
new file mode 100644
index 000000000..f02530e72
--- /dev/null
+++ b/arch/arm64/kvm/inject_fault.c
@@ -0,0 +1,203 @@
+/*
+ * Fault injection for both 32 and 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/esr.h>
+
+#define PSTATE_FAULT_BITS_64 	(PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
+				 PSR_I_BIT | PSR_D_BIT)
+#define EL1_EXCEPT_SYNC_OFFSET	0x200
+
+static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+	unsigned long cpsr;
+	unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
+	bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT);
+	u32 return_offset = (is_thumb) ? 4 : 0;
+	u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+
+	cpsr = mode | COMPAT_PSR_I_BIT;
+
+	if (sctlr & (1 << 30))
+		cpsr |= COMPAT_PSR_T_BIT;
+	if (sctlr & (1 << 25))
+		cpsr |= COMPAT_PSR_E_BIT;
+
+	*vcpu_cpsr(vcpu) = cpsr;
+
+	/* Note: These now point to the banked copies */
+	*vcpu_spsr(vcpu) = new_spsr_value;
+	*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+
+	/* Branch to exception vector */
+	if (sctlr & (1 << 13))
+		vect_offset += 0xffff0000;
+	else /* always have security exceptions */
+		vect_offset += vcpu_cp15(vcpu, c12_VBAR);
+
+	*vcpu_pc(vcpu) = vect_offset;
+}
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+	prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4);
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException
+ * pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
+			 unsigned long addr)
+{
+	u32 vect_offset;
+	u32 *far, *fsr;
+	bool is_lpae;
+
+	if (is_pabt) {
+		vect_offset = 12;
+		far = &vcpu_cp15(vcpu, c6_IFAR);
+		fsr = &vcpu_cp15(vcpu, c5_IFSR);
+	} else { /* !iabt */
+		vect_offset = 16;
+		far = &vcpu_cp15(vcpu, c6_DFAR);
+		fsr = &vcpu_cp15(vcpu, c5_DFSR);
+	}
+
+	prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset);
+
+	*far = addr;
+
+	/* Give the guest an IMPLEMENTATION DEFINED exception */
+	is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
+	if (is_lpae)
+		*fsr = 1 << 9 | 0x34;
+	else
+		*fsr = 0x14;
+}
+
+static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
+{
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_aarch32;
+	u32 esr = 0;
+
+	is_aarch32 = vcpu_mode_is_32bit(vcpu);
+
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+
+	/*
+	 * Build an {i,d}abort, depending on the level and the
+	 * instruction set. Report an external synchronous abort.
+	 */
+	if (kvm_vcpu_trap_il_is32bit(vcpu))
+		esr |= ESR_ELx_IL;
+
+	/*
+	 * Here, the guest runs in AArch64 mode when in EL1. If we get
+	 * an AArch32 fault, it means we managed to trap an EL0 fault.
+	 */
+	if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+		esr |= (ESR_ELx_EC_IABT_LOW << ESR_ELx_EC_SHIFT);
+	else
+		esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);
+
+	if (!is_iabt)
+		esr |= ESR_ELx_EC_DABT_LOW;
+
+	vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
+}
+
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+	/*
+	 * Build an unknown exception, depending on the instruction
+	 * set.
+	 */
+	if (kvm_vcpu_trap_il_is32bit(vcpu))
+		esr |= ESR_ELx_IL;
+
+	vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, false, addr);
+
+	inject_abt64(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, true, addr);
+
+	inject_abt64(vcpu, true, addr);
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined instruction into the guest
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_undef32(vcpu);
+
+	inject_undef64(vcpu);
+}
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
new file mode 100644
index 000000000..bbc6ae32e
--- /dev/null
+++ b/arch/arm64/kvm/regmap.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/emulate.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/ptrace.h>
+
+#define VCPU_NR_MODES 6
+#define REG_OFFSET(_reg) \
+	(offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
+
+#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
+
+static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
+	/* USR Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12), USR_REG_OFFSET(13),	USR_REG_OFFSET(14),
+		REG_OFFSET(pc)
+	},
+
+	/* FIQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+		REG_OFFSET(compat_r8_fiq),  /* r8 */
+		REG_OFFSET(compat_r9_fiq),  /* r9 */
+		REG_OFFSET(compat_r10_fiq), /* r10 */
+		REG_OFFSET(compat_r11_fiq), /* r11 */
+		REG_OFFSET(compat_r12_fiq), /* r12 */
+		REG_OFFSET(compat_sp_fiq),  /* r13 */
+		REG_OFFSET(compat_lr_fiq),  /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* IRQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_irq), /* r13 */
+		REG_OFFSET(compat_lr_irq), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* SVC Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_svc), /* r13 */
+		REG_OFFSET(compat_lr_svc), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* ABT Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_abt), /* r13 */
+		REG_OFFSET(compat_lr_abt), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* UND Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_und), /* r13 */
+		REG_OFFSET(compat_lr_und), /* r14 */
+		REG_OFFSET(pc)
+	},
+};
+
+/*
+ * Return a pointer to the register number valid in the current mode of
+ * the virtual CPU.
+ */
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+
+	switch (mode) {
+	case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC:
+		mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
+		break;
+
+	case COMPAT_PSR_MODE_ABT:
+		mode = 4;
+		break;
+
+	case COMPAT_PSR_MODE_UND:
+		mode = 5;
+		break;
+
+	case COMPAT_PSR_MODE_SYS:
+		mode = 0;	/* SYS maps to USR */
+		break;
+
+	default:
+		BUG();
+	}
+
+	return reg_array + vcpu_reg_offsets[mode][reg_num];
+}
+
+/*
+ * Return the SPSR for the current mode of the virtual CPU.
+ */
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+	switch (mode) {
+	case COMPAT_PSR_MODE_SVC:
+		mode = KVM_SPSR_SVC;
+		break;
+	case COMPAT_PSR_MODE_ABT:
+		mode = KVM_SPSR_ABT;
+		break;
+	case COMPAT_PSR_MODE_UND:
+		mode = KVM_SPSR_UND;
+		break;
+	case COMPAT_PSR_MODE_IRQ:
+		mode = KVM_SPSR_IRQ;
+		break;
+	case COMPAT_PSR_MODE_FIQ:
+		mode = KVM_SPSR_FIQ;
+		break;
+	default:
+		BUG();
+	}
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
new file mode 100644
index 000000000..0b4326578
--- /dev/null
+++ b/arch/arm64/kvm/reset.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/reset.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include <kvm/arm_arch_timer.h>
+
+#include <asm/cputype.h>
+#include <asm/ptrace.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_coproc.h>
+
+/*
+ * ARMv8 Reset Values
+ */
+static const struct kvm_regs default_regs_reset = {
+	.regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
+			PSR_F_BIT | PSR_D_BIT),
+};
+
+static const struct kvm_regs default_regs_reset32 = {
+	.regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT |
+			COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
+};
+
+static const struct kvm_irq_level default_vtimer_irq = {
+	.irq	= 27,
+	.level	= 1,
+};
+
+static bool cpu_has_32bit_el1(void)
+{
+	u64 pfr0;
+
+	pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+	return !!(pfr0 & 0x20);
+}
+
+int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+	int r;
+
+	switch (ext) {
+	case KVM_CAP_ARM_EL1_32BIT:
+		r = cpu_has_32bit_el1();
+		break;
+	default:
+		r = 0;
+	}
+
+	return r;
+}
+
+/**
+ * kvm_reset_vcpu - sets core registers and sys_regs to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on
+ * the virtual CPU struct to their architectually defined reset
+ * values.
+ */
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+	const struct kvm_irq_level *cpu_vtimer_irq;
+	const struct kvm_regs *cpu_reset;
+
+	switch (vcpu->arch.target) {
+	default:
+		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+			if (!cpu_has_32bit_el1())
+				return -EINVAL;
+			cpu_reset = &default_regs_reset32;
+		} else {
+			cpu_reset = &default_regs_reset;
+		}
+
+		cpu_vtimer_irq = &default_vtimer_irq;
+		break;
+	}
+
+	/* Reset core registers */
+	memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
+
+	/* Reset system registers */
+	kvm_reset_sys_regs(vcpu);
+
+	/* Reset timer */
+	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
+	return 0;
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
new file mode 100644
index 000000000..c370b4014
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.c
@@ -0,0 +1,1521 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/debug-monitors.h>
+#include <asm/esr.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_mmu.h>
+
+#include <trace/events/kvm.h>
+
+#include "sys_regs.h"
+
+/*
+ * All of this file is extremly similar to the ARM coproc.c, but the
+ * types are different. My gut feeling is that it should be pretty
+ * easy to merge, but that would be an ABI breakage -- again. VFP
+ * would also need to be abstracted.
+ *
+ * For AArch32, we only take care of what is being trapped. Anything
+ * that has to do with init and userspace access has to go via the
+ * 64bit interface.
+ */
+
+/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
+static u32 cache_levels;
+
+/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
+#define CSSELR_MAX 12
+
+/* Which cache CCSIDR represents depends on CSSELR value. */
+static u32 get_ccsidr(u32 csselr)
+{
+	u32 ccsidr;
+
+	/* Make sure noone else changes CSSELR during this! */
+	local_irq_disable();
+	/* Put value into CSSELR */
+	asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+	isb();
+	/* Read result out of CCSIDR */
+	asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
+	local_irq_enable();
+
+	return ccsidr;
+}
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ */
+static bool access_dcsw(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	if (!p->is_write)
+		return read_from_write_only(vcpu, p);
+
+	kvm_set_way_flush(vcpu);
+	return true;
+}
+
+/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set. If the guest enables the MMU, we stop trapping the VM
+ * sys_regs and leave it in complete control of the caches.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+			  const struct sys_reg_params *p,
+			  const struct sys_reg_desc *r)
+{
+	unsigned long val;
+	bool was_enabled = vcpu_has_cache_enabled(vcpu);
+
+	BUG_ON(!p->is_write);
+
+	val = *vcpu_reg(vcpu, p->Rt);
+	if (!p->is_aarch32) {
+		vcpu_sys_reg(vcpu, r->reg) = val;
+	} else {
+		if (!p->is_32bit)
+			vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
+		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+	}
+
+	kvm_toggle_cache(vcpu, was_enabled);
+	return true;
+}
+
+/*
+ * Trap handler for the GICv3 SGI generation system register.
+ * Forward the request to the VGIC emulation.
+ * The cp15_64 code makes sure this automatically works
+ * for both AArch64 and AArch32 accesses.
+ */
+static bool access_gic_sgi(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	u64 val;
+
+	if (!p->is_write)
+		return read_from_write_only(vcpu, p);
+
+	val = *vcpu_reg(vcpu, p->Rt);
+	vgic_v3_dispatch_sgi(vcpu, val);
+
+	return true;
+}
+
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+	else
+		return read_zero(vcpu, p);
+}
+
+static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = (1 << 3);
+		return true;
+	}
+}
+
+static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
+				   const struct sys_reg_params *p,
+				   const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		u32 val;
+		asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
+		*vcpu_reg(vcpu, p->Rt) = val;
+		return true;
+	}
+}
+
+/*
+ * We want to avoid world-switching all the DBG registers all the
+ * time:
+ * 
+ * - If we've touched any debug register, it is likely that we're
+ *   going to touch more of them. It then makes sense to disable the
+ *   traps and start doing the save/restore dance
+ * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
+ *   then mandatory to save/restore the registers, as the guest
+ *   depends on them.
+ * 
+ * For this, we use a DIRTY bit, indicating the guest has modified the
+ * debug registers, used as follow:
+ *
+ * On guest entry:
+ * - If the dirty bit is set (because we're coming back from trapping),
+ *   disable the traps, save host registers, restore guest registers.
+ * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
+ *   set the dirty bit, disable the traps, save host registers,
+ *   restore guest registers.
+ * - Otherwise, enable the traps
+ *
+ * On guest exit:
+ * - If the dirty bit is set, save guest registers, restore host
+ *   registers and clear the dirty bit. This ensure that the host can
+ *   now use the debug registers.
+ */
+static bool trap_debug_regs(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+	}
+
+	return true;
+}
+
+static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 amair;
+
+	asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
+	vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
+}
+
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 mpidr;
+
+	/*
+	 * Map the vcpu_id into the first three affinity level fields of
+	 * the MPIDR. We limit the number of VCPUs in level 0 due to a
+	 * limitation to 16 CPUs in that level in the ICC_SGIxR registers
+	 * of the GICv3 to be able to address each CPU directly when
+	 * sending IPIs.
+	 */
+	mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
+	mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
+	mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
+	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
+}
+
+/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
+#define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
+	/* DBGBVRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100),	\
+	  trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 },		\
+	/* DBGBCRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101),	\
+	  trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 },		\
+	/* DBGWVRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110),	\
+	  trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 },		\
+	/* DBGWCRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111),	\
+	  trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters.  Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ *
+ * Debug handling: We do trap most, if not all debug related system
+ * registers. The implementation is good enough to ensure that a guest
+ * can use these with minimal performance degradation. The drawback is
+ * that we don't implement any of the external debug, none of the
+ * OSlock protocol. This should be revisited if we ever encounter a
+ * more demanding guest...
+ */
+static const struct sys_reg_desc sys_reg_descs[] = {
+	/* DC ISW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010),
+	  access_dcsw },
+	/* DC CSW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010),
+	  access_dcsw },
+	/* DC CISW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
+	  access_dcsw },
+
+	DBG_BCR_BVR_WCR_WVR_EL1(0),
+	DBG_BCR_BVR_WCR_WVR_EL1(1),
+	/* MDCCINT_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+	  trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+	/* MDSCR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+	  trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+	DBG_BCR_BVR_WCR_WVR_EL1(2),
+	DBG_BCR_BVR_WCR_WVR_EL1(3),
+	DBG_BCR_BVR_WCR_WVR_EL1(4),
+	DBG_BCR_BVR_WCR_WVR_EL1(5),
+	DBG_BCR_BVR_WCR_WVR_EL1(6),
+	DBG_BCR_BVR_WCR_WVR_EL1(7),
+	DBG_BCR_BVR_WCR_WVR_EL1(8),
+	DBG_BCR_BVR_WCR_WVR_EL1(9),
+	DBG_BCR_BVR_WCR_WVR_EL1(10),
+	DBG_BCR_BVR_WCR_WVR_EL1(11),
+	DBG_BCR_BVR_WCR_WVR_EL1(12),
+	DBG_BCR_BVR_WCR_WVR_EL1(13),
+	DBG_BCR_BVR_WCR_WVR_EL1(14),
+	DBG_BCR_BVR_WCR_WVR_EL1(15),
+
+	/* MDRAR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  trap_raz_wi },
+	/* OSLAR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
+	  trap_raz_wi },
+	/* OSLSR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
+	  trap_oslsr_el1 },
+	/* OSDLR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
+	  trap_raz_wi },
+	/* DBGPRCR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
+	  trap_raz_wi },
+	/* DBGCLAIMSET_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
+	  trap_raz_wi },
+	/* DBGCLAIMCLR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
+	  trap_raz_wi },
+	/* DBGAUTHSTATUS_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
+	  trap_dbgauthstatus_el1 },
+
+	/* TEECR32_EL1 */
+	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, TEECR32_EL1, 0 },
+	/* TEEHBR32_EL1 */
+	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, TEEHBR32_EL1, 0 },
+
+	/* MDCCSR_EL1 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
+	  trap_raz_wi },
+	/* DBGDTR_EL0 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
+	  trap_raz_wi },
+	/* DBGDTR[TR]X_EL0 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
+	  trap_raz_wi },
+
+	/* DBGVCR32_EL2 */
+	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
+	  NULL, reset_val, DBGVCR32_EL2, 0 },
+
+	/* MPIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
+	  NULL, reset_mpidr, MPIDR_EL1 },
+	/* SCTLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
+	/* CPACR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
+	  NULL, reset_val, CPACR_EL1, 0 },
+	/* TTBR0_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
+	  access_vm_reg, reset_unknown, TTBR0_EL1 },
+	/* TTBR1_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
+	  access_vm_reg, reset_unknown, TTBR1_EL1 },
+	/* TCR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
+	  access_vm_reg, reset_val, TCR_EL1, 0 },
+
+	/* AFSR0_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
+	  access_vm_reg, reset_unknown, AFSR0_EL1 },
+	/* AFSR1_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
+	  access_vm_reg, reset_unknown, AFSR1_EL1 },
+	/* ESR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
+	  access_vm_reg, reset_unknown, ESR_EL1 },
+	/* FAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
+	  access_vm_reg, reset_unknown, FAR_EL1 },
+	/* PAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
+	  NULL, reset_unknown, PAR_EL1 },
+
+	/* PMINTENSET_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
+	  trap_raz_wi },
+	/* PMINTENCLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
+	  trap_raz_wi },
+
+	/* MAIR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
+	  access_vm_reg, reset_unknown, MAIR_EL1 },
+	/* AMAIR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
+	  access_vm_reg, reset_amair_el1, AMAIR_EL1 },
+
+	/* VBAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, VBAR_EL1, 0 },
+
+	/* ICC_SGI1R_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101),
+	  access_gic_sgi },
+	/* ICC_SRE_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101),
+	  trap_raz_wi },
+
+	/* CONTEXTIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
+	  access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
+	/* TPIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
+	  NULL, reset_unknown, TPIDR_EL1 },
+
+	/* CNTKCTL_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000),
+	  NULL, reset_val, CNTKCTL_EL1, 0},
+
+	/* CSSELR_EL1 */
+	{ Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, CSSELR_EL1 },
+
+	/* PMCR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
+	  trap_raz_wi },
+	/* PMCNTENSET_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
+	  trap_raz_wi },
+	/* PMCNTENCLR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
+	  trap_raz_wi },
+	/* PMOVSCLR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
+	  trap_raz_wi },
+	/* PMSWINC_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
+	  trap_raz_wi },
+	/* PMSELR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
+	  trap_raz_wi },
+	/* PMCEID0_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
+	  trap_raz_wi },
+	/* PMCEID1_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
+	  trap_raz_wi },
+	/* PMCCNTR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
+	  trap_raz_wi },
+	/* PMXEVTYPER_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
+	  trap_raz_wi },
+	/* PMXEVCNTR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
+	  trap_raz_wi },
+	/* PMUSERENR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
+	  trap_raz_wi },
+	/* PMOVSSET_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
+	  trap_raz_wi },
+
+	/* TPIDR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
+	  NULL, reset_unknown, TPIDR_EL0 },
+	/* TPIDRRO_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
+	  NULL, reset_unknown, TPIDRRO_EL0 },
+
+	/* DACR32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, DACR32_EL2 },
+	/* IFSR32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001),
+	  NULL, reset_unknown, IFSR32_EL2 },
+	/* FPEXC32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000),
+	  NULL, reset_val, FPEXC32_EL2, 0x70 },
+};
+
+static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
+		u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
+		u32 el3 = !!((pfr >> 12) & 0xf);
+
+		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
+					  (((dfr >> 12) & 0xf) << 24) |
+					  (((dfr >> 28) & 0xf) << 20) |
+					  (6 << 16) | (el3 << 14) | (el3 << 12));
+		return true;
+	}
+}
+
+static bool trap_debug32(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+	}
+
+	return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n)					\
+	/* DBGBVRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32,	\
+	  NULL, (cp14_DBGBVR0 + (n) * 2) },			\
+	/* DBGBCRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32,	\
+	  NULL, (cp14_DBGBCR0 + (n) * 2) },			\
+	/* DBGWVRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32,	\
+	  NULL, (cp14_DBGWVR0 + (n) * 2) },			\
+	/* DBGWCRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32,	\
+	  NULL, (cp14_DBGWCR0 + (n) * 2) }
+
+#define DBGBXVR(n)						\
+	{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32,	\
+	  NULL, cp14_DBGBXVR0 + n * 2 }
+
+/*
+ * Trapped cp14 registers. We generally ignore most of the external
+ * debug, on the principle that they don't really make sense to a
+ * guest. Revisit this one day, whould this principle change.
+ */
+static const struct sys_reg_desc cp14_regs[] = {
+	/* DBGIDR */
+	{ Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+	/* DBGDTRRXext */
+	{ Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
+
+	DBG_BCR_BVR_WCR_WVR(0),
+	/* DBGDSCRint */
+	{ Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(1),
+	/* DBGDCCINT */
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 },
+	/* DBGDSCRext */
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 },
+	DBG_BCR_BVR_WCR_WVR(2),
+	/* DBGDTR[RT]Xint */
+	{ Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
+	/* DBGDTR[RT]Xext */
+	{ Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(3),
+	DBG_BCR_BVR_WCR_WVR(4),
+	DBG_BCR_BVR_WCR_WVR(5),
+	/* DBGWFAR */
+	{ Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
+	/* DBGOSECCR */
+	{ Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(6),
+	/* DBGVCR */
+	{ Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 },
+	DBG_BCR_BVR_WCR_WVR(7),
+	DBG_BCR_BVR_WCR_WVR(8),
+	DBG_BCR_BVR_WCR_WVR(9),
+	DBG_BCR_BVR_WCR_WVR(10),
+	DBG_BCR_BVR_WCR_WVR(11),
+	DBG_BCR_BVR_WCR_WVR(12),
+	DBG_BCR_BVR_WCR_WVR(13),
+	DBG_BCR_BVR_WCR_WVR(14),
+	DBG_BCR_BVR_WCR_WVR(15),
+
+	/* DBGDRAR (32bit) */
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },
+
+	DBGBXVR(0),
+	/* DBGOSLAR */
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+	DBGBXVR(1),
+	/* DBGOSLSR */
+	{ Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+	DBGBXVR(2),
+	DBGBXVR(3),
+	/* DBGOSDLR */
+	{ Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
+	DBGBXVR(4),
+	/* DBGPRCR */
+	{ Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
+	DBGBXVR(5),
+	DBGBXVR(6),
+	DBGBXVR(7),
+	DBGBXVR(8),
+	DBGBXVR(9),
+	DBGBXVR(10),
+	DBGBXVR(11),
+	DBGBXVR(12),
+	DBGBXVR(13),
+	DBGBXVR(14),
+	DBGBXVR(15),
+
+	/* DBGDSAR (32bit) */
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },
+
+	/* DBGDEVID2 */
+	{ Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
+	/* DBGDEVID1 */
+	{ Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
+	/* DBGDEVID */
+	{ Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
+	/* DBGCLAIMSET */
+	{ Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
+	/* DBGCLAIMCLR */
+	{ Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
+	/* DBGAUTHSTATUS */
+	{ Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
+};
+
+/* Trapped cp14 64bit registers */
+static const struct sys_reg_desc cp14_64_regs[] = {
+	/* DBGDRAR (64bit) */
+	{ Op1( 0), CRm( 1), .access = trap_raz_wi },
+
+	/* DBGDSAR (64bit) */
+	{ Op1( 0), CRm( 2), .access = trap_raz_wi },
+};
+
+/*
+ * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
+ * depending on the way they are accessed (as a 32bit or a 64bit
+ * register).
+ */
+static const struct sys_reg_desc cp15_regs[] = {
+	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
+
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
+	{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+	{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+	{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
+	{ Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
+	{ Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
+	{ Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
+	{ Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+
+	/*
+	 * DC{C,I,CI}SW operations:
+	 */
+	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+
+	/* PMU */
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
+
+	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
+	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
+	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
+	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+
+	/* ICC_SRE */
+	{ Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
+
+	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+};
+
+static const struct sys_reg_desc cp15_64_regs[] = {
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
+	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+};
+
+/* Target specific emulation tables */
+static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+				       struct kvm_sys_reg_target_table *table)
+{
+	target_tables[target] = table;
+}
+
+/* Get specific register table for this target. */
+static const struct sys_reg_desc *get_target_table(unsigned target,
+						   bool mode_is_64,
+						   size_t *num)
+{
+	struct kvm_sys_reg_target_table *table;
+
+	table = target_tables[target];
+	if (mode_is_64) {
+		*num = table->table64.num;
+		return table->table64.table;
+	} else {
+		*num = table->table32.num;
+		return table->table32.table;
+	}
+}
+
+static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
+					 const struct sys_reg_desc table[],
+					 unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		const struct sys_reg_desc *r = &table[i];
+
+		if (params->Op0 != r->Op0)
+			continue;
+		if (params->Op1 != r->Op1)
+			continue;
+		if (params->CRn != r->CRn)
+			continue;
+		if (params->CRm != r->CRm)
+			continue;
+		if (params->Op2 != r->Op2)
+			continue;
+
+		return r;
+	}
+	return NULL;
+}
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/*
+ * emulate_cp --  tries to match a sys_reg access in a handling table, and
+ *                call the corresponding trap handler.
+ *
+ * @params: pointer to the descriptor of the access
+ * @table: array of trap descriptors
+ * @num: size of the trap descriptor array
+ *
+ * Return 0 if the access has been handled, and -1 if not.
+ */
+static int emulate_cp(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_params *params,
+		      const struct sys_reg_desc *table,
+		      size_t num)
+{
+	const struct sys_reg_desc *r;
+
+	if (!table)
+		return -1;	/* Not handled */
+
+	r = find_reg(params, table, num);
+
+	if (r) {
+		/*
+		 * Not having an accessor means that we have
+		 * configured a trap that we don't know how to
+		 * handle. This certainly qualifies as a gross bug
+		 * that should be fixed right away.
+		 */
+		BUG_ON(!r->access);
+
+		if (likely(r->access(vcpu, params, r))) {
+			/* Skip instruction, since it was emulated */
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+		}
+
+		/* Handled */
+		return 0;
+	}
+
+	/* Not handled */
+	return -1;
+}
+
+static void unhandled_cp_access(struct kvm_vcpu *vcpu,
+				struct sys_reg_params *params)
+{
+	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+	int cp;
+
+	switch(hsr_ec) {
+	case ESR_ELx_EC_CP15_32:
+	case ESR_ELx_EC_CP15_64:
+		cp = 15;
+		break;
+	case ESR_ELx_EC_CP14_MR:
+	case ESR_ELx_EC_CP14_64:
+		cp = 14;
+		break;
+	default:
+		WARN_ON((cp = -1));
+	}
+
+	kvm_err("Unsupported guest CP%d access at: %08lx\n",
+		cp, *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	kvm_inject_undefined(vcpu);
+}
+
+/**
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_desc *global,
+			    size_t nr_global,
+			    const struct sys_reg_desc *target_specific,
+			    size_t nr_specific)
+{
+	struct sys_reg_params params;
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	int Rt2 = (hsr >> 10) & 0xf;
+
+	params.is_aarch32 = true;
+	params.is_32bit = false;
+	params.CRm = (hsr >> 1) & 0xf;
+	params.Rt = (hsr >> 5) & 0xf;
+	params.is_write = ((hsr & 1) == 0);
+
+	params.Op0 = 0;
+	params.Op1 = (hsr >> 16) & 0xf;
+	params.Op2 = 0;
+	params.CRn = 0;
+
+	/*
+	 * Massive hack here. Store Rt2 in the top 32bits so we only
+	 * have one register to deal with. As we use the same trap
+	 * backends between AArch32 and AArch64, we get away with it.
+	 */
+	if (params.is_write) {
+		u64 val = *vcpu_reg(vcpu, params.Rt);
+		val &= 0xffffffff;
+		val |= *vcpu_reg(vcpu, Rt2) << 32;
+		*vcpu_reg(vcpu, params.Rt) = val;
+	}
+
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+		goto out;
+	if (!emulate_cp(vcpu, &params, global, nr_global))
+		goto out;
+
+	unhandled_cp_access(vcpu, &params);
+
+out:
+	/* Do the opposite hack for the read side */
+	if (!params.is_write) {
+		u64 val = *vcpu_reg(vcpu, params.Rt);
+		val >>= 32;
+		*vcpu_reg(vcpu, Rt2) = val;
+	}
+
+	return 1;
+}
+
+/**
+ * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_desc *global,
+			    size_t nr_global,
+			    const struct sys_reg_desc *target_specific,
+			    size_t nr_specific)
+{
+	struct sys_reg_params params;
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	params.is_aarch32 = true;
+	params.is_32bit = true;
+	params.CRm = (hsr >> 1) & 0xf;
+	params.Rt  = (hsr >> 5) & 0xf;
+	params.is_write = ((hsr & 1) == 0);
+	params.CRn = (hsr >> 10) & 0xf;
+	params.Op0 = 0;
+	params.Op1 = (hsr >> 14) & 0x7;
+	params.Op2 = (hsr >> 17) & 0x7;
+
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+		return 1;
+	if (!emulate_cp(vcpu, &params, global, nr_global))
+		return 1;
+
+	unhandled_cp_access(vcpu, &params);
+	return 1;
+}
+
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	const struct sys_reg_desc *target_specific;
+	size_t num;
+
+	target_specific = get_target_table(vcpu->arch.target, false, &num);
+	return kvm_handle_cp_64(vcpu,
+				cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
+				target_specific, num);
+}
+
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	const struct sys_reg_desc *target_specific;
+	size_t num;
+
+	target_specific = get_target_table(vcpu->arch.target, false, &num);
+	return kvm_handle_cp_32(vcpu,
+				cp15_regs, ARRAY_SIZE(cp15_regs),
+				target_specific, num);
+}
+
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return kvm_handle_cp_64(vcpu,
+				cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
+				NULL, 0);
+}
+
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return kvm_handle_cp_32(vcpu,
+				cp14_regs, ARRAY_SIZE(cp14_regs),
+				NULL, 0);
+}
+
+static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *params)
+{
+	size_t num;
+	const struct sys_reg_desc *table, *r;
+
+	table = get_target_table(vcpu->arch.target, true, &num);
+
+	/* Search target-specific then generic table. */
+	r = find_reg(params, table, num);
+	if (!r)
+		r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	if (likely(r)) {
+		/*
+		 * Not having an accessor means that we have
+		 * configured a trap that we don't know how to
+		 * handle. This certainly qualifies as a gross bug
+		 * that should be fixed right away.
+		 */
+		BUG_ON(!r->access);
+
+		if (likely(r->access(vcpu, params, r))) {
+			/* Skip instruction, since it was emulated */
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return 1;
+		}
+		/* If access function fails, it should complain. */
+	} else {
+		kvm_err("Unsupported guest sys_reg access at: %lx\n",
+			*vcpu_pc(vcpu));
+		print_sys_reg_instr(params);
+	}
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
+			      const struct sys_reg_desc *table, size_t num)
+{
+	unsigned long i;
+
+	for (i = 0; i < num; i++)
+		if (table[i].reset)
+			table[i].reset(vcpu, &table[i]);
+}
+
+/**
+ * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct sys_reg_params params;
+	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+
+	params.is_aarch32 = false;
+	params.is_32bit = false;
+	params.Op0 = (esr >> 20) & 3;
+	params.Op1 = (esr >> 14) & 0x7;
+	params.CRn = (esr >> 10) & 0xf;
+	params.CRm = (esr >> 1) & 0xf;
+	params.Op2 = (esr >> 17) & 0x7;
+	params.Rt = (esr >> 5) & 0x1f;
+	params.is_write = !(esr & 1);
+
+	return emulate_sys_reg(vcpu, &params);
+}
+
+/******************************************************************************
+ * Userspace API
+ *****************************************************************************/
+
+static bool index_to_params(u64 id, struct sys_reg_params *params)
+{
+	switch (id & KVM_REG_SIZE_MASK) {
+	case KVM_REG_SIZE_U64:
+		/* Any unused index bits means it's not valid. */
+		if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
+			      | KVM_REG_ARM_COPROC_MASK
+			      | KVM_REG_ARM64_SYSREG_OP0_MASK
+			      | KVM_REG_ARM64_SYSREG_OP1_MASK
+			      | KVM_REG_ARM64_SYSREG_CRN_MASK
+			      | KVM_REG_ARM64_SYSREG_CRM_MASK
+			      | KVM_REG_ARM64_SYSREG_OP2_MASK))
+			return false;
+		params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP0_SHIFT);
+		params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP1_SHIFT);
+		params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK)
+			       >> KVM_REG_ARM64_SYSREG_CRN_SHIFT);
+		params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK)
+			       >> KVM_REG_ARM64_SYSREG_CRM_SHIFT);
+		params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP2_SHIFT);
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Decode an index value, and find the sys_reg_desc entry. */
+static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
+						    u64 id)
+{
+	size_t num;
+	const struct sys_reg_desc *table, *r;
+	struct sys_reg_params params;
+
+	/* We only do sys_reg for now. */
+	if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
+		return NULL;
+
+	if (!index_to_params(id, &params))
+		return NULL;
+
+	table = get_target_table(vcpu->arch.target, true, &num);
+	r = find_reg(&params, table, num);
+	if (!r)
+		r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	/* Not saved in the sys_reg array? */
+	if (r && !r->reg)
+		r = NULL;
+
+	return r;
+}
+
+/*
+ * These are the invariant sys_reg registers: we let the guest see the
+ * host versions of these, so they're part of the guest state.
+ *
+ * A future CPU may provide a mechanism to present different values to
+ * the guest, or a future kvm may trap them.
+ */
+
+#define FUNCTION_INVARIANT(reg)						\
+	static void get_##reg(struct kvm_vcpu *v,			\
+			      const struct sys_reg_desc *r)		\
+	{								\
+		u64 val;						\
+									\
+		asm volatile("mrs %0, " __stringify(reg) "\n"		\
+			     : "=r" (val));				\
+		((struct sys_reg_desc *)r)->val = val;			\
+	}
+
+FUNCTION_INVARIANT(midr_el1)
+FUNCTION_INVARIANT(ctr_el0)
+FUNCTION_INVARIANT(revidr_el1)
+FUNCTION_INVARIANT(id_pfr0_el1)
+FUNCTION_INVARIANT(id_pfr1_el1)
+FUNCTION_INVARIANT(id_dfr0_el1)
+FUNCTION_INVARIANT(id_afr0_el1)
+FUNCTION_INVARIANT(id_mmfr0_el1)
+FUNCTION_INVARIANT(id_mmfr1_el1)
+FUNCTION_INVARIANT(id_mmfr2_el1)
+FUNCTION_INVARIANT(id_mmfr3_el1)
+FUNCTION_INVARIANT(id_isar0_el1)
+FUNCTION_INVARIANT(id_isar1_el1)
+FUNCTION_INVARIANT(id_isar2_el1)
+FUNCTION_INVARIANT(id_isar3_el1)
+FUNCTION_INVARIANT(id_isar4_el1)
+FUNCTION_INVARIANT(id_isar5_el1)
+FUNCTION_INVARIANT(clidr_el1)
+FUNCTION_INVARIANT(aidr_el1)
+
+/* ->val is filled in by kvm_sys_reg_table_init() */
+static struct sys_reg_desc invariant_sys_regs[] = {
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, get_midr_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110),
+	  NULL, get_revidr_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000),
+	  NULL, get_id_pfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001),
+	  NULL, get_id_pfr1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010),
+	  NULL, get_id_dfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011),
+	  NULL, get_id_afr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100),
+	  NULL, get_id_mmfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101),
+	  NULL, get_id_mmfr1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110),
+	  NULL, get_id_mmfr2_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111),
+	  NULL, get_id_mmfr3_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+	  NULL, get_id_isar0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001),
+	  NULL, get_id_isar1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+	  NULL, get_id_isar2_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011),
+	  NULL, get_id_isar3_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100),
+	  NULL, get_id_isar4_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101),
+	  NULL, get_id_isar5_el1 },
+	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001),
+	  NULL, get_clidr_el1 },
+	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111),
+	  NULL, get_aidr_el1 },
+	{ Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001),
+	  NULL, get_ctr_el0 },
+};
+
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
+{
+	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
+{
+	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int get_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+	struct sys_reg_params params;
+	const struct sys_reg_desc *r;
+
+	if (!index_to_params(id, &params))
+		return -ENOENT;
+
+	r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+	if (!r)
+		return -ENOENT;
+
+	return reg_to_user(uaddr, &r->val, id);
+}
+
+static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+	struct sys_reg_params params;
+	const struct sys_reg_desc *r;
+	int err;
+	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+
+	if (!index_to_params(id, &params))
+		return -ENOENT;
+	r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+	if (!r)
+		return -ENOENT;
+
+	err = reg_from_user(&val, uaddr, id);
+	if (err)
+		return err;
+
+	/* This is what we mean by invariant: you can't change it. */
+	if (r->val != val)
+		return -EINVAL;
+
+	return 0;
+}
+
+static bool is_valid_cache(u32 val)
+{
+	u32 level, ctype;
+
+	if (val >= CSSELR_MAX)
+		return false;
+
+	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
+	level = (val >> 1);
+	ctype = (cache_levels >> (level * 3)) & 7;
+
+	switch (ctype) {
+	case 0: /* No cache */
+		return false;
+	case 1: /* Instruction cache only */
+		return (val & 1);
+	case 2: /* Data cache only */
+	case 4: /* Unified cache */
+		return !(val & 1);
+	case 3: /* Separate instruction and data caches */
+		return true;
+	default: /* Reserved: we can't know instruction or data. */
+		return false;
+	}
+}
+
+static int demux_c15_get(u64 id, void __user *uaddr)
+{
+	u32 val;
+	u32 __user *uval = uaddr;
+
+	/* Fail if we have unknown bits set. */
+	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+		return -ENOENT;
+
+	switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+	case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+		if (KVM_REG_SIZE(id) != 4)
+			return -ENOENT;
+		val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+			>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+		if (!is_valid_cache(val))
+			return -ENOENT;
+
+		return put_user(get_ccsidr(val), uval);
+	default:
+		return -ENOENT;
+	}
+}
+
+static int demux_c15_set(u64 id, void __user *uaddr)
+{
+	u32 val, newval;
+	u32 __user *uval = uaddr;
+
+	/* Fail if we have unknown bits set. */
+	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+		return -ENOENT;
+
+	switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+	case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+		if (KVM_REG_SIZE(id) != 4)
+			return -ENOENT;
+		val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+			>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+		if (!is_valid_cache(val))
+			return -ENOENT;
+
+		if (get_user(newval, uval))
+			return -EFAULT;
+
+		/* This is also invariant: you can't change it. */
+		if (newval != get_ccsidr(val))
+			return -EINVAL;
+		return 0;
+	default:
+		return -ENOENT;
+	}
+}
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	const struct sys_reg_desc *r;
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return demux_c15_get(reg->id, uaddr);
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+		return -ENOENT;
+
+	r = index_to_sys_reg_desc(vcpu, reg->id);
+	if (!r)
+		return get_invariant_sys_reg(reg->id, uaddr);
+
+	return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+}
+
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	const struct sys_reg_desc *r;
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return demux_c15_set(reg->id, uaddr);
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+		return -ENOENT;
+
+	r = index_to_sys_reg_desc(vcpu, reg->id);
+	if (!r)
+		return set_invariant_sys_reg(reg->id, uaddr);
+
+	return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+}
+
+static unsigned int num_demux_regs(void)
+{
+	unsigned int i, count = 0;
+
+	for (i = 0; i < CSSELR_MAX; i++)
+		if (is_valid_cache(i))
+			count++;
+
+	return count;
+}
+
+static int write_demux_regids(u64 __user *uindices)
+{
+	u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+	unsigned int i;
+
+	val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
+	for (i = 0; i < CSSELR_MAX; i++) {
+		if (!is_valid_cache(i))
+			continue;
+		if (put_user(val | i, uindices))
+			return -EFAULT;
+		uindices++;
+	}
+	return 0;
+}
+
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
+{
+	return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
+		KVM_REG_ARM64_SYSREG |
+		(reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
+		(reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) |
+		(reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) |
+		(reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) |
+		(reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT));
+}
+
+static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
+{
+	if (!*uind)
+		return true;
+
+	if (put_user(sys_reg_to_index(reg), *uind))
+		return false;
+
+	(*uind)++;
+	return true;
+}
+
+/* Assumed ordered tables, see kvm_sys_reg_table_init. */
+static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
+{
+	const struct sys_reg_desc *i1, *i2, *end1, *end2;
+	unsigned int total = 0;
+	size_t num;
+
+	/* We check for duplicates here, to allow arch-specific overrides. */
+	i1 = get_target_table(vcpu->arch.target, true, &num);
+	end1 = i1 + num;
+	i2 = sys_reg_descs;
+	end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
+
+	BUG_ON(i1 == end1 || i2 == end2);
+
+	/* Walk carefully, as both tables may refer to the same register. */
+	while (i1 || i2) {
+		int cmp = cmp_sys_reg(i1, i2);
+		/* target-specific overrides generic entry. */
+		if (cmp <= 0) {
+			/* Ignore registers we trap but don't save. */
+			if (i1->reg) {
+				if (!copy_reg_to_user(i1, &uind))
+					return -EFAULT;
+				total++;
+			}
+		} else {
+			/* Ignore registers we trap but don't save. */
+			if (i2->reg) {
+				if (!copy_reg_to_user(i2, &uind))
+					return -EFAULT;
+				total++;
+			}
+		}
+
+		if (cmp <= 0 && ++i1 == end1)
+			i1 = NULL;
+		if (cmp >= 0 && ++i2 == end2)
+			i2 = NULL;
+	}
+	return total;
+}
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
+{
+	return ARRAY_SIZE(invariant_sys_regs)
+		+ num_demux_regs()
+		+ walk_sys_regs(vcpu, (u64 __user *)NULL);
+}
+
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	unsigned int i;
+	int err;
+
+	/* Then give them all the invariant registers' indices. */
+	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) {
+		if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	err = walk_sys_regs(vcpu, uindices);
+	if (err < 0)
+		return err;
+	uindices += err;
+
+	return write_demux_regids(uindices);
+}
+
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 1; i < n; i++) {
+		if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+			kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+void kvm_sys_reg_table_init(void)
+{
+	unsigned int i;
+	struct sys_reg_desc clidr;
+
+	/* Make sure tables are unique and in order. */
+	BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
+	BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
+	BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
+	BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+	BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
+	BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
+
+	/* We abuse the reset function to overwrite the table itself. */
+	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
+		invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+
+	/*
+	 * CLIDR format is awkward, so clean it up.  See ARM B4.1.20:
+	 *
+	 *   If software reads the Cache Type fields from Ctype1
+	 *   upwards, once it has seen a value of 0b000, no caches
+	 *   exist at further-out levels of the hierarchy. So, for
+	 *   example, if Ctype3 is the first Cache Type field with a
+	 *   value of 0b000, the values of Ctype4 to Ctype7 must be
+	 *   ignored.
+	 */
+	get_clidr_el1(NULL, &clidr); /* Ugly... */
+	cache_levels = clidr.val;
+	for (i = 0; i < 7; i++)
+		if (((cache_levels >> (i*3)) & 7) == 0)
+			break;
+	/* Clear all higher bits. */
+	cache_levels &= (1 << (i*3))-1;
+}
+
+/**
+ * kvm_reset_sys_regs - sets system registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
+{
+	size_t num;
+	const struct sys_reg_desc *table;
+
+	/* Catch someone adding a register without putting in reset entry. */
+	memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs));
+
+	/* Generic chip reset first (so target could override). */
+	reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	table = get_target_table(vcpu->arch.target, true, &num);
+	reset_sys_reg_descs(vcpu, table, num);
+
+	for (num = 1; num < NR_SYS_REGS; num++)
+		if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+			panic("Didn't reset vcpu_sys_reg(%zi)", num);
+}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
new file mode 100644
index 000000000..d411e2514
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
+#define __ARM64_KVM_SYS_REGS_LOCAL_H__
+
+struct sys_reg_params {
+	u8	Op0;
+	u8	Op1;
+	u8	CRn;
+	u8	CRm;
+	u8	Op2;
+	u8	Rt;
+	bool	is_write;
+	bool	is_aarch32;
+	bool	is_32bit;	/* Only valid if is_aarch32 is true */
+};
+
+struct sys_reg_desc {
+	/* MRS/MSR instruction which accesses it. */
+	u8	Op0;
+	u8	Op1;
+	u8	CRn;
+	u8	CRm;
+	u8	Op2;
+
+	/* Trapped access from guest, if non-NULL. */
+	bool (*access)(struct kvm_vcpu *,
+		       const struct sys_reg_params *,
+		       const struct sys_reg_desc *);
+
+	/* Initialization for vcpu. */
+	void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
+
+	/* Index into sys_reg[], or 0 if we don't need to save it. */
+	int reg;
+
+	/* Value (usually reset value) */
+	u64 val;
+};
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+	/* Look, we even formatted it for you to paste into the table! */
+	kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+		      p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+}
+
+static inline bool ignore_write(struct kvm_vcpu *vcpu,
+				const struct sys_reg_params *p)
+{
+	return true;
+}
+
+static inline bool read_zero(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_params *p)
+{
+	*vcpu_reg(vcpu, p->Rt) = 0;
+	return true;
+}
+
+static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
+				      const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg write to read-only register at: %lx\n",
+		  *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	return false;
+}
+
+static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
+					const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg read to write-only register at: %lx\n",
+		  *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	return false;
+}
+
+/* Reset functions */
+static inline void reset_unknown(struct kvm_vcpu *vcpu,
+				 const struct sys_reg_desc *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= NR_SYS_REGS);
+	vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+}
+
+static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= NR_SYS_REGS);
+	vcpu_sys_reg(vcpu, r->reg) = r->val;
+}
+
+static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
+			      const struct sys_reg_desc *i2)
+{
+	BUG_ON(i1 == i2);
+	if (!i1)
+		return 1;
+	else if (!i2)
+		return -1;
+	if (i1->Op0 != i2->Op0)
+		return i1->Op0 - i2->Op0;
+	if (i1->Op1 != i2->Op1)
+		return i1->Op1 - i2->Op1;
+	if (i1->CRn != i2->CRn)
+		return i1->CRn - i2->CRn;
+	if (i1->CRm != i2->CRm)
+		return i1->CRm - i2->CRm;
+	return i1->Op2 - i2->Op2;
+}
+
+
+#define Op0(_x) 	.Op0 = _x
+#define Op1(_x) 	.Op1 = _x
+#define CRn(_x)		.CRn = _x
+#define CRm(_x) 	.CRm = _x
+#define Op2(_x) 	.Op2 = _x
+
+#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
new file mode 100644
index 000000000..475fd2929
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/coproc_a15.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kvm_host.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <linux/init.h>
+
+#include "sys_regs.h"
+
+static bool access_actlr(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1);
+	return true;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 actlr;
+
+	asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr));
+	vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr;
+}
+
+/*
+ * Implementation specific sys-reg registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc genericv8_sys_regs[] = {
+	/* ACTLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+	  access_actlr, reset_actlr, ACTLR_EL1 },
+};
+
+static const struct sys_reg_desc genericv8_cp15_regs[] = {
+	/* ACTLR */
+	{ Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+	  access_actlr },
+};
+
+static struct kvm_sys_reg_target_table genericv8_target_table = {
+	.table64 = {
+		.table = genericv8_sys_regs,
+		.num = ARRAY_SIZE(genericv8_sys_regs),
+	},
+	.table32 = {
+		.table = genericv8_cp15_regs,
+		.num = ARRAY_SIZE(genericv8_cp15_regs),
+	},
+};
+
+static int __init sys_reg_genericv8_init(void)
+{
+	unsigned int i;
+
+	for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
+		BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
+			       &genericv8_sys_regs[i]) >= 0);
+
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
+					  &genericv8_target_table);
+
+	return 0;
+}
+late_initcall(sys_reg_genericv8_init);
diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h
new file mode 100644
index 000000000..157416e96
--- /dev/null
+++ b/arch/arm64/kvm/trace.h
@@ -0,0 +1,55 @@
+#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ARM64_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+TRACE_EVENT(kvm_wfx_arm64,
+	TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
+	TP_ARGS(vcpu_pc, is_wfe),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	vcpu_pc)
+		__field(bool,		is_wfe)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc = vcpu_pc;
+		__entry->is_wfe  = is_wfe;
+	),
+
+	TP_printk("guest executed wf%c at: 0x%08lx",
+		  __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_hvc_arm64,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
+	TP_ARGS(vcpu_pc, r0, imm),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, vcpu_pc)
+		__field(unsigned long, r0)
+		__field(unsigned long, imm)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc = vcpu_pc;
+		__entry->r0 = r0;
+		__entry->imm = imm;
+	),
+
+	TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
+		  __entry->vcpu_pc, __entry->r0, __entry->imm)
+);
+
+#endif /* _TRACE_ARM64_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
new file mode 100644
index 000000000..f002fe1c3
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+ENTRY(__save_vgic_v2_state)
+__save_vgic_v2_state:
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* Save all interesting registers */
+	ldr	w4, [x2, #GICH_HCR]
+	ldr	w5, [x2, #GICH_VMCR]
+	ldr	w6, [x2, #GICH_MISR]
+	ldr	w7, [x2, #GICH_EISR0]
+	ldr	w8, [x2, #GICH_EISR1]
+	ldr	w9, [x2, #GICH_ELRSR0]
+	ldr	w10, [x2, #GICH_ELRSR1]
+	ldr	w11, [x2, #GICH_APR]
+CPU_BE(	rev	w4,  w4  )
+CPU_BE(	rev	w5,  w5  )
+CPU_BE(	rev	w6,  w6  )
+CPU_BE(	rev	w7,  w7  )
+CPU_BE(	rev	w8,  w8  )
+CPU_BE(	rev	w9,  w9  )
+CPU_BE(	rev	w10, w10 )
+CPU_BE(	rev	w11, w11 )
+
+	str	w4, [x3, #VGIC_V2_CPU_HCR]
+	str	w5, [x3, #VGIC_V2_CPU_VMCR]
+	str	w6, [x3, #VGIC_V2_CPU_MISR]
+CPU_LE(	str	w7, [x3, #VGIC_V2_CPU_EISR] )
+CPU_LE(	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)] )
+CPU_LE(	str	w9, [x3, #VGIC_V2_CPU_ELRSR] )
+CPU_LE(	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
+CPU_BE(	str	w7, [x3, #(VGIC_V2_CPU_EISR + 4)] )
+CPU_BE(	str	w8, [x3, #VGIC_V2_CPU_EISR] )
+CPU_BE(	str	w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
+CPU_BE(	str	w10, [x3, #VGIC_V2_CPU_ELRSR] )
+	str	w11, [x3, #VGIC_V2_CPU_APR]
+
+	/* Clear GICH_HCR */
+	str	wzr, [x2, #GICH_HCR]
+
+	/* Save list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_V2_CPU_LR
+1:	ldr	w5, [x2], #4
+CPU_BE(	rev	w5, w5 )
+	str	w5, [x3], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+	ret
+ENDPROC(__save_vgic_v2_state)
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+ENTRY(__restore_vgic_v2_state)
+__restore_vgic_v2_state:
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* We only restore a minimal set of registers */
+	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_V2_CPU_APR]
+CPU_BE(	rev	w4, w4 )
+CPU_BE(	rev	w5, w5 )
+CPU_BE(	rev	w6, w6 )
+
+	str	w4, [x2, #GICH_HCR]
+	str	w5, [x2, #GICH_VMCR]
+	str	w6, [x2, #GICH_APR]
+
+	/* Restore list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_V2_CPU_LR
+1:	ldr	w5, [x3], #4
+CPU_BE(	rev	w5, w5 )
+	str	w5, [x2], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+	ret
+ENDPROC(__restore_vgic_v2_state)
+
+	.popsection
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
new file mode 100644
index 000000000..617a012a0
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+/*
+ * We store LRs in reverse order to let the CPU deal with streaming
+ * access. Use this macro to make it look saner...
+ */
+#define LR_OFFSET(n)	(VGIC_V3_CPU_LR + (15 - n) * 8)
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro	save_vgic_v3_state
+	// Compute the address of struct vgic_cpu
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	// Make sure stores to the GIC via the memory mapped interface
+	// are now visible to the system register interface
+	dsb	st
+
+	// Save all interesting registers
+	mrs_s	x4, ICH_HCR_EL2
+	mrs_s	x5, ICH_VMCR_EL2
+	mrs_s	x6, ICH_MISR_EL2
+	mrs_s	x7, ICH_EISR_EL2
+	mrs_s	x8, ICH_ELSR_EL2
+
+	str	w4, [x3, #VGIC_V3_CPU_HCR]
+	str	w5, [x3, #VGIC_V3_CPU_VMCR]
+	str	w6, [x3, #VGIC_V3_CPU_MISR]
+	str	w7, [x3, #VGIC_V3_CPU_EISR]
+	str	w8, [x3, #VGIC_V3_CPU_ELRSR]
+
+	msr_s	ICH_HCR_EL2, xzr
+
+	mrs_s	x21, ICH_VTR_EL2
+	mvn	w22, w21
+	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	mrs_s	x20, ICH_LR15_EL2
+	mrs_s	x19, ICH_LR14_EL2
+	mrs_s	x18, ICH_LR13_EL2
+	mrs_s	x17, ICH_LR12_EL2
+	mrs_s	x16, ICH_LR11_EL2
+	mrs_s	x15, ICH_LR10_EL2
+	mrs_s	x14, ICH_LR9_EL2
+	mrs_s	x13, ICH_LR8_EL2
+	mrs_s	x12, ICH_LR7_EL2
+	mrs_s	x11, ICH_LR6_EL2
+	mrs_s	x10, ICH_LR5_EL2
+	mrs_s	x9, ICH_LR4_EL2
+	mrs_s	x8, ICH_LR3_EL2
+	mrs_s	x7, ICH_LR2_EL2
+	mrs_s	x6, ICH_LR1_EL2
+	mrs_s	x5, ICH_LR0_EL2
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	str	x20, [x3, #LR_OFFSET(15)]
+	str	x19, [x3, #LR_OFFSET(14)]
+	str	x18, [x3, #LR_OFFSET(13)]
+	str	x17, [x3, #LR_OFFSET(12)]
+	str	x16, [x3, #LR_OFFSET(11)]
+	str	x15, [x3, #LR_OFFSET(10)]
+	str	x14, [x3, #LR_OFFSET(9)]
+	str	x13, [x3, #LR_OFFSET(8)]
+	str	x12, [x3, #LR_OFFSET(7)]
+	str	x11, [x3, #LR_OFFSET(6)]
+	str	x10, [x3, #LR_OFFSET(5)]
+	str	x9, [x3, #LR_OFFSET(4)]
+	str	x8, [x3, #LR_OFFSET(3)]
+	str	x7, [x3, #LR_OFFSET(2)]
+	str	x6, [x3, #LR_OFFSET(1)]
+	str	x5, [x3, #LR_OFFSET(0)]
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	mrs_s	x20, ICH_AP0R3_EL2
+	str	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+	mrs_s	x19, ICH_AP0R2_EL2
+	str	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+6:	mrs_s	x18, ICH_AP0R1_EL2
+	str	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+5:	mrs_s	x17, ICH_AP0R0_EL2
+	str	w17, [x3, #VGIC_V3_CPU_AP0R]
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	mrs_s	x20, ICH_AP1R3_EL2
+	str	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+	mrs_s	x19, ICH_AP1R2_EL2
+	str	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+6:	mrs_s	x18, ICH_AP1R1_EL2
+	str	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+5:	mrs_s	x17, ICH_AP1R0_EL2
+	str	w17, [x3, #VGIC_V3_CPU_AP1R]
+
+	// Restore SRE_EL1 access and re-enable SRE at EL1.
+	mrs_s	x5, ICC_SRE_EL2
+	orr	x5, x5, #ICC_SRE_EL2_ENABLE
+	msr_s	ICC_SRE_EL2, x5
+	isb
+	mov	x5, #1
+	msr_s	ICC_SRE_EL1, x5
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro	restore_vgic_v3_state
+	// Compute the address of struct vgic_cpu
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	// Restore all interesting registers
+	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
+	ldr	w25, [x3, #VGIC_V3_CPU_SRE]
+
+	msr_s	ICC_SRE_EL1, x25
+
+	// make sure SRE is valid before writing the other registers
+	isb
+
+	msr_s	ICH_HCR_EL2, x4
+	msr_s	ICH_VMCR_EL2, x5
+
+	mrs_s	x21, ICH_VTR_EL2
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	ldr	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+	msr_s	ICH_AP1R3_EL2, x20
+	ldr	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+	msr_s	ICH_AP1R2_EL2, x19
+6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+	msr_s	ICH_AP1R1_EL2, x18
+5:	ldr	w17, [x3, #VGIC_V3_CPU_AP1R]
+	msr_s	ICH_AP1R0_EL2, x17
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	ldr	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+	msr_s	ICH_AP0R3_EL2, x20
+	ldr	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+	msr_s	ICH_AP0R2_EL2, x19
+6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+	msr_s	ICH_AP0R1_EL2, x18
+5:	ldr	w17, [x3, #VGIC_V3_CPU_AP0R]
+	msr_s	ICH_AP0R0_EL2, x17
+
+	and	w22, w21, #0xf
+	mvn	w22, w21
+	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	ldr	x20, [x3, #LR_OFFSET(15)]
+	ldr	x19, [x3, #LR_OFFSET(14)]
+	ldr	x18, [x3, #LR_OFFSET(13)]
+	ldr	x17, [x3, #LR_OFFSET(12)]
+	ldr	x16, [x3, #LR_OFFSET(11)]
+	ldr	x15, [x3, #LR_OFFSET(10)]
+	ldr	x14, [x3, #LR_OFFSET(9)]
+	ldr	x13, [x3, #LR_OFFSET(8)]
+	ldr	x12, [x3, #LR_OFFSET(7)]
+	ldr	x11, [x3, #LR_OFFSET(6)]
+	ldr	x10, [x3, #LR_OFFSET(5)]
+	ldr	x9, [x3, #LR_OFFSET(4)]
+	ldr	x8, [x3, #LR_OFFSET(3)]
+	ldr	x7, [x3, #LR_OFFSET(2)]
+	ldr	x6, [x3, #LR_OFFSET(1)]
+	ldr	x5, [x3, #LR_OFFSET(0)]
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	msr_s	ICH_LR15_EL2, x20
+	msr_s	ICH_LR14_EL2, x19
+	msr_s	ICH_LR13_EL2, x18
+	msr_s	ICH_LR12_EL2, x17
+	msr_s	ICH_LR11_EL2, x16
+	msr_s	ICH_LR10_EL2, x15
+	msr_s	ICH_LR9_EL2,  x14
+	msr_s	ICH_LR8_EL2,  x13
+	msr_s	ICH_LR7_EL2,  x12
+	msr_s	ICH_LR6_EL2,  x11
+	msr_s	ICH_LR5_EL2,  x10
+	msr_s	ICH_LR4_EL2,   x9
+	msr_s	ICH_LR3_EL2,   x8
+	msr_s	ICH_LR2_EL2,   x7
+	msr_s	ICH_LR1_EL2,   x6
+	msr_s	ICH_LR0_EL2,   x5
+
+	// Ensure that the above will have reached the
+	// (re)distributors. This ensure the guest will read
+	// the correct values from the memory-mapped interface.
+	isb
+	dsb	sy
+
+	// Prevent the guest from touching the GIC system registers
+	// if SRE isn't enabled for GICv3 emulation
+	cbnz	x25, 1f
+	mrs_s	x5, ICC_SRE_EL2
+	and	x5, x5, #~ICC_SRE_EL2_ENABLE
+	msr_s	ICC_SRE_EL2, x5
+1:
+.endm
+
+ENTRY(__save_vgic_v3_state)
+	save_vgic_v3_state
+	ret
+ENDPROC(__save_vgic_v3_state)
+
+ENTRY(__restore_vgic_v3_state)
+	restore_vgic_v3_state
+	ret
+ENDPROC(__restore_vgic_v3_state)
+
+ENTRY(__vgic_v3_get_ich_vtr_el2)
+	mrs_s	x0, ICH_VTR_EL2
+	ret
+ENDPROC(__vgic_v3_get_ich_vtr_el2)
+
+	.popsection
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
new file mode 100644
index 000000000..d98d3e398
--- /dev/null
+++ b/arch/arm64/lib/Makefile
@@ -0,0 +1,5 @@
+lib-y		:= bitops.o clear_user.o delay.o copy_from_user.o	\
+		   copy_to_user.o copy_in_user.o copy_page.o		\
+		   clear_page.o memchr.o memcpy.o memmove.o memset.o	\
+		   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o	\
+		   strchr.o strrchr.o
diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S
new file mode 100644
index 000000000..7dac371cc
--- /dev/null
+++ b/arch/arm64/lib/bitops.S
@@ -0,0 +1,69 @@
+/*
+ * Based on arch/arm/lib/bitops.h
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * x0: bits 5:0  bit offset
+ *     bits 31:6 word offset
+ * x1: address
+ */
+	.macro	bitop, name, instr
+ENTRY(	\name	)
+	and	w3, w0, #63		// Get bit offset
+	eor	w0, w0, w3		// Clear low bits
+	mov	x2, #1
+	add	x1, x1, x0, lsr #3	// Get word offset
+	lsl	x3, x2, x3		// Create mask
+1:	ldxr	x2, [x1]
+	\instr	x2, x2, x3
+	stxr	w0, x2, [x1]
+	cbnz	w0, 1b
+	ret
+ENDPROC(\name	)
+	.endm
+
+	.macro	testop, name, instr
+ENTRY(	\name	)
+	and	w3, w0, #63		// Get bit offset
+	eor	w0, w0, w3		// Clear low bits
+	mov	x2, #1
+	add	x1, x1, x0, lsr #3	// Get word offset
+	lsl	x4, x2, x3		// Create mask
+1:	ldxr	x2, [x1]
+	lsr	x0, x2, x3		// Save old value of bit
+	\instr	x2, x2, x4		// toggle bit
+	stlxr	w5, x2, [x1]
+	cbnz	w5, 1b
+	dmb	ish
+	and	x0, x0, #1
+3:	ret
+ENDPROC(\name	)
+	.endm
+
+/*
+ * Atomic bit operations.
+ */
+	bitop	change_bit, eor
+	bitop	clear_bit, bic
+	bitop	set_bit, orr
+
+	testop	test_and_change_bit, eor
+	testop	test_and_clear_bit, bic
+	testop	test_and_set_bit, orr
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
new file mode 100644
index 000000000..ef08e905e
--- /dev/null
+++ b/arch/arm64/lib/clear_page.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+
+/*
+ * Clear page @dest
+ *
+ * Parameters:
+ *	x0 - dest
+ */
+ENTRY(clear_page)
+	mrs	x1, dczid_el0
+	and	w1, w1, #0xf
+	mov	x2, #4
+	lsl	x1, x2, x1
+
+1:	dc	zva, x0
+	add	x0, x0, x1
+	tst	x0, #(PAGE_SIZE - 1)
+	b.ne	1b
+	ret
+ENDPROC(clear_page)
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
new file mode 100644
index 000000000..c17967fdf
--- /dev/null
+++ b/arch/arm64/lib/clear_user.S
@@ -0,0 +1,58 @@
+/*
+ * Based on arch/arm/lib/clear_user.S
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+
+/* Prototype: int __clear_user(void *addr, size_t sz)
+ * Purpose  : clear some user memory
+ * Params   : addr - user memory address to clear
+ *          : sz   - number of bytes to clear
+ * Returns  : number of bytes NOT cleared
+ *
+ * Alignment fixed up by hardware.
+ */
+ENTRY(__clear_user)
+	mov	x2, x1			// save the size for fixup return
+	subs	x1, x1, #8
+	b.mi	2f
+1:
+USER(9f, str	xzr, [x0], #8	)
+	subs	x1, x1, #8
+	b.pl	1b
+2:	adds	x1, x1, #4
+	b.mi	3f
+USER(9f, str	wzr, [x0], #4	)
+	sub	x1, x1, #4
+3:	adds	x1, x1, #2
+	b.mi	4f
+USER(9f, strh	wzr, [x0], #2	)
+	sub	x1, x1, #2
+4:	adds	x1, x1, #1
+	b.mi	5f
+USER(9f, strb	wzr, [x0]	)
+5:	mov	x0, #0
+	ret
+ENDPROC(__clear_user)
+
+	.section .fixup,"ax"
+	.align	2
+9:	mov	x0, x2			// return the original size
+	ret
+	.previous
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
new file mode 100644
index 000000000..5e27add9d
--- /dev/null
+++ b/arch/arm64/lib/copy_from_user.S
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Copy from user space to a kernel buffer (alignment handled by the hardware)
+ *
+ * Parameters:
+ *	x0 - to
+ *	x1 - from
+ *	x2 - n
+ * Returns:
+ *	x0 - bytes not copied
+ */
+ENTRY(__copy_from_user)
+	add	x4, x1, x2			// upper user buffer boundary
+	subs	x2, x2, #8
+	b.mi	2f
+1:
+USER(9f, ldr	x3, [x1], #8	)
+	subs	x2, x2, #8
+	str	x3, [x0], #8
+	b.pl	1b
+2:	adds	x2, x2, #4
+	b.mi	3f
+USER(9f, ldr	w3, [x1], #4	)
+	sub	x2, x2, #4
+	str	w3, [x0], #4
+3:	adds	x2, x2, #2
+	b.mi	4f
+USER(9f, ldrh	w3, [x1], #2	)
+	sub	x2, x2, #2
+	strh	w3, [x0], #2
+4:	adds	x2, x2, #1
+	b.mi	5f
+USER(9f, ldrb	w3, [x1]	)
+	strb	w3, [x0]
+5:	mov	x0, #0
+	ret
+ENDPROC(__copy_from_user)
+
+	.section .fixup,"ax"
+	.align	2
+9:	sub	x2, x4, x1
+	mov	x3, x2
+10:	strb	wzr, [x0], #1			// zero remaining buffer space
+	subs	x3, x3, #1
+	b.ne	10b
+	mov	x0, x2				// bytes not copied
+	ret
+	.previous
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
new file mode 100644
index 000000000..84b6c9bb9
--- /dev/null
+++ b/arch/arm64/lib/copy_in_user.S
@@ -0,0 +1,63 @@
+/*
+ * Copy from user space to user space
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Copy from user space to user space (alignment handled by the hardware)
+ *
+ * Parameters:
+ *	x0 - to
+ *	x1 - from
+ *	x2 - n
+ * Returns:
+ *	x0 - bytes not copied
+ */
+ENTRY(__copy_in_user)
+	add	x4, x0, x2			// upper user buffer boundary
+	subs	x2, x2, #8
+	b.mi	2f
+1:
+USER(9f, ldr	x3, [x1], #8	)
+	subs	x2, x2, #8
+USER(9f, str	x3, [x0], #8	)
+	b.pl	1b
+2:	adds	x2, x2, #4
+	b.mi	3f
+USER(9f, ldr	w3, [x1], #4	)
+	sub	x2, x2, #4
+USER(9f, str	w3, [x0], #4	)
+3:	adds	x2, x2, #2
+	b.mi	4f
+USER(9f, ldrh	w3, [x1], #2	)
+	sub	x2, x2, #2
+USER(9f, strh	w3, [x0], #2	)
+4:	adds	x2, x2, #1
+	b.mi	5f
+USER(9f, ldrb	w3, [x1]	)
+USER(9f, strb	w3, [x0]	)
+5:	mov	x0, #0
+	ret
+ENDPROC(__copy_in_user)
+
+	.section .fixup,"ax"
+	.align	2
+9:	sub	x0, x4, x0			// bytes not copied
+	ret
+	.previous
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
new file mode 100644
index 000000000..512b9a7b9
--- /dev/null
+++ b/arch/arm64/lib/copy_page.S
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+
+/*
+ * Copy a page from src to dest (both are page aligned)
+ *
+ * Parameters:
+ *	x0 - dest
+ *	x1 - src
+ */
+ENTRY(copy_page)
+	/* Assume cache line size is 64 bytes. */
+	prfm	pldl1strm, [x1, #64]
+1:	ldp	x2, x3, [x1]
+	ldp	x4, x5, [x1, #16]
+	ldp	x6, x7, [x1, #32]
+	ldp	x8, x9, [x1, #48]
+	add	x1, x1, #64
+	prfm	pldl1strm, [x1, #64]
+	stnp	x2, x3, [x0]
+	stnp	x4, x5, [x0, #16]
+	stnp	x6, x7, [x0, #32]
+	stnp	x8, x9, [x0, #48]
+	add	x0, x0, #64
+	tst	x1, #(PAGE_SIZE - 1)
+	b.ne	1b
+	ret
+ENDPROC(copy_page)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
new file mode 100644
index 000000000..a0aeeb9b7
--- /dev/null
+++ b/arch/arm64/lib/copy_to_user.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Copy to user space from a kernel buffer (alignment handled by the hardware)
+ *
+ * Parameters:
+ *	x0 - to
+ *	x1 - from
+ *	x2 - n
+ * Returns:
+ *	x0 - bytes not copied
+ */
+ENTRY(__copy_to_user)
+	add	x4, x0, x2			// upper user buffer boundary
+	subs	x2, x2, #8
+	b.mi	2f
+1:
+	ldr	x3, [x1], #8
+	subs	x2, x2, #8
+USER(9f, str	x3, [x0], #8	)
+	b.pl	1b
+2:	adds	x2, x2, #4
+	b.mi	3f
+	ldr	w3, [x1], #4
+	sub	x2, x2, #4
+USER(9f, str	w3, [x0], #4	)
+3:	adds	x2, x2, #2
+	b.mi	4f
+	ldrh	w3, [x1], #2
+	sub	x2, x2, #2
+USER(9f, strh	w3, [x0], #2	)
+4:	adds	x2, x2, #1
+	b.mi	5f
+	ldrb	w3, [x1]
+USER(9f, strb	w3, [x0]	)
+5:	mov	x0, #0
+	ret
+ENDPROC(__copy_to_user)
+
+	.section .fixup,"ax"
+	.align	2
+9:	sub	x0, x4, x0			// bytes not copied
+	ret
+	.previous
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
new file mode 100644
index 000000000..dad4ec9bb
--- /dev/null
+++ b/arch/arm64/lib/delay.c
@@ -0,0 +1,55 @@
+/*
+ * Delay loops based on the OpenRISC implementation.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/timex.h>
+
+void __delay(unsigned long cycles)
+{
+	cycles_t start = get_cycles();
+
+	while ((get_cycles() - start) < cycles)
+		cpu_relax();
+}
+EXPORT_SYMBOL(__delay);
+
+inline void __const_udelay(unsigned long xloops)
+{
+	unsigned long loops;
+
+	loops = xloops * loops_per_jiffy * HZ;
+	__delay(loops >> 32);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+void __udelay(unsigned long usecs)
+{
+	__const_udelay(usecs * 0x10C7UL); /* 2**32 / 1000000 (rounded up) */
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+	__const_udelay(nsecs * 0x5UL); /* 2**32 / 1000000000 (rounded up) */
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
new file mode 100644
index 000000000..8636b7549
--- /dev/null
+++ b/arch/arm64/lib/memchr.S
@@ -0,0 +1,44 @@
+/*
+ * Based on arch/arm/lib/memchr.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Find a character in an area of memory.
+ *
+ * Parameters:
+ *	x0 - buf
+ *	x1 - c
+ *	x2 - n
+ * Returns:
+ *	x0 - address of first occurrence of 'c' or 0
+ */
+ENTRY(memchr)
+	and	w1, w1, #0xff
+1:	subs	x2, x2, #1
+	b.mi	2f
+	ldrb	w3, [x0], #1
+	cmp	w3, w1
+	b.ne	1b
+	sub	x0, x0, #1
+	ret
+2:	mov	x0, #0
+	ret
+ENDPROC(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
new file mode 100644
index 000000000..6ea0776ba
--- /dev/null
+++ b/arch/arm64/lib/memcmp.S
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+* compare memory areas(when two memory areas' offset are different,
+* alignment handled by the hardware)
+*
+* Parameters:
+*  x0 - const memory area 1 pointer
+*  x1 - const memory area 2 pointer
+*  x2 - the maximal compare byte length
+* Returns:
+*  x0 - a compare result, maybe less than, equal to, or greater than ZERO
+*/
+
+/* Parameters and result.  */
+src1		.req	x0
+src2		.req	x1
+limit		.req	x2
+result		.req	x0
+
+/* Internal variables.  */
+data1		.req	x3
+data1w		.req	w3
+data2		.req	x4
+data2w		.req	w4
+has_nul		.req	x5
+diff		.req	x6
+endloop		.req	x7
+tmp1		.req	x8
+tmp2		.req	x9
+tmp3		.req	x10
+pos		.req	x11
+limit_wd	.req	x12
+mask		.req	x13
+
+ENTRY(memcmp)
+	cbz	limit, .Lret0
+	eor	tmp1, src1, src2
+	tst	tmp1, #7
+	b.ne	.Lmisaligned8
+	ands	tmp1, src1, #7
+	b.ne	.Lmutual_align
+	sub	limit_wd, limit, #1 /* limit != 0, so no underflow.  */
+	lsr	limit_wd, limit_wd, #3 /* Convert to Dwords.  */
+	/*
+	* The input source addresses are at alignment boundary.
+	* Directly compare eight bytes each time.
+	*/
+.Lloop_aligned:
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+.Lstart_realigned:
+	subs	limit_wd, limit_wd, #1
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, cs	/* Last Dword or differences.  */
+	cbz	endloop, .Lloop_aligned
+
+	/* Not reached the limit, must have found a diff.  */
+	tbz	limit_wd, #63, .Lnot_limit
+
+	/* Limit % 8 == 0 => the diff is in the last 8 bytes. */
+	ands	limit, limit, #7
+	b.eq	.Lnot_limit
+	/*
+	* The remained bytes less than 8. It is needed to extract valid data
+	* from last eight bytes of the intended memory range.
+	*/
+	lsl	limit, limit, #3	/* bytes-> bits.  */
+	mov	mask, #~0
+CPU_BE( lsr	mask, mask, limit )
+CPU_LE( lsl	mask, mask, limit )
+	bic	data1, data1, mask
+	bic	data2, data2, mask
+
+	orr	diff, diff, mask
+	b	.Lnot_limit
+
+.Lmutual_align:
+	/*
+	* Sources are mutually aligned, but are not currently at an
+	* alignment boundary. Round down the addresses and then mask off
+	* the bytes that precede the start point.
+	*/
+	bic	src1, src1, #7
+	bic	src2, src2, #7
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+	/*
+	* We can not add limit with alignment offset(tmp1) here. Since the
+	* addition probably make the limit overflown.
+	*/
+	sub	limit_wd, limit, #1/*limit != 0, so no underflow.*/
+	and	tmp3, limit_wd, #7
+	lsr	limit_wd, limit_wd, #3
+	add	tmp3, tmp3, tmp1
+	add	limit_wd, limit_wd, tmp3, lsr #3
+	add	limit, limit, tmp1/* Adjust the limit for the extra.  */
+
+	lsl	tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/
+	neg	tmp1, tmp1/* Bits to alignment -64.  */
+	mov	tmp2, #~0
+	/*mask off the non-intended bytes before the start address.*/
+CPU_BE( lsl	tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/
+	/* Little-endian.  Early bytes are at LSB.  */
+CPU_LE( lsr	tmp2, tmp2, tmp1 )
+
+	orr	data1, data1, tmp2
+	orr	data2, data2, tmp2
+	b	.Lstart_realigned
+
+	/*src1 and src2 have different alignment offset.*/
+.Lmisaligned8:
+	cmp	limit, #8
+	b.lo	.Ltiny8proc /*limit < 8: compare byte by byte*/
+
+	and	tmp1, src1, #7
+	neg	tmp1, tmp1
+	add	tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/
+	and	tmp2, src2, #7
+	neg	tmp2, tmp2
+	add	tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/
+	subs	tmp3, tmp1, tmp2
+	csel	pos, tmp1, tmp2, hi /*Choose the maximum.*/
+
+	sub	limit, limit, pos
+	/*compare the proceeding bytes in the first 8 byte segment.*/
+.Ltinycmp:
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	subs	pos, pos, #1
+	ccmp	data1w, data2w, #0, ne  /* NZCV = 0b0000.  */
+	b.eq	.Ltinycmp
+	cbnz	pos, 1f /*diff occurred before the last byte.*/
+	cmp	data1w, data2w
+	b.eq	.Lstart_align
+1:
+	sub	result, data1, data2
+	ret
+
+.Lstart_align:
+	lsr	limit_wd, limit, #3
+	cbz	limit_wd, .Lremain8
+
+	ands	xzr, src1, #7
+	b.eq	.Lrecal_offset
+	/*process more leading bytes to make src1 aligned...*/
+	add	src1, src1, tmp3 /*backwards src1 to alignment boundary*/
+	add	src2, src2, tmp3
+	sub	limit, limit, tmp3
+	lsr	limit_wd, limit, #3
+	cbz	limit_wd, .Lremain8
+	/*load 8 bytes from aligned SRC1..*/
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+
+	subs	limit_wd, limit_wd, #1
+	eor	diff, data1, data2  /*Non-zero if differences found.*/
+	csinv	endloop, diff, xzr, ne
+	cbnz	endloop, .Lunequal_proc
+	/*How far is the current SRC2 from the alignment boundary...*/
+	and	tmp3, tmp3, #7
+
+.Lrecal_offset:/*src1 is aligned now..*/
+	neg	pos, tmp3
+.Lloopcmp_proc:
+	/*
+	* Divide the eight bytes into two parts. First,backwards the src2
+	* to an alignment boundary,load eight bytes and compare from
+	* the SRC2 alignment boundary. If all 8 bytes are equal,then start
+	* the second part's comparison. Otherwise finish the comparison.
+	* This special handle can garantee all the accesses are in the
+	* thread/task space in avoid to overrange access.
+	*/
+	ldr	data1, [src1,pos]
+	ldr	data2, [src2,pos]
+	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	cbnz	diff, .Lnot_limit
+
+	/*The second part process*/
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	subs	limit_wd, limit_wd, #1
+	csinv	endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+	cbz	endloop, .Lloopcmp_proc
+.Lunequal_proc:
+	cbz	diff, .Lremain8
+
+/*There is differnence occured in the latest comparison.*/
+.Lnot_limit:
+/*
+* For little endian,reverse the low significant equal bits into MSB,then
+* following CLZ can find how many equal bits exist.
+*/
+CPU_LE( rev	diff, diff )
+CPU_LE( rev	data1, data1 )
+CPU_LE( rev	data2, data2 )
+
+	/*
+	* The MS-non-zero bit of DIFF marks either the first bit
+	* that is different, or the end of the significant data.
+	* Shifting left now will bring the critical information into the
+	* top bits.
+	*/
+	clz	pos, diff
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/*
+	* We need to zero-extend (char is unsigned) the value and then
+	* perform a signed subtraction.
+	*/
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+
+.Lremain8:
+	/* Limit % 8 == 0 =>. all data are equal.*/
+	ands	limit, limit, #7
+	b.eq	.Lret0
+
+.Ltiny8proc:
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	subs	limit, limit, #1
+
+	ccmp	data1w, data2w, #0, ne  /* NZCV = 0b0000. */
+	b.eq	.Ltiny8proc
+	sub	result, data1, data2
+	ret
+.Lret0:
+	mov	result, #0
+	ret
+ENDPROC(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
new file mode 100644
index 000000000..8a9a96d3d
--- /dev/null
+++ b/arch/arm64/lib/memcpy.S
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+/*
+ * Copy a buffer from src to dest (alignment handled by the hardware)
+ *
+ * Parameters:
+ *	x0 - dest
+ *	x1 - src
+ *	x2 - n
+ * Returns:
+ *	x0 - dest
+ */
+dstin	.req	x0
+src	.req	x1
+count	.req	x2
+tmp1	.req	x3
+tmp1w	.req	w3
+tmp2	.req	x4
+tmp2w	.req	w4
+tmp3	.req	x5
+tmp3w	.req	w5
+dst	.req	x6
+
+A_l	.req	x7
+A_h	.req	x8
+B_l	.req	x9
+B_h	.req	x10
+C_l	.req	x11
+C_h	.req	x12
+D_l	.req	x13
+D_h	.req	x14
+
+ENTRY(memcpy)
+	mov	dst, dstin
+	cmp	count, #16
+	/*When memory length is less than 16, the accessed are not aligned.*/
+	b.lo	.Ltiny15
+
+	neg	tmp2, src
+	ands	tmp2, tmp2, #15/* Bytes to reach alignment. */
+	b.eq	.LSrcAligned
+	sub	count, count, tmp2
+	/*
+	* Copy the leading memory data from src to dst in an increasing
+	* address order.By this way,the risk of overwritting the source
+	* memory data is eliminated when the distance between src and
+	* dst is less than 16. The memory accesses here are alignment.
+	*/
+	tbz	tmp2, #0, 1f
+	ldrb	tmp1w, [src], #1
+	strb	tmp1w, [dst], #1
+1:
+	tbz	tmp2, #1, 2f
+	ldrh	tmp1w, [src], #2
+	strh	tmp1w, [dst], #2
+2:
+	tbz	tmp2, #2, 3f
+	ldr	tmp1w, [src], #4
+	str	tmp1w, [dst], #4
+3:
+	tbz	tmp2, #3, .LSrcAligned
+	ldr	tmp1, [src],#8
+	str	tmp1, [dst],#8
+
+.LSrcAligned:
+	cmp	count, #64
+	b.ge	.Lcpy_over64
+	/*
+	* Deal with small copies quickly by dropping straight into the
+	* exit block.
+	*/
+.Ltail63:
+	/*
+	* Copy up to 48 bytes of data. At this point we only need the
+	* bottom 6 bits of count to be accurate.
+	*/
+	ands	tmp1, count, #0x30
+	b.eq	.Ltiny15
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	ldp	A_l, A_h, [src], #16
+	stp	A_l, A_h, [dst], #16
+1:
+	ldp	A_l, A_h, [src], #16
+	stp	A_l, A_h, [dst], #16
+2:
+	ldp	A_l, A_h, [src], #16
+	stp	A_l, A_h, [dst], #16
+.Ltiny15:
+	/*
+	* Prefer to break one ldp/stp into several load/store to access
+	* memory in an increasing address order,rather than to load/store 16
+	* bytes from (src-16) to (dst-16) and to backward the src to aligned
+	* address,which way is used in original cortex memcpy. If keeping
+	* the original memcpy process here, memmove need to satisfy the
+	* precondition that src address is at least 16 bytes bigger than dst
+	* address,otherwise some source data will be overwritten when memove
+	* call memcpy directly. To make memmove simpler and decouple the
+	* memcpy's dependency on memmove, withdrew the original process.
+	*/
+	tbz	count, #3, 1f
+	ldr	tmp1, [src], #8
+	str	tmp1, [dst], #8
+1:
+	tbz	count, #2, 2f
+	ldr	tmp1w, [src], #4
+	str	tmp1w, [dst], #4
+2:
+	tbz	count, #1, 3f
+	ldrh	tmp1w, [src], #2
+	strh	tmp1w, [dst], #2
+3:
+	tbz	count, #0, .Lexitfunc
+	ldrb	tmp1w, [src]
+	strb	tmp1w, [dst]
+
+.Lexitfunc:
+	ret
+
+.Lcpy_over64:
+	subs	count, count, #128
+	b.ge	.Lcpy_body_large
+	/*
+	* Less than 128 bytes to copy, so handle 64 here and then jump
+	* to the tail.
+	*/
+	ldp	A_l, A_h, [src],#16
+	stp	A_l, A_h, [dst],#16
+	ldp	B_l, B_h, [src],#16
+	ldp	C_l, C_h, [src],#16
+	stp	B_l, B_h, [dst],#16
+	stp	C_l, C_h, [dst],#16
+	ldp	D_l, D_h, [src],#16
+	stp	D_l, D_h, [dst],#16
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+	ret
+
+	/*
+	* Critical loop.  Start at a new cache line boundary.  Assuming
+	* 64 bytes per line this ensures the entire loop is in one line.
+	*/
+	.p2align	L1_CACHE_SHIFT
+.Lcpy_body_large:
+	/* pre-get 64 bytes data. */
+	ldp	A_l, A_h, [src],#16
+	ldp	B_l, B_h, [src],#16
+	ldp	C_l, C_h, [src],#16
+	ldp	D_l, D_h, [src],#16
+1:
+	/*
+	* interlace the load of next 64 bytes data block with store of the last
+	* loaded 64 bytes data.
+	*/
+	stp	A_l, A_h, [dst],#16
+	ldp	A_l, A_h, [src],#16
+	stp	B_l, B_h, [dst],#16
+	ldp	B_l, B_h, [src],#16
+	stp	C_l, C_h, [dst],#16
+	ldp	C_l, C_h, [src],#16
+	stp	D_l, D_h, [dst],#16
+	ldp	D_l, D_h, [src],#16
+	subs	count, count, #64
+	b.ge	1b
+	stp	A_l, A_h, [dst],#16
+	stp	B_l, B_h, [dst],#16
+	stp	C_l, C_h, [dst],#16
+	stp	D_l, D_h, [dst],#16
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+	ret
+ENDPROC(memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
new file mode 100644
index 000000000..57b19ea2d
--- /dev/null
+++ b/arch/arm64/lib/memmove.S
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+/*
+ * Move a buffer from src to test (alignment handled by the hardware).
+ * If dest <= src, call memcpy, otherwise copy in reverse order.
+ *
+ * Parameters:
+ *	x0 - dest
+ *	x1 - src
+ *	x2 - n
+ * Returns:
+ *	x0 - dest
+ */
+dstin	.req	x0
+src	.req	x1
+count	.req	x2
+tmp1	.req	x3
+tmp1w	.req	w3
+tmp2	.req	x4
+tmp2w	.req	w4
+tmp3	.req	x5
+tmp3w	.req	w5
+dst	.req	x6
+
+A_l	.req	x7
+A_h	.req	x8
+B_l	.req	x9
+B_h	.req	x10
+C_l	.req	x11
+C_h	.req	x12
+D_l	.req	x13
+D_h	.req	x14
+
+ENTRY(memmove)
+	cmp	dstin, src
+	b.lo	memcpy
+	add	tmp1, src, count
+	cmp	dstin, tmp1
+	b.hs	memcpy		/* No overlap.  */
+
+	add	dst, dstin, count
+	add	src, src, count
+	cmp	count, #16
+	b.lo	.Ltail15  /*probably non-alignment accesses.*/
+
+	ands	tmp2, src, #15     /* Bytes to reach alignment.  */
+	b.eq	.LSrcAligned
+	sub	count, count, tmp2
+	/*
+	* process the aligned offset length to make the src aligned firstly.
+	* those extra instructions' cost is acceptable. It also make the
+	* coming accesses are based on aligned address.
+	*/
+	tbz	tmp2, #0, 1f
+	ldrb	tmp1w, [src, #-1]!
+	strb	tmp1w, [dst, #-1]!
+1:
+	tbz	tmp2, #1, 2f
+	ldrh	tmp1w, [src, #-2]!
+	strh	tmp1w, [dst, #-2]!
+2:
+	tbz	tmp2, #2, 3f
+	ldr	tmp1w, [src, #-4]!
+	str	tmp1w, [dst, #-4]!
+3:
+	tbz	tmp2, #3, .LSrcAligned
+	ldr	tmp1, [src, #-8]!
+	str	tmp1, [dst, #-8]!
+
+.LSrcAligned:
+	cmp	count, #64
+	b.ge	.Lcpy_over64
+
+	/*
+	* Deal with small copies quickly by dropping straight into the
+	* exit block.
+	*/
+.Ltail63:
+	/*
+	* Copy up to 48 bytes of data. At this point we only need the
+	* bottom 6 bits of count to be accurate.
+	*/
+	ands	tmp1, count, #0x30
+	b.eq	.Ltail15
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	ldp	A_l, A_h, [src, #-16]!
+	stp	A_l, A_h, [dst, #-16]!
+1:
+	ldp	A_l, A_h, [src, #-16]!
+	stp	A_l, A_h, [dst, #-16]!
+2:
+	ldp	A_l, A_h, [src, #-16]!
+	stp	A_l, A_h, [dst, #-16]!
+
+.Ltail15:
+	tbz	count, #3, 1f
+	ldr	tmp1, [src, #-8]!
+	str	tmp1, [dst, #-8]!
+1:
+	tbz	count, #2, 2f
+	ldr	tmp1w, [src, #-4]!
+	str	tmp1w, [dst, #-4]!
+2:
+	tbz	count, #1, 3f
+	ldrh	tmp1w, [src, #-2]!
+	strh	tmp1w, [dst, #-2]!
+3:
+	tbz	count, #0, .Lexitfunc
+	ldrb	tmp1w, [src, #-1]
+	strb	tmp1w, [dst, #-1]
+
+.Lexitfunc:
+	ret
+
+.Lcpy_over64:
+	subs	count, count, #128
+	b.ge	.Lcpy_body_large
+	/*
+	* Less than 128 bytes to copy, so handle 64 bytes here and then jump
+	* to the tail.
+	*/
+	ldp	A_l, A_h, [src, #-16]
+	stp	A_l, A_h, [dst, #-16]
+	ldp	B_l, B_h, [src, #-32]
+	ldp	C_l, C_h, [src, #-48]
+	stp	B_l, B_h, [dst, #-32]
+	stp	C_l, C_h, [dst, #-48]
+	ldp	D_l, D_h, [src, #-64]!
+	stp	D_l, D_h, [dst, #-64]!
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+	ret
+
+	/*
+	* Critical loop. Start at a new cache line boundary. Assuming
+	* 64 bytes per line this ensures the entire loop is in one line.
+	*/
+	.p2align	L1_CACHE_SHIFT
+.Lcpy_body_large:
+	/* pre-load 64 bytes data. */
+	ldp	A_l, A_h, [src, #-16]
+	ldp	B_l, B_h, [src, #-32]
+	ldp	C_l, C_h, [src, #-48]
+	ldp	D_l, D_h, [src, #-64]!
+1:
+	/*
+	* interlace the load of next 64 bytes data block with store of the last
+	* loaded 64 bytes data.
+	*/
+	stp	A_l, A_h, [dst, #-16]
+	ldp	A_l, A_h, [src, #-16]
+	stp	B_l, B_h, [dst, #-32]
+	ldp	B_l, B_h, [src, #-32]
+	stp	C_l, C_h, [dst, #-48]
+	ldp	C_l, C_h, [src, #-48]
+	stp	D_l, D_h, [dst, #-64]!
+	ldp	D_l, D_h, [src, #-64]!
+	subs	count, count, #64
+	b.ge	1b
+	stp	A_l, A_h, [dst, #-16]
+	stp	B_l, B_h, [dst, #-32]
+	stp	C_l, C_h, [dst, #-48]
+	stp	D_l, D_h, [dst, #-64]!
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+	ret
+ENDPROC(memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
new file mode 100644
index 000000000..7c72dfd36
--- /dev/null
+++ b/arch/arm64/lib/memset.S
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+/*
+ * Fill in the buffer with character c (alignment handled by the hardware)
+ *
+ * Parameters:
+ *	x0 - buf
+ *	x1 - c
+ *	x2 - n
+ * Returns:
+ *	x0 - buf
+ */
+
+dstin		.req	x0
+val		.req	w1
+count		.req	x2
+tmp1		.req	x3
+tmp1w		.req	w3
+tmp2		.req	x4
+tmp2w		.req	w4
+zva_len_x	.req	x5
+zva_len		.req	w5
+zva_bits_x	.req	x6
+
+A_l		.req	x7
+A_lw		.req	w7
+dst		.req	x8
+tmp3w		.req	w9
+tmp3		.req	x9
+
+ENTRY(memset)
+	mov	dst, dstin	/* Preserve return value.  */
+	and	A_lw, val, #255
+	orr	A_lw, A_lw, A_lw, lsl #8
+	orr	A_lw, A_lw, A_lw, lsl #16
+	orr	A_l, A_l, A_l, lsl #32
+
+	cmp	count, #15
+	b.hi	.Lover16_proc
+	/*All store maybe are non-aligned..*/
+	tbz	count, #3, 1f
+	str	A_l, [dst], #8
+1:
+	tbz	count, #2, 2f
+	str	A_lw, [dst], #4
+2:
+	tbz	count, #1, 3f
+	strh	A_lw, [dst], #2
+3:
+	tbz	count, #0, 4f
+	strb	A_lw, [dst]
+4:
+	ret
+
+.Lover16_proc:
+	/*Whether  the start address is aligned with 16.*/
+	neg	tmp2, dst
+	ands	tmp2, tmp2, #15
+	b.eq	.Laligned
+/*
+* The count is not less than 16, we can use stp to store the start 16 bytes,
+* then adjust the dst aligned with 16.This process will make the current
+* memory address at alignment boundary.
+*/
+	stp	A_l, A_l, [dst] /*non-aligned store..*/
+	/*make the dst aligned..*/
+	sub	count, count, tmp2
+	add	dst, dst, tmp2
+
+.Laligned:
+	cbz	A_l, .Lzero_mem
+
+.Ltail_maybe_long:
+	cmp	count, #64
+	b.ge	.Lnot_short
+.Ltail63:
+	ands	tmp1, count, #0x30
+	b.eq	3f
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	stp	A_l, A_l, [dst], #16
+1:
+	stp	A_l, A_l, [dst], #16
+2:
+	stp	A_l, A_l, [dst], #16
+/*
+* The last store length is less than 16,use stp to write last 16 bytes.
+* It will lead some bytes written twice and the access is non-aligned.
+*/
+3:
+	ands	count, count, #15
+	cbz	count, 4f
+	add	dst, dst, count
+	stp	A_l, A_l, [dst, #-16]	/* Repeat some/all of last store. */
+4:
+	ret
+
+	/*
+	* Critical loop. Start at a new cache line boundary. Assuming
+	* 64 bytes per line, this ensures the entire loop is in one line.
+	*/
+	.p2align	L1_CACHE_SHIFT
+.Lnot_short:
+	sub	dst, dst, #16/* Pre-bias.  */
+	sub	count, count, #64
+1:
+	stp	A_l, A_l, [dst, #16]
+	stp	A_l, A_l, [dst, #32]
+	stp	A_l, A_l, [dst, #48]
+	stp	A_l, A_l, [dst, #64]!
+	subs	count, count, #64
+	b.ge	1b
+	tst	count, #0x3f
+	add	dst, dst, #16
+	b.ne	.Ltail63
+.Lexitfunc:
+	ret
+
+	/*
+	* For zeroing memory, check to see if we can use the ZVA feature to
+	* zero entire 'cache' lines.
+	*/
+.Lzero_mem:
+	cmp	count, #63
+	b.le	.Ltail63
+	/*
+	* For zeroing small amounts of memory, it's not worth setting up
+	* the line-clear code.
+	*/
+	cmp	count, #128
+	b.lt	.Lnot_short /*count is at least  128 bytes*/
+
+	mrs	tmp1, dczid_el0
+	tbnz	tmp1, #4, .Lnot_short
+	mov	tmp3w, #4
+	and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
+	lsl	zva_len, tmp3w, zva_len
+
+	ands	tmp3w, zva_len, #63
+	/*
+	* ensure the zva_len is not less than 64.
+	* It is not meaningful to use ZVA if the block size is less than 64.
+	*/
+	b.ne	.Lnot_short
+.Lzero_by_line:
+	/*
+	* Compute how far we need to go to become suitably aligned. We're
+	* already at quad-word alignment.
+	*/
+	cmp	count, zva_len_x
+	b.lt	.Lnot_short		/* Not enough to reach alignment.  */
+	sub	zva_bits_x, zva_len_x, #1
+	neg	tmp2, dst
+	ands	tmp2, tmp2, zva_bits_x
+	b.eq	2f			/* Already aligned.  */
+	/* Not aligned, check that there's enough to copy after alignment.*/
+	sub	tmp1, count, tmp2
+	/*
+	* grantee the remain length to be ZVA is bigger than 64,
+	* avoid to make the 2f's process over mem range.*/
+	cmp	tmp1, #64
+	ccmp	tmp1, zva_len_x, #8, ge	/* NZCV=0b1000 */
+	b.lt	.Lnot_short
+	/*
+	* We know that there's at least 64 bytes to zero and that it's safe
+	* to overrun by 64 bytes.
+	*/
+	mov	count, tmp1
+1:
+	stp	A_l, A_l, [dst]
+	stp	A_l, A_l, [dst, #16]
+	stp	A_l, A_l, [dst, #32]
+	subs	tmp2, tmp2, #64
+	stp	A_l, A_l, [dst, #48]
+	add	dst, dst, #64
+	b.ge	1b
+	/* We've overrun a bit, so adjust dst downwards.*/
+	add	dst, dst, tmp2
+2:
+	sub	count, count, zva_len_x
+3:
+	dc	zva, dst
+	add	dst, dst, zva_len_x
+	subs	count, count, zva_len_x
+	b.ge	3b
+	ands	count, count, zva_bits_x
+	b.ne	.Ltail_maybe_long
+	ret
+ENDPROC(memset)
diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S
new file mode 100644
index 000000000..dae0cf559
--- /dev/null
+++ b/arch/arm64/lib/strchr.S
@@ -0,0 +1,42 @@
+/*
+ * Based on arch/arm/lib/strchr.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Find the first occurrence of a character in a string.
+ *
+ * Parameters:
+ *	x0 - str
+ *	x1 - c
+ * Returns:
+ *	x0 - address of first occurrence of 'c' or 0
+ */
+ENTRY(strchr)
+	and	w1, w1, #0xff
+1:	ldrb	w2, [x0], #1
+	cmp	w2, w1
+	ccmp	w2, wzr, #4, ne
+	b.ne	1b
+	sub	x0, x0, #1
+	cmp	w2, w1
+	csel	x0, x0, xzr, eq
+	ret
+ENDPROC(strchr)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
new file mode 100644
index 000000000..42f828b06
--- /dev/null
+++ b/arch/arm64/lib/strcmp.S
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * compare two strings
+ *
+ * Parameters:
+ *	x0 - const string 1 pointer
+ *    x1 - const string 2 pointer
+ * Returns:
+ * x0 - an integer less than, equal to, or greater than zero
+ * if  s1  is  found, respectively, to be less than, to match,
+ * or be greater than s2.
+ */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result.  */
+src1		.req	x0
+src2		.req	x1
+result		.req	x0
+
+/* Internal variables.  */
+data1		.req	x2
+data1w		.req	w2
+data2		.req	x3
+data2w		.req	w3
+has_nul		.req	x4
+diff		.req	x5
+syndrome	.req	x6
+tmp1		.req	x7
+tmp2		.req	x8
+tmp3		.req	x9
+zeroones	.req	x10
+pos		.req	x11
+
+ENTRY(strcmp)
+	eor	tmp1, src1, src2
+	mov	zeroones, #REP8_01
+	tst	tmp1, #7
+	b.ne	.Lmisaligned8
+	ands	tmp1, src1, #7
+	b.ne	.Lmutual_align
+
+	/*
+	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	* can be done in parallel across the entire word.
+	*/
+.Lloop_aligned:
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+.Lstart_realigned:
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	orr	syndrome, diff, has_nul
+	cbz	syndrome, .Lloop_aligned
+	b	.Lcal_cmpresult
+
+.Lmutual_align:
+	/*
+	* Sources are mutually aligned, but are not currently at an
+	* alignment boundary.  Round down the addresses and then mask off
+	* the bytes that preceed the start point.
+	*/
+	bic	src1, src1, #7
+	bic	src2, src2, #7
+	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
+	ldr	data1, [src1], #8
+	neg	tmp1, tmp1		/* Bits to alignment -64.  */
+	ldr	data2, [src2], #8
+	mov	tmp2, #~0
+	/* Big-endian.  Early bytes are at MSB.  */
+CPU_BE( lsl	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
+	/* Little-endian.  Early bytes are at LSB.  */
+CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
+
+	orr	data1, data1, tmp2
+	orr	data2, data2, tmp2
+	b	.Lstart_realigned
+
+.Lmisaligned8:
+	/*
+	* Get the align offset length to compare per byte first.
+	* After this process, one string's address will be aligned.
+	*/
+	and	tmp1, src1, #7
+	neg	tmp1, tmp1
+	add	tmp1, tmp1, #8
+	and	tmp2, src2, #7
+	neg	tmp2, tmp2
+	add	tmp2, tmp2, #8
+	subs	tmp3, tmp1, tmp2
+	csel	pos, tmp1, tmp2, hi /*Choose the maximum. */
+.Ltinycmp:
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	subs	pos, pos, #1
+	ccmp	data1w, #1, #0, ne  /* NZCV = 0b0000.  */
+	ccmp	data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
+	b.eq	.Ltinycmp
+	cbnz	pos, 1f /*find the null or unequal...*/
+	cmp	data1w, #1
+	ccmp	data1w, data2w, #0, cs
+	b.eq	.Lstart_align /*the last bytes are equal....*/
+1:
+	sub	result, data1, data2
+	ret
+
+.Lstart_align:
+	ands	xzr, src1, #7
+	b.eq	.Lrecal_offset
+	/*process more leading bytes to make str1 aligned...*/
+	add	src1, src1, tmp3
+	add	src2, src2, tmp3
+	/*load 8 bytes from aligned str1 and non-aligned str2..*/
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	eor	diff, data1, data2 /* Non-zero if differences found.  */
+	orr	syndrome, diff, has_nul
+	cbnz	syndrome, .Lcal_cmpresult
+	/*How far is the current str2 from the alignment boundary...*/
+	and	tmp3, tmp3, #7
+.Lrecal_offset:
+	neg	pos, tmp3
+.Lloopcmp_proc:
+	/*
+	* Divide the eight bytes into two parts. First,backwards the src2
+	* to an alignment boundary,load eight bytes from the SRC2 alignment
+	* boundary,then compare with the relative bytes from SRC1.
+	* If all 8 bytes are equal,then start the second part's comparison.
+	* Otherwise finish the comparison.
+	* This special handle can garantee all the accesses are in the
+	* thread/task space in avoid to overrange access.
+	*/
+	ldr	data1, [src1,pos]
+	ldr	data2, [src2,pos]
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	orr	syndrome, diff, has_nul
+	cbnz	syndrome, .Lcal_cmpresult
+
+	/*The second part process*/
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	orr	syndrome, diff, has_nul
+	cbz	syndrome, .Lloopcmp_proc
+
+.Lcal_cmpresult:
+	/*
+	* reversed the byte-order as big-endian,then CLZ can find the most
+	* significant zero bits.
+	*/
+CPU_LE( rev	syndrome, syndrome )
+CPU_LE( rev	data1, data1 )
+CPU_LE( rev	data2, data2 )
+
+	/*
+	* For big-endian we cannot use the trick with the syndrome value
+	* as carry-propagation can corrupt the upper bits if the trailing
+	* bytes in the string contain 0x01.
+	* However, if there is no NUL byte in the dword, we can generate
+	* the result directly.  We ca not just subtract the bytes as the
+	* MSB might be significant.
+	*/
+CPU_BE( cbnz	has_nul, 1f )
+CPU_BE( cmp	data1, data2 )
+CPU_BE( cset	result, ne )
+CPU_BE( cneg	result, result, lo )
+CPU_BE( ret )
+CPU_BE( 1: )
+	/*Re-compute the NUL-byte detection, using a byte-reversed value. */
+CPU_BE(	rev	tmp3, data1 )
+CPU_BE(	sub	tmp1, tmp3, zeroones )
+CPU_BE(	orr	tmp2, tmp3, #REP8_7f )
+CPU_BE(	bic	has_nul, tmp1, tmp2 )
+CPU_BE(	rev	has_nul, has_nul )
+CPU_BE(	orr	syndrome, diff, has_nul )
+
+	clz	pos, syndrome
+	/*
+	* The MS-non-zero bit of the syndrome marks either the first bit
+	* that is different, or the top bit of the first zero byte.
+	* Shifting left now will bring the critical information into the
+	* top bits.
+	*/
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/*
+	* But we need to zero-extend (char is unsigned) the value and then
+	* perform a signed 32-bit subtraction.
+	*/
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+ENDPROC(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
new file mode 100644
index 000000000..987b68b9c
--- /dev/null
+++ b/arch/arm64/lib/strlen.S
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * calculate the length of a string
+ *
+ * Parameters:
+ *	x0 - const string pointer
+ * Returns:
+ *	x0 - the return length of specific string
+ */
+
+/* Arguments and results.  */
+srcin		.req	x0
+len		.req	x0
+
+/* Locals and temporaries.  */
+src		.req	x1
+data1		.req	x2
+data2		.req	x3
+data2a		.req	x4
+has_nul1	.req	x5
+has_nul2	.req	x6
+tmp1		.req	x7
+tmp2		.req	x8
+tmp3		.req	x9
+tmp4		.req	x10
+zeroones	.req	x11
+pos		.req	x12
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+ENTRY(strlen)
+	mov	zeroones, #REP8_01
+	bic	src, srcin, #15
+	ands	tmp1, srcin, #15
+	b.ne	.Lmisaligned
+	/*
+	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	* can be done in parallel across the entire word.
+	*/
+	/*
+	* The inner loop deals with two Dwords at a time. This has a
+	* slightly higher start-up cost, but we should win quite quickly,
+	* especially on cores with a high number of issue slots per
+	* cycle, as we get much better parallelism out of the operations.
+	*/
+.Lloop:
+	ldp	data1, data2, [src], #16
+.Lrealigned:
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+	bics	has_nul2, tmp3, tmp4
+	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
+	b.eq	.Lloop
+
+	sub	len, src, srcin
+	cbz	has_nul1, .Lnul_in_data2
+CPU_BE(	mov	data2, data1 )	/*prepare data to re-calculate the syndrome*/
+	sub	len, len, #8
+	mov	has_nul2, has_nul1
+.Lnul_in_data2:
+	/*
+	* For big-endian, carry propagation (if the final byte in the
+	* string is 0x01) means we cannot use has_nul directly.  The
+	* easiest way to get the correct byte is to byte-swap the data
+	* and calculate the syndrome a second time.
+	*/
+CPU_BE( rev	data2, data2 )
+CPU_BE( sub	tmp1, data2, zeroones )
+CPU_BE( orr	tmp2, data2, #REP8_7f )
+CPU_BE( bic	has_nul2, tmp1, tmp2 )
+
+	sub	len, len, #8
+	rev	has_nul2, has_nul2
+	clz	pos, has_nul2
+	add	len, len, pos, lsr #3		/* Bits to bytes.  */
+	ret
+
+.Lmisaligned:
+	cmp	tmp1, #8
+	neg	tmp1, tmp1
+	ldp	data1, data2, [src], #16
+	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
+	mov	tmp2, #~0
+	/* Big-endian.  Early bytes are at MSB.  */
+CPU_BE( lsl	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
+	/* Little-endian.  Early bytes are at LSB.  */
+CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
+
+	orr	data1, data1, tmp2
+	orr	data2a, data2, tmp2
+	csinv	data1, data1, xzr, le
+	csel	data2, data2, data2a, le
+	b	.Lrealigned
+ENDPROC(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
new file mode 100644
index 000000000..0224cf5a5
--- /dev/null
+++ b/arch/arm64/lib/strncmp.S
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * compare two strings
+ *
+ * Parameters:
+ *  x0 - const string 1 pointer
+ *  x1 - const string 2 pointer
+ *  x2 - the maximal length to be compared
+ * Returns:
+ *  x0 - an integer less than, equal to, or greater than zero if s1 is found,
+ *     respectively, to be less than, to match, or be greater than s2.
+ */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result.  */
+src1		.req	x0
+src2		.req	x1
+limit		.req	x2
+result		.req	x0
+
+/* Internal variables.  */
+data1		.req	x3
+data1w		.req	w3
+data2		.req	x4
+data2w		.req	w4
+has_nul		.req	x5
+diff		.req	x6
+syndrome	.req	x7
+tmp1		.req	x8
+tmp2		.req	x9
+tmp3		.req	x10
+zeroones	.req	x11
+pos		.req	x12
+limit_wd	.req	x13
+mask		.req	x14
+endloop		.req	x15
+
+ENTRY(strncmp)
+	cbz	limit, .Lret0
+	eor	tmp1, src1, src2
+	mov	zeroones, #REP8_01
+	tst	tmp1, #7
+	b.ne	.Lmisaligned8
+	ands	tmp1, src1, #7
+	b.ne	.Lmutual_align
+	/* Calculate the number of full and partial words -1.  */
+	/*
+	* when limit is mulitply of 8, if not sub 1,
+	* the judgement of last dword will wrong.
+	*/
+	sub	limit_wd, limit, #1 /* limit != 0, so no underflow.  */
+	lsr	limit_wd, limit_wd, #3  /* Convert to Dwords.  */
+
+	/*
+	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	* can be done in parallel across the entire word.
+	*/
+.Lloop_aligned:
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+.Lstart_realigned:
+	subs	limit_wd, limit_wd, #1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, pl  /* Last Dword or differences.*/
+	bics	has_nul, tmp1, tmp2 /* Non-zero if NUL terminator.  */
+	ccmp	endloop, #0, #0, eq
+	b.eq	.Lloop_aligned
+
+	/*Not reached the limit, must have found the end or a diff.  */
+	tbz	limit_wd, #63, .Lnot_limit
+
+	/* Limit % 8 == 0 => all bytes significant.  */
+	ands	limit, limit, #7
+	b.eq	.Lnot_limit
+
+	lsl	limit, limit, #3    /* Bits -> bytes.  */
+	mov	mask, #~0
+CPU_BE( lsr	mask, mask, limit )
+CPU_LE( lsl	mask, mask, limit )
+	bic	data1, data1, mask
+	bic	data2, data2, mask
+
+	/* Make sure that the NUL byte is marked in the syndrome.  */
+	orr	has_nul, has_nul, mask
+
+.Lnot_limit:
+	orr	syndrome, diff, has_nul
+	b	.Lcal_cmpresult
+
+.Lmutual_align:
+	/*
+	* Sources are mutually aligned, but are not currently at an
+	* alignment boundary.  Round down the addresses and then mask off
+	* the bytes that precede the start point.
+	* We also need to adjust the limit calculations, but without
+	* overflowing if the limit is near ULONG_MAX.
+	*/
+	bic	src1, src1, #7
+	bic	src2, src2, #7
+	ldr	data1, [src1], #8
+	neg	tmp3, tmp1, lsl #3  /* 64 - bits(bytes beyond align). */
+	ldr	data2, [src2], #8
+	mov	tmp2, #~0
+	sub	limit_wd, limit, #1 /* limit != 0, so no underflow.  */
+	/* Big-endian.  Early bytes are at MSB.  */
+CPU_BE( lsl	tmp2, tmp2, tmp3 )	/* Shift (tmp1 & 63).  */
+	/* Little-endian.  Early bytes are at LSB.  */
+CPU_LE( lsr	tmp2, tmp2, tmp3 )	/* Shift (tmp1 & 63).  */
+
+	and	tmp3, limit_wd, #7
+	lsr	limit_wd, limit_wd, #3
+	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
+	add	limit, limit, tmp1
+	add	tmp3, tmp3, tmp1
+	orr	data1, data1, tmp2
+	orr	data2, data2, tmp2
+	add	limit_wd, limit_wd, tmp3, lsr #3
+	b	.Lstart_realigned
+
+/*when src1 offset is not equal to src2 offset...*/
+.Lmisaligned8:
+	cmp	limit, #8
+	b.lo	.Ltiny8proc /*limit < 8... */
+	/*
+	* Get the align offset length to compare per byte first.
+	* After this process, one string's address will be aligned.*/
+	and	tmp1, src1, #7
+	neg	tmp1, tmp1
+	add	tmp1, tmp1, #8
+	and	tmp2, src2, #7
+	neg	tmp2, tmp2
+	add	tmp2, tmp2, #8
+	subs	tmp3, tmp1, tmp2
+	csel	pos, tmp1, tmp2, hi /*Choose the maximum. */
+	/*
+	* Here, limit is not less than 8, so directly run .Ltinycmp
+	* without checking the limit.*/
+	sub	limit, limit, pos
+.Ltinycmp:
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	subs	pos, pos, #1
+	ccmp	data1w, #1, #0, ne  /* NZCV = 0b0000.  */
+	ccmp	data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
+	b.eq	.Ltinycmp
+	cbnz	pos, 1f /*find the null or unequal...*/
+	cmp	data1w, #1
+	ccmp	data1w, data2w, #0, cs
+	b.eq	.Lstart_align /*the last bytes are equal....*/
+1:
+	sub	result, data1, data2
+	ret
+
+.Lstart_align:
+	lsr	limit_wd, limit, #3
+	cbz	limit_wd, .Lremain8
+	/*process more leading bytes to make str1 aligned...*/
+	ands	xzr, src1, #7
+	b.eq	.Lrecal_offset
+	add	src1, src1, tmp3	/*tmp3 is positive in this branch.*/
+	add	src2, src2, tmp3
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+
+	sub	limit, limit, tmp3
+	lsr	limit_wd, limit, #3
+	subs	limit_wd, limit_wd, #1
+
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+	bics	has_nul, tmp1, tmp2
+	ccmp	endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
+	b.ne	.Lunequal_proc
+	/*How far is the current str2 from the alignment boundary...*/
+	and	tmp3, tmp3, #7
+.Lrecal_offset:
+	neg	pos, tmp3
+.Lloopcmp_proc:
+	/*
+	* Divide the eight bytes into two parts. First,backwards the src2
+	* to an alignment boundary,load eight bytes from the SRC2 alignment
+	* boundary,then compare with the relative bytes from SRC1.
+	* If all 8 bytes are equal,then start the second part's comparison.
+	* Otherwise finish the comparison.
+	* This special handle can garantee all the accesses are in the
+	* thread/task space in avoid to overrange access.
+	*/
+	ldr	data1, [src1,pos]
+	ldr	data2, [src2,pos]
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	bics	has_nul, tmp1, tmp2 /* Non-zero if NUL terminator.  */
+	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, eq
+	cbnz	endloop, .Lunequal_proc
+
+	/*The second part process*/
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+	subs	limit_wd, limit_wd, #1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+	bics	has_nul, tmp1, tmp2
+	ccmp	endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
+	b.eq	.Lloopcmp_proc
+
+.Lunequal_proc:
+	orr	syndrome, diff, has_nul
+	cbz	syndrome, .Lremain8
+.Lcal_cmpresult:
+	/*
+	* reversed the byte-order as big-endian,then CLZ can find the most
+	* significant zero bits.
+	*/
+CPU_LE( rev	syndrome, syndrome )
+CPU_LE( rev	data1, data1 )
+CPU_LE( rev	data2, data2 )
+	/*
+	* For big-endian we cannot use the trick with the syndrome value
+	* as carry-propagation can corrupt the upper bits if the trailing
+	* bytes in the string contain 0x01.
+	* However, if there is no NUL byte in the dword, we can generate
+	* the result directly.  We can't just subtract the bytes as the
+	* MSB might be significant.
+	*/
+CPU_BE( cbnz	has_nul, 1f )
+CPU_BE( cmp	data1, data2 )
+CPU_BE( cset	result, ne )
+CPU_BE( cneg	result, result, lo )
+CPU_BE( ret )
+CPU_BE( 1: )
+	/* Re-compute the NUL-byte detection, using a byte-reversed value.*/
+CPU_BE( rev	tmp3, data1 )
+CPU_BE( sub	tmp1, tmp3, zeroones )
+CPU_BE( orr	tmp2, tmp3, #REP8_7f )
+CPU_BE( bic	has_nul, tmp1, tmp2 )
+CPU_BE( rev	has_nul, has_nul )
+CPU_BE( orr	syndrome, diff, has_nul )
+	/*
+	* The MS-non-zero bit of the syndrome marks either the first bit
+	* that is different, or the top bit of the first zero byte.
+	* Shifting left now will bring the critical information into the
+	* top bits.
+	*/
+	clz	pos, syndrome
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/*
+	* But we need to zero-extend (char is unsigned) the value and then
+	* perform a signed 32-bit subtraction.
+	*/
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+
+.Lremain8:
+	/* Limit % 8 == 0 => all bytes significant.  */
+	ands	limit, limit, #7
+	b.eq	.Lret0
+.Ltiny8proc:
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	subs	limit, limit, #1
+
+	ccmp	data1w, #1, #0, ne  /* NZCV = 0b0000.  */
+	ccmp	data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
+	b.eq	.Ltiny8proc
+	sub	result, data1, data2
+	ret
+
+.Lret0:
+	mov	result, #0
+	ret
+ENDPROC(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
new file mode 100644
index 000000000..2ca665711
--- /dev/null
+++ b/arch/arm64/lib/strnlen.S
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * determine the length of a fixed-size string
+ *
+ * Parameters:
+ *	x0 - const string pointer
+ *	x1 - maximal string length
+ * Returns:
+ *	x0 - the return length of specific string
+ */
+
+/* Arguments and results.  */
+srcin		.req	x0
+len		.req	x0
+limit		.req	x1
+
+/* Locals and temporaries.  */
+src		.req	x2
+data1		.req	x3
+data2		.req	x4
+data2a		.req	x5
+has_nul1	.req	x6
+has_nul2	.req	x7
+tmp1		.req	x8
+tmp2		.req	x9
+tmp3		.req	x10
+tmp4		.req	x11
+zeroones	.req	x12
+pos		.req	x13
+limit_wd	.req	x14
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+ENTRY(strnlen)
+	cbz	limit, .Lhit_limit
+	mov	zeroones, #REP8_01
+	bic	src, srcin, #15
+	ands	tmp1, srcin, #15
+	b.ne	.Lmisaligned
+	/* Calculate the number of full and partial words -1.  */
+	sub	limit_wd, limit, #1 /* Limit != 0, so no underflow.  */
+	lsr	limit_wd, limit_wd, #4  /* Convert to Qwords.  */
+
+	/*
+	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	* can be done in parallel across the entire word.
+	*/
+	/*
+	* The inner loop deals with two Dwords at a time.  This has a
+	* slightly higher start-up cost, but we should win quite quickly,
+	* especially on cores with a high number of issue slots per
+	* cycle, as we get much better parallelism out of the operations.
+	*/
+.Lloop:
+	ldp	data1, data2, [src], #16
+.Lrealigned:
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	subs	limit_wd, limit_wd, #1
+	orr	tmp1, has_nul1, has_nul2
+	ccmp	tmp1, #0, #0, pl    /* NZCV = 0000  */
+	b.eq	.Lloop
+
+	cbz	tmp1, .Lhit_limit   /* No null in final Qword.  */
+
+	/*
+	* We know there's a null in the final Qword. The easiest thing
+	* to do now is work out the length of the string and return
+	* MIN (len, limit).
+	*/
+	sub	len, src, srcin
+	cbz	has_nul1, .Lnul_in_data2
+CPU_BE( mov	data2, data1 )	/*perpare data to re-calculate the syndrome*/
+
+	sub	len, len, #8
+	mov	has_nul2, has_nul1
+.Lnul_in_data2:
+	/*
+	* For big-endian, carry propagation (if the final byte in the
+	* string is 0x01) means we cannot use has_nul directly.  The
+	* easiest way to get the correct byte is to byte-swap the data
+	* and calculate the syndrome a second time.
+	*/
+CPU_BE( rev	data2, data2 )
+CPU_BE( sub	tmp1, data2, zeroones )
+CPU_BE( orr	tmp2, data2, #REP8_7f )
+CPU_BE( bic	has_nul2, tmp1, tmp2 )
+
+	sub	len, len, #8
+	rev	has_nul2, has_nul2
+	clz	pos, has_nul2
+	add	len, len, pos, lsr #3       /* Bits to bytes.  */
+	cmp	len, limit
+	csel	len, len, limit, ls     /* Return the lower value.  */
+	ret
+
+.Lmisaligned:
+	/*
+	* Deal with a partial first word.
+	* We're doing two things in parallel here;
+	* 1) Calculate the number of words (but avoiding overflow if
+	* limit is near ULONG_MAX) - to do this we need to work out
+	* limit + tmp1 - 1 as a 65-bit value before shifting it;
+	* 2) Load and mask the initial data words - we force the bytes
+	* before the ones we are interested in to 0xff - this ensures
+	* early bytes will not hit any zero detection.
+	*/
+	ldp	data1, data2, [src], #16
+
+	sub	limit_wd, limit, #1
+	and	tmp3, limit_wd, #15
+	lsr	limit_wd, limit_wd, #4
+
+	add	tmp3, tmp3, tmp1
+	add	limit_wd, limit_wd, tmp3, lsr #4
+
+	neg	tmp4, tmp1
+	lsl	tmp4, tmp4, #3  /* Bytes beyond alignment -> bits.  */
+
+	mov	tmp2, #~0
+	/* Big-endian.  Early bytes are at MSB.  */
+CPU_BE( lsl	tmp2, tmp2, tmp4 )	/* Shift (tmp1 & 63).  */
+	/* Little-endian.  Early bytes are at LSB.  */
+CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift (tmp1 & 63).  */
+
+	cmp	tmp1, #8
+
+	orr	data1, data1, tmp2
+	orr	data2a, data2, tmp2
+
+	csinv	data1, data1, xzr, le
+	csel	data2, data2, data2a, le
+	b	.Lrealigned
+
+.Lhit_limit:
+	mov	len, limit
+	ret
+ENDPROC(strnlen)
diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S
new file mode 100644
index 000000000..61eabd9a2
--- /dev/null
+++ b/arch/arm64/lib/strrchr.S
@@ -0,0 +1,43 @@
+/*
+ * Based on arch/arm/lib/strrchr.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Find the last occurrence of a character in a string.
+ *
+ * Parameters:
+ *	x0 - str
+ *	x1 - c
+ * Returns:
+ *	x0 - address of last occurrence of 'c' or 0
+ */
+ENTRY(strrchr)
+	mov	x3, #0
+	and	w1, w1, #0xff
+1:	ldrb	w2, [x0], #1
+	cbz	w2, 2f
+	cmp	w2, w1
+	b.ne	1b
+	sub	x3, x0, #1
+	b	1b
+2:	mov	x0, x3
+	ret
+ENDPROC(strrchr)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
new file mode 100644
index 000000000..773d37a14
--- /dev/null
+++ b/arch/arm64/mm/Makefile
@@ -0,0 +1,6 @@
+obj-y				:= dma-mapping.o extable.o fault.o init.o \
+				   cache.o copypage.o flush.o \
+				   ioremap.o mmap.o pgd.o mmu.o \
+				   context.o proc.o pageattr.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_ARM64_PTDUMP)	+= dump.o
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
new file mode 100644
index 000000000..2560e1e15
--- /dev/null
+++ b/arch/arm64/mm/cache.S
@@ -0,0 +1,268 @@
+/*
+ * Cache maintenance
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
+
+#include "proc-macros.S"
+
+/*
+ *	__flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: x0-x7, x9-x11
+ */
+__flush_dcache_all:
+	dmb	sy				// ensure ordering with previous memory accesses
+	mrs	x0, clidr_el1			// read clidr
+	and	x3, x0, #0x7000000		// extract loc from clidr
+	lsr	x3, x3, #23			// left align loc bit field
+	cbz	x3, finished			// if loc is 0, then no need to clean
+	mov	x10, #0				// start clean at cache level 0
+loop1:
+	add	x2, x10, x10, lsr #1		// work out 3x current cache level
+	lsr	x1, x0, x2			// extract cache type bits from clidr
+	and	x1, x1, #7			// mask of the bits for current cache only
+	cmp	x1, #2				// see what cache we have at this level
+	b.lt	skip				// skip if no cache, or just i-cache
+	save_and_disable_irqs x9		// make CSSELR and CCSIDR access atomic
+	msr	csselr_el1, x10			// select current cache level in csselr
+	isb					// isb to sych the new cssr&csidr
+	mrs	x1, ccsidr_el1			// read the new ccsidr
+	restore_irqs x9
+	and	x2, x1, #7			// extract the length of the cache lines
+	add	x2, x2, #4			// add 4 (line length offset)
+	mov	x4, #0x3ff
+	and	x4, x4, x1, lsr #3		// find maximum number on the way size
+	clz	w5, w4				// find bit position of way size increment
+	mov	x7, #0x7fff
+	and	x7, x7, x1, lsr #13		// extract max number of the index size
+loop2:
+	mov	x9, x4				// create working copy of max way size
+loop3:
+	lsl	x6, x9, x5
+	orr	x11, x10, x6			// factor way and cache number into x11
+	lsl	x6, x7, x2
+	orr	x11, x11, x6			// factor index number into x11
+	dc	cisw, x11			// clean & invalidate by set/way
+	subs	x9, x9, #1			// decrement the way
+	b.ge	loop3
+	subs	x7, x7, #1			// decrement the index
+	b.ge	loop2
+skip:
+	add	x10, x10, #2			// increment cache number
+	cmp	x3, x10
+	b.gt	loop1
+finished:
+	mov	x10, #0				// swith back to cache level 0
+	msr	csselr_el1, x10			// select current cache level in csselr
+	dsb	sy
+	isb
+	ret
+ENDPROC(__flush_dcache_all)
+
+/*
+ *	flush_cache_all()
+ *
+ *	Flush the entire cache system.  The data cache flush is now achieved
+ *	using atomic clean / invalidates working outwards from L1 cache. This
+ *	is done using Set/Way based cache maintainance instructions.  The
+ *	instruction cache can still be invalidated back to the point of
+ *	unification in a single instruction.
+ */
+ENTRY(flush_cache_all)
+	mov	x12, lr
+	bl	__flush_dcache_all
+	mov	x0, #0
+	ic	ialluis				// I+BTB cache invalidate
+	ret	x12
+ENDPROC(flush_cache_all)
+
+/*
+ *	flush_icache_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified region.
+ *	This is typically used when code has been written to a memory region,
+ *	and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(flush_icache_range)
+	/* FALLTHROUGH */
+
+/*
+ *	__flush_cache_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified region.
+ *	This is typically used when code has been written to a memory region,
+ *	and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(__flush_cache_user_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+1:
+USER(9f, dc	cvau, x4	)		// clean D line to PoU
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	1b
+	dsb	ish
+
+	icache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+1:
+USER(9f, ic	ivau, x4	)		// invalidate I line PoU
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	1b
+	dsb	ish
+	isb
+	mov	x0, #0
+	ret
+9:
+	mov	x0, #-EFAULT
+	ret
+ENDPROC(flush_icache_range)
+ENDPROC(__flush_cache_user_range)
+
+/*
+ *	__flush_dcache_area(kaddr, size)
+ *
+ *	Ensure that the data held in the page kaddr is written back to the
+ *	page in question.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - size in question
+ */
+ENTRY(__flush_dcache_area)
+	dcache_line_size x2, x3
+	add	x1, x0, x1
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0			// clean & invalidate D line / unified line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__flush_dcache_area)
+
+/*
+ *	__inval_cache_range(start, end)
+ *	- start   - start address of region
+ *	- end     - end address of region
+ */
+ENTRY(__inval_cache_range)
+	/* FALLTHROUGH */
+
+/*
+ *	__dma_inv_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+__dma_inv_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	tst	x1, x3				// end cache line aligned?
+	bic	x1, x1, x3
+	b.eq	1f
+	dc	civac, x1			// clean & invalidate D / U line
+1:	tst	x0, x3				// start cache line aligned?
+	bic	x0, x0, x3
+	b.eq	2f
+	dc	civac, x0			// clean & invalidate D / U line
+	b	3f
+2:	dc	ivac, x0			// invalidate D / U line
+3:	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	2b
+	dsb	sy
+	ret
+ENDPROC(__inval_cache_range)
+ENDPROC(__dma_inv_range)
+
+/*
+ *	__dma_clean_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+__dma_clean_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	alternative_insn "dc cvac, x0", "dc civac, x0", ARM64_WORKAROUND_CLEAN_CACHE
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_clean_range)
+
+/*
+ *	__dma_flush_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(__dma_flush_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0			// clean & invalidate D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_flush_range)
+
+/*
+ *	__dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(__dma_map_area)
+	add	x1, x1, x0
+	cmp	w2, #DMA_FROM_DEVICE
+	b.eq	__dma_inv_range
+	b	__dma_clean_range
+ENDPROC(__dma_map_area)
+
+/*
+ *	__dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(__dma_unmap_area)
+	add	x1, x1, x0
+	cmp	w2, #DMA_TO_DEVICE
+	b.ne	__dma_inv_range
+	ret
+ENDPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
new file mode 100644
index 000000000..76c1e6cd3
--- /dev/null
+++ b/arch/arm64/mm/context.c
@@ -0,0 +1,167 @@
+/*
+ * Based on arch/arm/mm/context.c
+ *
+ * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/cachetype.h>
+
+#define asid_bits(reg) \
+	(((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
+
+#define ASID_FIRST_VERSION	(1 << MAX_ASID_BITS)
+
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
+unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+
+/*
+ * We fork()ed a process, and we need a new context for the child to run in.
+ */
+void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	mm->context.id = 0;
+	raw_spin_lock_init(&mm->context.id_lock);
+}
+
+static void flush_context(void)
+{
+	/* set the reserved TTBR0 before flushing the TLB */
+	cpu_set_reserved_ttbr0();
+	flush_tlb_all();
+	if (icache_is_aivivt())
+		__flush_icache_all();
+}
+
+#ifdef CONFIG_SMP
+
+static void set_mm_context(struct mm_struct *mm, unsigned int asid)
+{
+	unsigned long flags;
+
+	/*
+	 * Locking needed for multi-threaded applications where the same
+	 * mm->context.id could be set from different CPUs during the
+	 * broadcast. This function is also called via IPI so the
+	 * mm->context.id_lock has to be IRQ-safe.
+	 */
+	raw_spin_lock_irqsave(&mm->context.id_lock, flags);
+	if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+		/*
+		 * Old version of ASID found. Set the new one and reset
+		 * mm_cpumask(mm).
+		 */
+		mm->context.id = asid;
+		cpumask_clear(mm_cpumask(mm));
+	}
+	raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
+
+	/*
+	 * Set the mm_cpumask(mm) bit for the current CPU.
+	 */
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+}
+
+/*
+ * Reset the ASID on the current CPU. This function call is broadcast from the
+ * CPU handling the ASID rollover and holding cpu_asid_lock.
+ */
+static void reset_context(void *info)
+{
+	unsigned int asid;
+	unsigned int cpu = smp_processor_id();
+	struct mm_struct *mm = current->active_mm;
+
+	/*
+	 * current->active_mm could be init_mm for the idle thread immediately
+	 * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
+	 * the reserved value, so no need to reset any context.
+	 */
+	if (mm == &init_mm)
+		return;
+
+	smp_rmb();
+	asid = cpu_last_asid + cpu;
+
+	flush_context();
+	set_mm_context(mm, asid);
+
+	/* set the new ASID */
+	cpu_switch_mm(mm->pgd, mm);
+}
+
+#else
+
+static inline void set_mm_context(struct mm_struct *mm, unsigned int asid)
+{
+	mm->context.id = asid;
+	cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id()));
+}
+
+#endif
+
+void __new_context(struct mm_struct *mm)
+{
+	unsigned int asid;
+	unsigned int bits = asid_bits();
+
+	raw_spin_lock(&cpu_asid_lock);
+#ifdef CONFIG_SMP
+	/*
+	 * Check the ASID again, in case the change was broadcast from another
+	 * CPU before we acquired the lock.
+	 */
+	if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+		raw_spin_unlock(&cpu_asid_lock);
+		return;
+	}
+#endif
+	/*
+	 * At this point, it is guaranteed that the current mm (with an old
+	 * ASID) isn't active on any other CPU since the ASIDs are changed
+	 * simultaneously via IPI.
+	 */
+	asid = ++cpu_last_asid;
+
+	/*
+	 * If we've used up all our ASIDs, we need to start a new version and
+	 * flush the TLB.
+	 */
+	if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
+		/* increment the ASID version */
+		cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
+		if (cpu_last_asid == 0)
+			cpu_last_asid = ASID_FIRST_VERSION;
+		asid = cpu_last_asid + smp_processor_id();
+		flush_context();
+#ifdef CONFIG_SMP
+		smp_wmb();
+		smp_call_function(reset_context, NULL, 1);
+#endif
+		cpu_last_asid += NR_CPUS - 1;
+	}
+
+	set_mm_context(mm, asid);
+	raw_spin_unlock(&cpu_asid_lock);
+}
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
new file mode 100644
index 000000000..13bbc3be6
--- /dev/null
+++ b/arch/arm64/mm/copypage.c
@@ -0,0 +1,36 @@
+/*
+ * Based on arch/arm/mm/copypage.c
+ *
+ * Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/cacheflush.h>
+
+void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
+{
+	copy_page(kto, kfrom);
+	__flush_dcache_area(kto, PAGE_SIZE);
+}
+EXPORT_SYMBOL_GPL(__cpu_copy_user_page);
+
+void __cpu_clear_user_page(void *kaddr, unsigned long vaddr)
+{
+	clear_page(kaddr);
+}
+EXPORT_SYMBOL_GPL(__cpu_clear_user_page);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
new file mode 100644
index 000000000..b0bd4e5fd
--- /dev/null
+++ b/arch/arm64/mm/dma-mapping.c
@@ -0,0 +1,436 @@
+/*
+ * SWIOTLB-based DMA API implementation
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/gfp.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/genalloc.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
+#include <linux/vmalloc.h>
+#include <linux/swiotlb.h>
+
+#include <asm/cacheflush.h>
+
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
+				 bool coherent)
+{
+	if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+		return pgprot_writecombine(prot);
+	return prot;
+}
+
+static struct gen_pool *atomic_pool;
+
+#define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
+static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
+
+static int __init early_coherent_pool(char *p)
+{
+	atomic_pool_size = memparse(p, &p);
+	return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
+{
+	unsigned long val;
+	void *ptr = NULL;
+
+	if (!atomic_pool) {
+		WARN(1, "coherent pool not initialised!\n");
+		return NULL;
+	}
+
+	val = gen_pool_alloc(atomic_pool, size);
+	if (val) {
+		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
+
+		*ret_page = phys_to_page(phys);
+		ptr = (void *)val;
+		memset(ptr, 0, size);
+	}
+
+	return ptr;
+}
+
+static bool __in_atomic_pool(void *start, size_t size)
+{
+	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+}
+
+static int __free_from_pool(void *start, size_t size)
+{
+	if (!__in_atomic_pool(start, size))
+		return 0;
+
+	gen_pool_free(atomic_pool, (unsigned long)start, size);
+
+	return 1;
+}
+
+static void *__dma_alloc_coherent(struct device *dev, size_t size,
+				  dma_addr_t *dma_handle, gfp_t flags,
+				  struct dma_attrs *attrs)
+{
+	if (dev == NULL) {
+		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
+		return NULL;
+	}
+
+	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
+	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
+		flags |= GFP_DMA;
+	if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
+		struct page *page;
+		void *addr;
+
+		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
+							get_order(size));
+		if (!page)
+			return NULL;
+
+		*dma_handle = phys_to_dma(dev, page_to_phys(page));
+		addr = page_address(page);
+		memset(addr, 0, size);
+		return addr;
+	} else {
+		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+	}
+}
+
+static void __dma_free_coherent(struct device *dev, size_t size,
+				void *vaddr, dma_addr_t dma_handle,
+				struct dma_attrs *attrs)
+{
+	bool freed;
+	phys_addr_t paddr = dma_to_phys(dev, dma_handle);
+
+	if (dev == NULL) {
+		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
+		return;
+	}
+
+	freed = dma_release_from_contiguous(dev,
+					phys_to_page(paddr),
+					size >> PAGE_SHIFT);
+	if (!freed)
+		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
+}
+
+static void *__dma_alloc(struct device *dev, size_t size,
+			 dma_addr_t *dma_handle, gfp_t flags,
+			 struct dma_attrs *attrs)
+{
+	struct page *page;
+	void *ptr, *coherent_ptr;
+	bool coherent = is_device_dma_coherent(dev);
+
+	size = PAGE_ALIGN(size);
+
+	if (!coherent && !(flags & __GFP_WAIT)) {
+		struct page *page = NULL;
+		void *addr = __alloc_from_pool(size, &page, flags);
+
+		if (addr)
+			*dma_handle = phys_to_dma(dev, page_to_phys(page));
+
+		return addr;
+	}
+
+	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
+	if (!ptr)
+		goto no_mem;
+
+	/* no need for non-cacheable mapping if coherent */
+	if (coherent)
+		return ptr;
+
+	/* remove any dirty cache lines on the kernel alias */
+	__dma_flush_range(ptr, ptr + size);
+
+	/* create a coherent mapping */
+	page = virt_to_page(ptr);
+	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
+				__get_dma_pgprot(attrs,
+					__pgprot(PROT_NORMAL_NC), false),
+					NULL);
+	if (!coherent_ptr)
+		goto no_map;
+
+	return coherent_ptr;
+
+no_map:
+	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
+no_mem:
+	*dma_handle = DMA_ERROR_CODE;
+	return NULL;
+}
+
+static void __dma_free(struct device *dev, size_t size,
+		       void *vaddr, dma_addr_t dma_handle,
+		       struct dma_attrs *attrs)
+{
+	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+
+	size = PAGE_ALIGN(size);
+
+	if (!is_device_dma_coherent(dev)) {
+		if (__free_from_pool(vaddr, size))
+			return;
+		vunmap(vaddr);
+	}
+	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
+}
+
+static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction dir,
+				     struct dma_attrs *attrs)
+{
+	dma_addr_t dev_addr;
+
+	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
+	if (!is_device_dma_coherent(dev))
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+
+	return dev_addr;
+}
+
+
+static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
+				 size_t size, enum dma_data_direction dir,
+				 struct dma_attrs *attrs)
+{
+	if (!is_device_dma_coherent(dev))
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+				  int nelems, enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i, ret;
+
+	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+	if (!is_device_dma_coherent(dev))
+		for_each_sg(sgl, sg, ret, i)
+			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+				       sg->length, dir);
+
+	return ret;
+}
+
+static void __swiotlb_unmap_sg_attrs(struct device *dev,
+				     struct scatterlist *sgl, int nelems,
+				     enum dma_data_direction dir,
+				     struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (!is_device_dma_coherent(dev))
+		for_each_sg(sgl, sg, nelems, i)
+			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+					 sg->length, dir);
+	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
+static void __swiotlb_sync_single_for_cpu(struct device *dev,
+					  dma_addr_t dev_addr, size_t size,
+					  enum dma_data_direction dir)
+{
+	if (!is_device_dma_coherent(dev))
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+}
+
+static void __swiotlb_sync_single_for_device(struct device *dev,
+					     dma_addr_t dev_addr, size_t size,
+					     enum dma_data_direction dir)
+{
+	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
+	if (!is_device_dma_coherent(dev))
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+}
+
+static void __swiotlb_sync_sg_for_cpu(struct device *dev,
+				      struct scatterlist *sgl, int nelems,
+				      enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (!is_device_dma_coherent(dev))
+		for_each_sg(sgl, sg, nelems, i)
+			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+					 sg->length, dir);
+	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
+static void __swiotlb_sync_sg_for_device(struct device *dev,
+					 struct scatterlist *sgl, int nelems,
+					 enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+	if (!is_device_dma_coherent(dev))
+		for_each_sg(sgl, sg, nelems, i)
+			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+				       sg->length, dir);
+}
+
+/* vma->vm_page_prot must be set appropriately before calling this function */
+static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
+			     void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	int ret = -ENXIO;
+	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
+					PAGE_SHIFT;
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
+	unsigned long off = vma->vm_pgoff;
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      vma->vm_end - vma->vm_start,
+				      vma->vm_page_prot);
+	}
+
+	return ret;
+}
+
+static int __swiotlb_mmap(struct device *dev,
+			  struct vm_area_struct *vma,
+			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			  struct dma_attrs *attrs)
+{
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
+					     is_device_dma_coherent(dev));
+	return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+static struct dma_map_ops swiotlb_dma_ops = {
+	.alloc = __dma_alloc,
+	.free = __dma_free,
+	.mmap = __swiotlb_mmap,
+	.map_page = __swiotlb_map_page,
+	.unmap_page = __swiotlb_unmap_page,
+	.map_sg = __swiotlb_map_sg_attrs,
+	.unmap_sg = __swiotlb_unmap_sg_attrs,
+	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = __swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
+	.dma_supported = swiotlb_dma_supported,
+	.mapping_error = swiotlb_dma_mapping_error,
+};
+
+static int __init atomic_pool_init(void)
+{
+	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
+	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
+	struct page *page;
+	void *addr;
+	unsigned int pool_size_order = get_order(atomic_pool_size);
+
+	if (dev_get_cma_area(NULL))
+		page = dma_alloc_from_contiguous(NULL, nr_pages,
+							pool_size_order);
+	else
+		page = alloc_pages(GFP_DMA, pool_size_order);
+
+	if (page) {
+		int ret;
+		void *page_addr = page_address(page);
+
+		memset(page_addr, 0, atomic_pool_size);
+		__dma_flush_range(page_addr, page_addr + atomic_pool_size);
+
+		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
+		if (!atomic_pool)
+			goto free_page;
+
+		addr = dma_common_contiguous_remap(page, atomic_pool_size,
+					VM_USERMAP, prot, atomic_pool_init);
+
+		if (!addr)
+			goto destroy_genpool;
+
+		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
+					page_to_phys(page),
+					atomic_pool_size, -1);
+		if (ret)
+			goto remove_mapping;
+
+		gen_pool_set_algo(atomic_pool,
+				  gen_pool_first_fit_order_align,
+				  (void *)PAGE_SHIFT);
+
+		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
+			atomic_pool_size / 1024);
+		return 0;
+	}
+	goto out;
+
+remove_mapping:
+	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
+destroy_genpool:
+	gen_pool_destroy(atomic_pool);
+	atomic_pool = NULL;
+free_page:
+	if (!dma_release_from_contiguous(NULL, page, nr_pages))
+		__free_pages(page, pool_size_order);
+out:
+	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
+		atomic_pool_size / 1024);
+	return -ENOMEM;
+}
+
+static int __init arm64_dma_init(void)
+{
+	int ret;
+
+	dma_ops = &swiotlb_dma_ops;
+
+	ret = atomic_pool_init();
+
+	return ret;
+}
+arch_initcall(arm64_dma_init);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES	4096
+
+static int __init dma_debug_do_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+	return 0;
+}
+fs_initcall(dma_debug_do_init);
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
new file mode 100644
index 000000000..f3d6221cd
--- /dev/null
+++ b/arch/arm64/mm/dump.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ *
+ * Derived from x86 and arm implementation:
+ * (C) Copyright 2008 Intel Corporation
+ *
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+
+#include <asm/fixmap.h>
+#include <asm/memory.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
+
+#define LOWEST_ADDR	(UL(0xffffffffffffffff) << VA_BITS)
+
+struct addr_marker {
+	unsigned long start_address;
+	const char *name;
+};
+
+enum address_markers_idx {
+	VMALLOC_START_NR = 0,
+	VMALLOC_END_NR,
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	VMEMMAP_START_NR,
+	VMEMMAP_END_NR,
+#endif
+	FIXADDR_START_NR,
+	FIXADDR_END_NR,
+	PCI_START_NR,
+	PCI_END_NR,
+	MODULES_START_NR,
+	MODUELS_END_NR,
+	KERNEL_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+	{ VMALLOC_START,	"vmalloc() Area" },
+	{ VMALLOC_END,		"vmalloc() End" },
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	{ 0,			"vmemmap start" },
+	{ 0,			"vmemmap end" },
+#endif
+	{ FIXADDR_START,	"Fixmap start" },
+	{ FIXADDR_TOP,		"Fixmap end" },
+	{ PCI_IO_START,		"PCI I/O start" },
+	{ PCI_IO_END,		"PCI I/O end" },
+	{ MODULES_VADDR,	"Modules start" },
+	{ MODULES_END,		"Modules end" },
+	{ PAGE_OFFSET,		"Kernel Mapping" },
+	{ -1,			NULL },
+};
+
+struct pg_state {
+	struct seq_file *seq;
+	const struct addr_marker *marker;
+	unsigned long start_address;
+	unsigned level;
+	u64 current_prot;
+};
+
+struct prot_bits {
+	u64		mask;
+	u64		val;
+	const char	*set;
+	const char	*clear;
+};
+
+static const struct prot_bits pte_bits[] = {
+	{
+		.mask	= PTE_USER,
+		.val	= PTE_USER,
+		.set	= "USR",
+		.clear	= "   ",
+	}, {
+		.mask	= PTE_RDONLY,
+		.val	= PTE_RDONLY,
+		.set	= "ro",
+		.clear	= "RW",
+	}, {
+		.mask	= PTE_PXN,
+		.val	= PTE_PXN,
+		.set	= "NX",
+		.clear	= "x ",
+	}, {
+		.mask	= PTE_SHARED,
+		.val	= PTE_SHARED,
+		.set	= "SHD",
+		.clear	= "   ",
+	}, {
+		.mask	= PTE_AF,
+		.val	= PTE_AF,
+		.set	= "AF",
+		.clear	= "  ",
+	}, {
+		.mask	= PTE_NG,
+		.val	= PTE_NG,
+		.set	= "NG",
+		.clear	= "  ",
+	}, {
+		.mask	= PTE_UXN,
+		.val	= PTE_UXN,
+		.set	= "UXN",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_DEVICE_nGnRnE),
+		.set	= "DEVICE/nGnRnE",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_DEVICE_nGnRE),
+		.set	= "DEVICE/nGnRE",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_DEVICE_GRE),
+		.set	= "DEVICE/GRE",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_NORMAL_NC),
+		.set	= "MEM/NORMAL-NC",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_NORMAL),
+		.set	= "MEM/NORMAL",
+	}
+};
+
+struct pg_level {
+	const struct prot_bits *bits;
+	size_t num;
+	u64 mask;
+};
+
+static struct pg_level pg_level[] = {
+	{
+	}, { /* pgd */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	}, { /* pud */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	}, { /* pmd */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	}, { /* pte */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	},
+};
+
+static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
+			size_t num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++, bits++) {
+		const char *s;
+
+		if ((st->current_prot & bits->mask) == bits->val)
+			s = bits->set;
+		else
+			s = bits->clear;
+
+		if (s)
+			seq_printf(st->seq, " %s", s);
+	}
+}
+
+static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
+				u64 val)
+{
+	static const char units[] = "KMGTPE";
+	u64 prot = val & pg_level[level].mask;
+
+	if (!st->level) {
+		st->level = level;
+		st->current_prot = prot;
+		st->start_address = addr;
+		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+	} else if (prot != st->current_prot || level != st->level ||
+		   addr >= st->marker[1].start_address) {
+		const char *unit = units;
+		unsigned long delta;
+
+		if (st->current_prot) {
+			seq_printf(st->seq, "0x%16lx-0x%16lx   ",
+				   st->start_address, addr);
+
+			delta = (addr - st->start_address) >> 10;
+			while (!(delta & 1023) && unit[1]) {
+				delta >>= 10;
+				unit++;
+			}
+			seq_printf(st->seq, "%9lu%c", delta, *unit);
+			if (pg_level[st->level].bits)
+				dump_prot(st, pg_level[st->level].bits,
+					  pg_level[st->level].num);
+			seq_puts(st->seq, "\n");
+		}
+
+		if (addr >= st->marker[1].start_address) {
+			st->marker++;
+			seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+		}
+
+		st->start_address = addr;
+		st->current_prot = prot;
+		st->level = level;
+	}
+
+	if (addr >= st->marker[1].start_address) {
+		st->marker++;
+		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+	}
+
+}
+
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+{
+	pte_t *pte = pte_offset_kernel(pmd, 0);
+	unsigned long addr;
+	unsigned i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+		addr = start + i * PAGE_SIZE;
+		note_page(st, addr, 4, pte_val(*pte));
+	}
+}
+
+static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+{
+	pmd_t *pmd = pmd_offset(pud, 0);
+	unsigned long addr;
+	unsigned i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+		addr = start + i * PMD_SIZE;
+		if (pmd_none(*pmd) || pmd_sect(*pmd)) {
+			note_page(st, addr, 3, pmd_val(*pmd));
+		} else {
+			BUG_ON(pmd_bad(*pmd));
+			walk_pte(st, pmd, addr);
+		}
+	}
+}
+
+static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+{
+	pud_t *pud = pud_offset(pgd, 0);
+	unsigned long addr;
+	unsigned i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+		addr = start + i * PUD_SIZE;
+		if (pud_none(*pud) || pud_sect(*pud)) {
+			note_page(st, addr, 2, pud_val(*pud));
+		} else {
+			BUG_ON(pud_bad(*pud));
+			walk_pmd(st, pud, addr);
+		}
+	}
+}
+
+static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start)
+{
+	pgd_t *pgd = pgd_offset(mm, 0UL);
+	unsigned i;
+	unsigned long addr;
+
+	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+		addr = start + i * PGDIR_SIZE;
+		if (pgd_none(*pgd)) {
+			note_page(st, addr, 1, pgd_val(*pgd));
+		} else {
+			BUG_ON(pgd_bad(*pgd));
+			walk_pud(st, pgd, addr);
+		}
+	}
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+	struct pg_state st = {
+		.seq = m,
+		.marker = address_markers,
+	};
+
+	walk_pgd(&st, &init_mm, LOWEST_ADDR);
+
+	note_page(&st, 0, 0, 0);
+	return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+	.open		= ptdump_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int ptdump_init(void)
+{
+	struct dentry *pe;
+	unsigned i, j;
+
+	for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+		if (pg_level[i].bits)
+			for (j = 0; j < pg_level[i].num; j++)
+				pg_level[i].mask |= pg_level[i].bits[j].mask;
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	address_markers[VMEMMAP_START_NR].start_address =
+				(unsigned long)virt_to_page(PAGE_OFFSET);
+	address_markers[VMEMMAP_END_NR].start_address =
+				(unsigned long)virt_to_page(high_memory);
+#endif
+
+	pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
+				 &ptdump_fops);
+	return pe ? 0 : -ENOMEM;
+}
+device_initcall(ptdump_init);
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
new file mode 100644
index 000000000..79444279b
--- /dev/null
+++ b/arch/arm64/mm/extable.c
@@ -0,0 +1,17 @@
+/*
+ * Based on arch/arm/mm/extable.c
+ */
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(instruction_pointer(regs));
+	if (fixup)
+		regs->pc = fixup->fixup;
+
+	return fixup != NULL;
+}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
new file mode 100644
index 000000000..96da13167
--- /dev/null
+++ b/arch/arm64/mm/fault.c
@@ -0,0 +1,532 @@
+/*
+ * Based on arch/arm/mm/fault.c
+ *
+ * Copyright (C) 1995  Linus Torvalds
+ * Copyright (C) 1995-2004 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/hardirq.h>
+#include <linux/init.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/page-flags.h>
+#include <linux/sched.h>
+#include <linux/highmem.h>
+#include <linux/perf_event.h>
+
+#include <asm/exception.h>
+#include <asm/debug-monitors.h>
+#include <asm/esr.h>
+#include <asm/system_misc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+static const char *fault_name(unsigned int esr);
+
+/*
+ * Dump out the page tables associated with 'addr' in mm 'mm'.
+ */
+void show_pte(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+
+	if (!mm)
+		mm = &init_mm;
+
+	pr_alert("pgd = %p\n", mm->pgd);
+	pgd = pgd_offset(mm, addr);
+	pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
+
+	do {
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		if (pgd_none(*pgd) || pgd_bad(*pgd))
+			break;
+
+		pud = pud_offset(pgd, addr);
+		printk(", *pud=%016llx", pud_val(*pud));
+		if (pud_none(*pud) || pud_bad(*pud))
+			break;
+
+		pmd = pmd_offset(pud, addr);
+		printk(", *pmd=%016llx", pmd_val(*pmd));
+		if (pmd_none(*pmd) || pmd_bad(*pmd))
+			break;
+
+		pte = pte_offset_map(pmd, addr);
+		printk(", *pte=%016llx", pte_val(*pte));
+		pte_unmap(pte);
+	} while(0);
+
+	printk("\n");
+}
+
+/*
+ * The kernel tried to access some page that wasn't present.
+ */
+static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
+			      unsigned int esr, struct pt_regs *regs)
+{
+	/*
+	 * Are we prepared to handle this kernel fault?
+	 */
+	if (fixup_exception(regs))
+		return;
+
+	/*
+	 * No handler, we'll have to terminate things with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
+		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
+		 "paging request", addr);
+
+	show_pte(mm, addr);
+	die("Oops", regs, esr);
+	bust_spinlocks(0);
+	do_exit(SIGKILL);
+}
+
+/*
+ * Something tried to access memory that isn't in our memory map. User mode
+ * accesses just cause a SIGSEGV
+ */
+static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
+			    unsigned int esr, unsigned int sig, int code,
+			    struct pt_regs *regs)
+{
+	struct siginfo si;
+
+	if (show_unhandled_signals && unhandled_signal(tsk, sig) &&
+	    printk_ratelimit()) {
+		pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
+			tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
+			addr, esr);
+		show_pte(tsk->mm, addr);
+		show_regs(regs);
+	}
+
+	tsk->thread.fault_address = addr;
+	tsk->thread.fault_code = esr;
+	si.si_signo = sig;
+	si.si_errno = 0;
+	si.si_code = code;
+	si.si_addr = (void __user *)addr;
+	force_sig_info(sig, &si, tsk);
+}
+
+static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->active_mm;
+
+	/*
+	 * If we are in kernel mode at this point, we have no context to
+	 * handle this fault with.
+	 */
+	if (user_mode(regs))
+		__do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs);
+	else
+		__do_kernel_fault(mm, addr, esr, regs);
+}
+
+#define VM_FAULT_BADMAP		0x010000
+#define VM_FAULT_BADACCESS	0x020000
+
+#define ESR_LNX_EXEC		(1 << 24)
+
+static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
+			   unsigned int mm_flags, unsigned long vm_flags,
+			   struct task_struct *tsk)
+{
+	struct vm_area_struct *vma;
+	int fault;
+
+	vma = find_vma(mm, addr);
+	fault = VM_FAULT_BADMAP;
+	if (unlikely(!vma))
+		goto out;
+	if (unlikely(vma->vm_start > addr))
+		goto check_stack;
+
+	/*
+	 * Ok, we have a good vm_area for this memory access, so we can handle
+	 * it.
+	 */
+good_area:
+	/*
+	 * Check that the permissions on the VMA allow for the fault which
+	 * occurred. If we encountered a write or exec fault, we must have
+	 * appropriate permissions, otherwise we allow any permission.
+	 */
+	if (!(vma->vm_flags & vm_flags)) {
+		fault = VM_FAULT_BADACCESS;
+		goto out;
+	}
+
+	return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);
+
+check_stack:
+	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
+		goto good_area;
+out:
+	return fault;
+}
+
+static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
+				   struct pt_regs *regs)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	int fault, sig, code;
+	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
+	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	tsk = current;
+	mm  = tsk->mm;
+
+	/* Enable interrupts if they were enabled in the parent context. */
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
+	/*
+	 * If we're in an interrupt or have no user context, we must not take
+	 * the fault.
+	 */
+	if (in_atomic() || !mm)
+		goto no_context;
+
+	if (user_mode(regs))
+		mm_flags |= FAULT_FLAG_USER;
+
+	if (esr & ESR_LNX_EXEC) {
+		vm_flags = VM_EXEC;
+	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
+		vm_flags = VM_WRITE;
+		mm_flags |= FAULT_FLAG_WRITE;
+	}
+
+	/*
+	 * As per x86, we may deadlock here. However, since the kernel only
+	 * validly references user space from well defined areas of the code,
+	 * we can bug out early if this is from code which shouldn't.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (!user_mode(regs) && !search_exception_tables(regs->pc))
+			goto no_context;
+retry:
+		down_read(&mm->mmap_sem);
+	} else {
+		/*
+		 * The above down_read_trylock() might have succeeded in which
+		 * case, we'll have missed the might_sleep() from down_read().
+		 */
+		might_sleep();
+#ifdef CONFIG_DEBUG_VM
+		if (!user_mode(regs) && !search_exception_tables(regs->pc))
+			goto no_context;
+#endif
+	}
+
+	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
+
+	/*
+	 * If we need to retry but a fatal signal is pending, handle the
+	 * signal first. We do not need to release the mmap_sem because it
+	 * would already be released in __lock_page_or_retry in mm/filemap.c.
+	 */
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+		return 0;
+
+	/*
+	 * Major/minor page fault accounting is only done on the initial
+	 * attempt. If we go through a retry, it is extremely likely that the
+	 * page will be found in page cache at that point.
+	 */
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+	if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR) {
+			tsk->maj_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
+				      addr);
+		} else {
+			tsk->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
+				      addr);
+		}
+		if (fault & VM_FAULT_RETRY) {
+			/*
+			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
+			 * starvation.
+			 */
+			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			goto retry;
+		}
+	}
+
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
+	 */
+	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
+			      VM_FAULT_BADACCESS))))
+		return 0;
+
+	/*
+	 * If we are in kernel mode at this point, we have no context to
+	 * handle this fault with.
+	 */
+	if (!user_mode(regs))
+		goto no_context;
+
+	if (fault & VM_FAULT_OOM) {
+		/*
+		 * We ran out of memory, call the OOM killer, and return to
+		 * userspace (which will retry the fault, or kill us if we got
+		 * oom-killed).
+		 */
+		pagefault_out_of_memory();
+		return 0;
+	}
+
+	if (fault & VM_FAULT_SIGBUS) {
+		/*
+		 * We had some memory, but were unable to successfully fix up
+		 * this page fault.
+		 */
+		sig = SIGBUS;
+		code = BUS_ADRERR;
+	} else {
+		/*
+		 * Something tried to access memory that isn't in our memory
+		 * map.
+		 */
+		sig = SIGSEGV;
+		code = fault == VM_FAULT_BADACCESS ?
+			SEGV_ACCERR : SEGV_MAPERR;
+	}
+
+	__do_user_fault(tsk, addr, esr, sig, code, regs);
+	return 0;
+
+no_context:
+	__do_kernel_fault(mm, addr, esr, regs);
+	return 0;
+}
+
+/*
+ * First Level Translation Fault Handler
+ *
+ * We enter here because the first level page table doesn't contain a valid
+ * entry for the address.
+ *
+ * If the address is in kernel space (>= TASK_SIZE), then we are probably
+ * faulting in the vmalloc() area.
+ *
+ * If the init_task's first level page tables contains the relevant entry, we
+ * copy the it to this task.  If not, we send the process a signal, fixup the
+ * exception, or oops the kernel.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may be in an interrupt
+ * or a critical region, and should only copy the information from the master
+ * page table, nothing more.
+ */
+static int __kprobes do_translation_fault(unsigned long addr,
+					  unsigned int esr,
+					  struct pt_regs *regs)
+{
+	if (addr < TASK_SIZE)
+		return do_page_fault(addr, esr, regs);
+
+	do_bad_area(addr, esr, regs);
+	return 0;
+}
+
+/*
+ * This abort handler always returns "fault".
+ */
+static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+	return 1;
+}
+
+static struct fault_info {
+	int	(*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+	int	sig;
+	int	code;
+	const char *name;
+} fault_info[] = {
+	{ do_bad,		SIGBUS,  0,		"ttbr address size fault"	},
+	{ do_bad,		SIGBUS,  0,		"level 1 address size fault"	},
+	{ do_bad,		SIGBUS,  0,		"level 2 address size fault"	},
+	{ do_bad,		SIGBUS,  0,		"level 3 address size fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
+	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
+	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
+	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	},
+	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	},
+	{ do_bad,		SIGBUS,  0,		"unknown 18"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 19"			},
+	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error"	},
+	{ do_bad,		SIGBUS,  0,		"asynchronous parity error"	},
+	{ do_bad,		SIGBUS,  0,		"unknown 26"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 27"			},
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" },
+	{ do_bad,		SIGBUS,  0,		"unknown 32"			},
+	{ do_bad,		SIGBUS,  BUS_ADRALN,	"alignment fault"		},
+	{ do_bad,		SIGBUS,  0,		"debug event"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 35"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 36"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 37"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 38"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 39"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 40"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 41"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 42"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 43"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 44"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 45"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 46"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 47"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 48"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 49"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 50"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 51"			},
+	{ do_bad,		SIGBUS,  0,		"implementation fault (lockdown abort)" },
+	{ do_bad,		SIGBUS,  0,		"unknown 53"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 54"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 55"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 56"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 57"			},
+	{ do_bad,		SIGBUS,  0,		"implementation fault (coprocessor abort)" },
+	{ do_bad,		SIGBUS,  0,		"unknown 59"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 60"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 61"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 62"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 63"			},
+};
+
+static const char *fault_name(unsigned int esr)
+{
+	const struct fault_info *inf = fault_info + (esr & 63);
+	return inf->name;
+}
+
+/*
+ * Dispatch a data abort to the relevant handler.
+ */
+asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
+					 struct pt_regs *regs)
+{
+	const struct fault_info *inf = fault_info + (esr & 63);
+	struct siginfo info;
+
+	if (!inf->fn(addr, esr, regs))
+		return;
+
+	pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
+		 inf->name, esr, addr);
+
+	info.si_signo = inf->sig;
+	info.si_errno = 0;
+	info.si_code  = inf->code;
+	info.si_addr  = (void __user *)addr;
+	arm64_notify_die("", regs, &info, esr);
+}
+
+/*
+ * Handle stack alignment exceptions.
+ */
+asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
+					   unsigned int esr,
+					   struct pt_regs *regs)
+{
+	struct siginfo info;
+
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code  = BUS_ADRALN;
+	info.si_addr  = (void __user *)addr;
+	arm64_notify_die("", regs, &info, esr);
+}
+
+static struct fault_info debug_fault_info[] = {
+	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware breakpoint"	},
+	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware single-step"	},
+	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware watchpoint"	},
+	{ do_bad,	SIGBUS,		0,		"unknown 3"		},
+	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch32 BKPT"		},
+	{ do_bad,	SIGTRAP,	0,		"aarch32 vector catch"	},
+	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch64 BRK"		},
+	{ do_bad,	SIGBUS,		0,		"unknown 7"		},
+};
+
+void __init hook_debug_fault_code(int nr,
+				  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
+				  int sig, int code, const char *name)
+{
+	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
+
+	debug_fault_info[nr].fn		= fn;
+	debug_fault_info[nr].sig	= sig;
+	debug_fault_info[nr].code	= code;
+	debug_fault_info[nr].name	= name;
+}
+
+asmlinkage int __exception do_debug_exception(unsigned long addr,
+					      unsigned int esr,
+					      struct pt_regs *regs)
+{
+	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
+	struct siginfo info;
+
+	if (!inf->fn(addr, esr, regs))
+		return 1;
+
+	pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
+		 inf->name, esr, addr);
+
+	info.si_signo = inf->sig;
+	info.si_errno = 0;
+	info.si_code  = inf->code;
+	info.si_addr  = (void __user *)addr;
+	arm64_notify_die("", regs, &info, 0);
+
+	return 0;
+}
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
new file mode 100644
index 000000000..b6f14e8d2
--- /dev/null
+++ b/arch/arm64/mm/flush.c
@@ -0,0 +1,122 @@
+/*
+ * Based on arch/arm/mm/flush.c
+ *
+ * Copyright (C) 1995-2002 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachetype.h>
+#include <asm/tlbflush.h>
+
+#include "mm.h"
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end)
+{
+	if (vma->vm_flags & VM_EXEC)
+		__flush_icache_all();
+}
+
+static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
+				unsigned long uaddr, void *kaddr,
+				unsigned long len)
+{
+	if (vma->vm_flags & VM_EXEC) {
+		unsigned long addr = (unsigned long)kaddr;
+		if (icache_is_aliasing()) {
+			__flush_dcache_area(kaddr, len);
+			__flush_icache_all();
+		} else {
+			flush_icache_range(addr, addr + len);
+		}
+	}
+}
+
+/*
+ * Copy user data from/to a page which is mapped into a different processes
+ * address space.  Really, we want to allow our "user space" model to handle
+ * this.
+ *
+ * Note that this code needs to run on the current CPU.
+ */
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long uaddr, void *dst, const void *src,
+		       unsigned long len)
+{
+#ifdef CONFIG_SMP
+	preempt_disable();
+#endif
+	memcpy(dst, src, len);
+	flush_ptrace_access(vma, page, uaddr, dst, len);
+#ifdef CONFIG_SMP
+	preempt_enable();
+#endif
+}
+
+void __sync_icache_dcache(pte_t pte, unsigned long addr)
+{
+	struct page *page = pte_page(pte);
+
+	/* no flushing needed for anonymous pages */
+	if (!page_mapping(page))
+		return;
+
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
+		__flush_dcache_area(page_address(page),
+				PAGE_SIZE << compound_order(page));
+		__flush_icache_all();
+	} else if (icache_is_aivivt()) {
+		__flush_icache_all();
+	}
+}
+
+/*
+ * This function is called when a page has been modified by the kernel. Mark
+ * it as dirty for later flushing when mapped in user space (if executable,
+ * see __sync_icache_dcache).
+ */
+void flush_dcache_page(struct page *page)
+{
+	if (test_bit(PG_dcache_clean, &page->flags))
+		clear_bit(PG_dcache_clean, &page->flags);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+/*
+ * Additional functions defined in assembly.
+ */
+EXPORT_SYMBOL(flush_cache_all);
+EXPORT_SYMBOL(flush_icache_range);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	pmd_t pmd = pmd_mksplitting(*pmdp);
+
+	VM_BUG_ON(address & ~PMD_MASK);
+	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+
+	/* dummy IPI to serialise against fast_gup */
+	kick_all_cpus_sync();
+}
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
new file mode 100644
index 000000000..0eeb4f093
--- /dev/null
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -0,0 +1,68 @@
+/*
+ * arch/arm64/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/mm/hugetlbpage.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+#endif
+
+int pmd_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
+
+int pud_huge(pud_t pud)
+{
+#ifndef __PAGETABLE_PMD_FOLDED
+	return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
+#else
+	return 0;
+#endif
+}
+
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long ps = memparse(opt, &opt);
+	if (ps == PMD_SIZE) {
+		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+	} else if (ps == PUD_SIZE) {
+		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	} else {
+		pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
+		return 0;
+	}
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
new file mode 100644
index 000000000..ad87ce826
--- /dev/null
+++ b/arch/arm64/mm/init.c
@@ -0,0 +1,376 @@
+/*
+ * Based on arch/arm/mm/init.c
+ *
+ * Copyright (C) 1995-2005 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mman.h>
+#include <linux/nodemask.h>
+#include <linux/initrd.h>
+#include <linux/gfp.h>
+#include <linux/memblock.h>
+#include <linux/sort.h>
+#include <linux/of_fdt.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
+#include <linux/efi.h>
+#include <linux/swiotlb.h>
+
+#include <asm/fixmap.h>
+#include <asm/memory.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sizes.h>
+#include <asm/tlb.h>
+#include <asm/alternative.h>
+
+#include "mm.h"
+
+phys_addr_t memstart_addr __read_mostly = 0;
+phys_addr_t arm64_dma_phys_limit __read_mostly;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+static int __init early_initrd(char *p)
+{
+	unsigned long start, size;
+	char *endp;
+
+	start = memparse(p, &endp);
+	if (*endp == ',') {
+		size = memparse(endp + 1, NULL);
+
+		initrd_start = (unsigned long)__va(start);
+		initrd_end = (unsigned long)__va(start + size);
+	}
+	return 0;
+}
+early_param("initrd", early_initrd);
+#endif
+
+/*
+ * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
+ * currently assumes that for memory starting above 4G, 32-bit devices will
+ * use a DMA offset.
+ */
+static phys_addr_t max_zone_dma_phys(void)
+{
+	phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
+	return min(offset + (1ULL << 32), memblock_end_of_DRAM());
+}
+
+static void __init zone_sizes_init(unsigned long min, unsigned long max)
+{
+	struct memblock_region *reg;
+	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
+	unsigned long max_dma = min;
+
+	memset(zone_size, 0, sizeof(zone_size));
+
+	/* 4GB maximum for 32-bit only capable devices */
+	if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+		max_dma = PFN_DOWN(arm64_dma_phys_limit);
+		zone_size[ZONE_DMA] = max_dma - min;
+	}
+	zone_size[ZONE_NORMAL] = max - max_dma;
+
+	memcpy(zhole_size, zone_size, sizeof(zhole_size));
+
+	for_each_memblock(memory, reg) {
+		unsigned long start = memblock_region_memory_base_pfn(reg);
+		unsigned long end = memblock_region_memory_end_pfn(reg);
+
+		if (start >= max)
+			continue;
+
+		if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+			unsigned long dma_end = min(end, max_dma);
+			zhole_size[ZONE_DMA] -= dma_end - start;
+		}
+
+		if (end > max_dma) {
+			unsigned long normal_end = min(end, max);
+			unsigned long normal_start = max(start, max_dma);
+			zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
+		}
+	}
+
+	free_area_init_node(0, zone_size, min, zhole_size);
+}
+
+#ifdef CONFIG_HAVE_ARCH_PFN_VALID
+int pfn_valid(unsigned long pfn)
+{
+	return memblock_is_memory(pfn << PAGE_SHIFT);
+}
+EXPORT_SYMBOL(pfn_valid);
+#endif
+
+#ifndef CONFIG_SPARSEMEM
+static void arm64_memory_present(void)
+{
+}
+#else
+static void arm64_memory_present(void)
+{
+	struct memblock_region *reg;
+
+	for_each_memblock(memory, reg)
+		memory_present(0, memblock_region_memory_base_pfn(reg),
+			       memblock_region_memory_end_pfn(reg));
+}
+#endif
+
+static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX;
+
+/*
+ * Limit the memory size that was specified via FDT.
+ */
+static int __init early_mem(char *p)
+{
+	if (!p)
+		return 1;
+
+	memory_limit = memparse(p, &p) & PAGE_MASK;
+	pr_notice("Memory limited to %lldMB\n", memory_limit >> 20);
+
+	return 0;
+}
+early_param("mem", early_mem);
+
+void __init arm64_memblock_init(void)
+{
+	memblock_enforce_memory_limit(memory_limit);
+
+	/*
+	 * Register the kernel text, kernel data, initrd, and initial
+	 * pagetables with memblock.
+	 */
+	memblock_reserve(__pa(_text), _end - _text);
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (initrd_start)
+		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
+#endif
+
+	early_init_fdt_scan_reserved_mem();
+
+	/* 4GB maximum for 32-bit only capable devices */
+	if (IS_ENABLED(CONFIG_ZONE_DMA))
+		arm64_dma_phys_limit = max_zone_dma_phys();
+	else
+		arm64_dma_phys_limit = PHYS_MASK + 1;
+	dma_contiguous_reserve(arm64_dma_phys_limit);
+
+	memblock_allow_resize();
+	memblock_dump_all();
+}
+
+void __init bootmem_init(void)
+{
+	unsigned long min, max;
+
+	min = PFN_UP(memblock_start_of_DRAM());
+	max = PFN_DOWN(memblock_end_of_DRAM());
+
+	early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
+
+	/*
+	 * Sparsemem tries to allocate bootmem in memory_present(), so must be
+	 * done after the fixed reservations.
+	 */
+	arm64_memory_present();
+
+	sparse_init();
+	zone_sizes_init(min, max);
+
+	high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
+	max_pfn = max_low_pfn = max;
+}
+
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
+static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct page *start_pg, *end_pg;
+	unsigned long pg, pgend;
+
+	/*
+	 * Convert start_pfn/end_pfn to a struct page pointer.
+	 */
+	start_pg = pfn_to_page(start_pfn - 1) + 1;
+	end_pg = pfn_to_page(end_pfn - 1) + 1;
+
+	/*
+	 * Convert to physical addresses, and round start upwards and end
+	 * downwards.
+	 */
+	pg = (unsigned long)PAGE_ALIGN(__pa(start_pg));
+	pgend = (unsigned long)__pa(end_pg) & PAGE_MASK;
+
+	/*
+	 * If there are free pages between these, free the section of the
+	 * memmap array.
+	 */
+	if (pg < pgend)
+		free_bootmem(pg, pgend - pg);
+}
+
+/*
+ * The mem_map array can get very big. Free the unused area of the memory map.
+ */
+static void __init free_unused_memmap(void)
+{
+	unsigned long start, prev_end = 0;
+	struct memblock_region *reg;
+
+	for_each_memblock(memory, reg) {
+		start = __phys_to_pfn(reg->base);
+
+#ifdef CONFIG_SPARSEMEM
+		/*
+		 * Take care not to free memmap entries that don't exist due
+		 * to SPARSEMEM sections which aren't present.
+		 */
+		start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
+#endif
+		/*
+		 * If we had a previous bank, and there is a space between the
+		 * current bank and the previous, free it.
+		 */
+		if (prev_end && prev_end < start)
+			free_memmap(prev_end, start);
+
+		/*
+		 * Align up here since the VM subsystem insists that the
+		 * memmap entries are valid from the bank end aligned to
+		 * MAX_ORDER_NR_PAGES.
+		 */
+		prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
+				 MAX_ORDER_NR_PAGES);
+	}
+
+#ifdef CONFIG_SPARSEMEM
+	if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION))
+		free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
+#endif
+}
+#endif	/* !CONFIG_SPARSEMEM_VMEMMAP */
+
+/*
+ * mem_init() marks the free areas in the mem_map and tells us how much memory
+ * is free.  This is done after various parts of the system have claimed their
+ * memory after the kernel image.
+ */
+void __init mem_init(void)
+{
+	swiotlb_init(1);
+
+	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
+
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
+	free_unused_memmap();
+#endif
+	/* this will put all unused low memory onto the freelists */
+	free_all_bootmem();
+
+	mem_init_print_info(NULL);
+
+#define MLK(b, t) b, t, ((t) - (b)) >> 10
+#define MLM(b, t) b, t, ((t) - (b)) >> 20
+#define MLG(b, t) b, t, ((t) - (b)) >> 30
+#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
+
+	pr_notice("Virtual kernel memory layout:\n"
+		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
+		  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"
+#endif
+		  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"
+		  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"
+		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
+		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n"
+		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		  MLG(VMALLOC_START, VMALLOC_END),
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+		  MLG((unsigned long)vmemmap,
+		      (unsigned long)vmemmap + VMEMMAP_SIZE),
+		  MLM((unsigned long)virt_to_page(PAGE_OFFSET),
+		      (unsigned long)virt_to_page(high_memory)),
+#endif
+		  MLK(FIXADDR_START, FIXADDR_TOP),
+		  MLM(PCI_IO_START, PCI_IO_END),
+		  MLM(MODULES_VADDR, MODULES_END),
+		  MLM(PAGE_OFFSET, (unsigned long)high_memory),
+		  MLK_ROUNDUP(__init_begin, __init_end),
+		  MLK_ROUNDUP(_text, _etext),
+		  MLK_ROUNDUP(_sdata, _edata));
+
+#undef MLK
+#undef MLM
+#undef MLK_ROUNDUP
+
+	/*
+	 * Check boundaries twice: Some fundamental inconsistencies can be
+	 * detected at build time already.
+	 */
+#ifdef CONFIG_COMPAT
+	BUILD_BUG_ON(TASK_SIZE_32			> TASK_SIZE_64);
+#endif
+	BUILD_BUG_ON(TASK_SIZE_64			> MODULES_VADDR);
+	BUG_ON(TASK_SIZE_64				> MODULES_VADDR);
+
+	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
+		extern int sysctl_overcommit_memory;
+		/*
+		 * On a machine this small we won't get anywhere without
+		 * overcommit, so turn it on by default.
+		 */
+		sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
+	}
+}
+
+void free_initmem(void)
+{
+	fixup_init();
+	free_initmem_default(0);
+	free_alternatives_memory();
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+static int keep_initrd;
+
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (!keep_initrd)
+		free_reserved_area((void *)start, (void *)end, 0, "initrd");
+}
+
+static int __init keepinitrd_setup(char *__unused)
+{
+	keep_initrd = 1;
+	return 1;
+}
+
+__setup("keepinitrd", keepinitrd_setup);
+#endif
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
new file mode 100644
index 000000000..01e88c8bc
--- /dev/null
+++ b/arch/arm64/mm/ioremap.c
@@ -0,0 +1,113 @@
+/*
+ * Based on arch/arm/mm/ioremap.c
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ * Hacked for ARM by Phil Blundell <philb@gnu.org>
+ * Hacked to allow all architectures to build, and various cleanups
+ * by Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/io.h>
+
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
+				      pgprot_t prot, void *caller)
+{
+	unsigned long last_addr;
+	unsigned long offset = phys_addr & ~PAGE_MASK;
+	int err;
+	unsigned long addr;
+	struct vm_struct *area;
+
+	/*
+	 * Page align the mapping address and size, taking account of any
+	 * offset.
+	 */
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(size + offset);
+
+	/*
+	 * Don't allow wraparound, zero size or outside PHYS_MASK.
+	 */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK))
+		return NULL;
+
+	/*
+	 * Don't allow RAM to be mapped.
+	 */
+	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
+		return NULL;
+
+	area = get_vm_area_caller(size, VM_IOREMAP, caller);
+	if (!area)
+		return NULL;
+	addr = (unsigned long)area->addr;
+	area->phys_addr = phys_addr;
+
+	err = ioremap_page_range(addr, addr + size, phys_addr, prot);
+	if (err) {
+		vunmap((void *)addr);
+		return NULL;
+	}
+
+	return (void __iomem *)(offset + addr);
+}
+
+void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot)
+{
+	return __ioremap_caller(phys_addr, size, prot,
+				__builtin_return_address(0));
+}
+EXPORT_SYMBOL(__ioremap);
+
+void __iounmap(volatile void __iomem *io_addr)
+{
+	unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
+
+	/*
+	 * We could get an address outside vmalloc range in case
+	 * of ioremap_cache() reusing a RAM mapping.
+	 */
+	if (VMALLOC_START <= addr && addr < VMALLOC_END)
+		vunmap((void *)addr);
+}
+EXPORT_SYMBOL(__iounmap);
+
+void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
+{
+	/* For normal memory we already have a cacheable mapping. */
+	if (pfn_valid(__phys_to_pfn(phys_addr)))
+		return (void __iomem *)__phys_to_virt(phys_addr);
+
+	return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),
+				__builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+
+/*
+ * Must be called after early_fixmap_init
+ */
+void __init early_ioremap_init(void)
+{
+	early_ioremap_setup();
+}
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
new file mode 100644
index 000000000..ef47d99b5
--- /dev/null
+++ b/arch/arm64/mm/mm.h
@@ -0,0 +1,3 @@
+extern void __init bootmem_init(void);
+
+void fixup_init(void);
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
new file mode 100644
index 000000000..ed177475d
--- /dev/null
+++ b/arch/arm64/mm/mmap.c
@@ -0,0 +1,138 @@
+/*
+ * Based on arch/arm/mm/mmap.c
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/elf.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/export.h>
+#include <linux/shm.h>
+#include <linux/sched.h>
+#include <linux/io.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+
+#include <asm/cputype.h>
+
+/*
+ * Leave enough space between the mmap area and the stack to honour ulimit in
+ * the face of randomisation.
+ */
+#define MIN_GAP (SZ_128M + ((STACK_RND_MASK << PAGE_SHIFT) + 1))
+#define MAX_GAP	(STACK_TOP/6*5)
+
+static int mmap_is_legacy(void)
+{
+	if (current->personality & ADDR_COMPAT_LAYOUT)
+		return 1;
+
+	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+		return 1;
+
+	return sysctl_legacy_va_layout;
+}
+
+unsigned long arch_mmap_rnd(void)
+{
+	unsigned long rnd;
+
+	rnd = (unsigned long)get_random_int() & STACK_RND_MASK;
+
+	return rnd << PAGE_SHIFT;
+}
+
+static unsigned long mmap_base(unsigned long rnd)
+{
+	unsigned long gap = rlimit(RLIMIT_STACK);
+
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return PAGE_ALIGN(STACK_TOP - gap - rnd);
+}
+
+/*
+ * This function, called very early during the creation of a new process VM
+ * image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
+
+	/*
+	 * Fall back to the standard layout if the personality bit is set, or
+	 * if the expected stack growth is unlimited:
+	 */
+	if (mmap_is_legacy()) {
+		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+		mm->get_unmapped_area = arch_get_unmapped_area;
+	} else {
+		mm->mmap_base = mmap_base(random_factor);
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+	}
+}
+EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
+
+
+/*
+ * You really shouldn't be using read() or write() on /dev/mem.  This might go
+ * away in the future.
+ */
+int valid_phys_addr_range(phys_addr_t addr, size_t size)
+{
+	if (addr < PHYS_OFFSET)
+		return 0;
+	if (addr + size > __pa(high_memory - 1) + 1)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Do not allow /dev/mem mappings beyond the supported physical range.
+ */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
+{
+	return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK);
+}
+
+#ifdef CONFIG_STRICT_DEVMEM
+
+#include <linux/ioport.h>
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.  We mimic x86 here by
+ * disallowing access to system RAM as well as device-exclusive MMIO regions.
+ * This effectively disable read()/write() on /dev/mem.
+ */
+int devmem_is_allowed(unsigned long pfn)
+{
+	if (iomem_is_exclusive(pfn << PAGE_SHIFT))
+		return 0;
+	if (!page_is_ram(pfn))
+		return 1;
+	return 0;
+}
+
+#endif
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
new file mode 100644
index 000000000..5b8b66442
--- /dev/null
+++ b/arch/arm64/mm/mmu.c
@@ -0,0 +1,645 @@
+/*
+ * Based on arch/arm/mm/mmu.c
+ *
+ * Copyright (C) 1995-2005 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/nodemask.h>
+#include <linux/memblock.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cputype.h>
+#include <asm/fixmap.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sizes.h>
+#include <asm/tlb.h>
+#include <asm/memblock.h>
+#include <asm/mmu_context.h>
+
+#include "mm.h"
+
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+
+/*
+ * Empty_zero_page is a special page that is used for zero-initialized data
+ * and COW.
+ */
+struct page *empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (!pfn_valid(pfn))
+		return pgprot_noncached(vma_prot);
+	else if (file->f_flags & O_SYNC)
+		return pgprot_writecombine(vma_prot);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
+
+static void __init *early_alloc(unsigned long sz)
+{
+	void *ptr = __va(memblock_alloc(sz, sz));
+	BUG_ON(!ptr);
+	memset(ptr, 0, sz);
+	return ptr;
+}
+
+/*
+ * remap a PMD into pages
+ */
+static void split_pmd(pmd_t *pmd, pte_t *pte)
+{
+	unsigned long pfn = pmd_pfn(*pmd);
+	int i = 0;
+
+	do {
+		/*
+		 * Need to have the least restrictive permissions available
+		 * permissions will be fixed up later
+		 */
+		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+		pfn++;
+	} while (pte++, i++, i < PTRS_PER_PTE);
+}
+
+static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, unsigned long pfn,
+				  pgprot_t prot,
+				  void *(*alloc)(unsigned long size))
+{
+	pte_t *pte;
+
+	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
+		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
+		if (pmd_sect(*pmd))
+			split_pmd(pmd, pte);
+		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
+		flush_tlb_all();
+	}
+	BUG_ON(pmd_bad(*pmd));
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		set_pte(pte, pfn_pte(pfn, prot));
+		pfn++;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+void split_pud(pud_t *old_pud, pmd_t *pmd)
+{
+	unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
+	pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
+	int i = 0;
+
+	do {
+		set_pmd(pmd, __pmd(addr | prot));
+		addr += PMD_SIZE;
+	} while (pmd++, i++, i < PTRS_PER_PMD);
+}
+
+static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
+				  unsigned long addr, unsigned long end,
+				  phys_addr_t phys, pgprot_t prot,
+				  void *(*alloc)(unsigned long size))
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	/*
+	 * Check for initial section mappings in the pgd/pud and remove them.
+	 */
+	if (pud_none(*pud) || pud_sect(*pud)) {
+		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
+		if (pud_sect(*pud)) {
+			/*
+			 * need to have the 1G of mappings continue to be
+			 * present
+			 */
+			split_pud(pud, pmd);
+		}
+		pud_populate(mm, pud, pmd);
+		flush_tlb_all();
+	}
+	BUG_ON(pud_bad(*pud));
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		/* try section mapping first */
+		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
+			pmd_t old_pmd =*pmd;
+			set_pmd(pmd, __pmd(phys |
+					   pgprot_val(mk_sect_prot(prot))));
+			/*
+			 * Check for previous table entries created during
+			 * boot (__create_page_tables) and flush them.
+			 */
+			if (!pmd_none(old_pmd)) {
+				flush_tlb_all();
+				if (pmd_table(old_pmd)) {
+					phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
+					if (!WARN_ON_ONCE(slab_is_available()))
+						memblock_free(table, PAGE_SIZE);
+				}
+			}
+		} else {
+			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
+				       prot, alloc);
+		}
+		phys += next - addr;
+	} while (pmd++, addr = next, addr != end);
+}
+
+static inline bool use_1G_block(unsigned long addr, unsigned long next,
+			unsigned long phys)
+{
+	if (PAGE_SHIFT != 12)
+		return false;
+
+	if (((addr | next | phys) & ~PUD_MASK) != 0)
+		return false;
+
+	return true;
+}
+
+static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
+				  unsigned long addr, unsigned long end,
+				  phys_addr_t phys, pgprot_t prot,
+				  void *(*alloc)(unsigned long size))
+{
+	pud_t *pud;
+	unsigned long next;
+
+	if (pgd_none(*pgd)) {
+		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
+		pgd_populate(mm, pgd, pud);
+	}
+	BUG_ON(pgd_bad(*pgd));
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+
+		/*
+		 * For 4K granule only, attempt to put down a 1GB block
+		 */
+		if (use_1G_block(addr, next, phys)) {
+			pud_t old_pud = *pud;
+			set_pud(pud, __pud(phys |
+					   pgprot_val(mk_sect_prot(prot))));
+
+			/*
+			 * If we have an old value for a pud, it will
+			 * be pointing to a pmd table that we no longer
+			 * need (from swapper_pg_dir).
+			 *
+			 * Look up the old pmd table and free it.
+			 */
+			if (!pud_none(old_pud)) {
+				flush_tlb_all();
+				if (pud_table(old_pud)) {
+					phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+					if (!WARN_ON_ONCE(slab_is_available()))
+						memblock_free(table, PAGE_SIZE);
+				}
+			}
+		} else {
+			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
+		}
+		phys += next - addr;
+	} while (pud++, addr = next, addr != end);
+}
+
+/*
+ * Create the page directory entries and any necessary page tables for the
+ * mapping specified by 'md'.
+ */
+static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
+				    phys_addr_t phys, unsigned long virt,
+				    phys_addr_t size, pgprot_t prot,
+				    void *(*alloc)(unsigned long size))
+{
+	unsigned long addr, length, end, next;
+
+	addr = virt & PAGE_MASK;
+	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
+
+	end = addr + length;
+	do {
+		next = pgd_addr_end(addr, end);
+		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
+		phys += next - addr;
+	} while (pgd++, addr = next, addr != end);
+}
+
+static void *late_alloc(unsigned long size)
+{
+	void *ptr;
+
+	BUG_ON(size > PAGE_SIZE);
+	ptr = (void *)__get_free_page(PGALLOC_GFP);
+	BUG_ON(!ptr);
+	return ptr;
+}
+
+static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
+				  phys_addr_t size, pgprot_t prot)
+{
+	if (virt < VMALLOC_START) {
+		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+			&phys, virt);
+		return;
+	}
+	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
+			 size, prot, early_alloc);
+}
+
+void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+			       unsigned long virt, phys_addr_t size,
+			       pgprot_t prot)
+{
+	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
+				late_alloc);
+}
+
+static void create_mapping_late(phys_addr_t phys, unsigned long virt,
+				  phys_addr_t size, pgprot_t prot)
+{
+	if (virt < VMALLOC_START) {
+		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+			&phys, virt);
+		return;
+	}
+
+	return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
+				phys, virt, size, prot, late_alloc);
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+	/*
+	 * Set up the executable regions using the existing section mappings
+	 * for now. This will get more fine grained later once all memory
+	 * is mapped
+	 */
+	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+
+	if (end < kernel_x_start) {
+		create_mapping(start, __phys_to_virt(start),
+			end - start, PAGE_KERNEL);
+	} else if (start >= kernel_x_end) {
+		create_mapping(start, __phys_to_virt(start),
+			end - start, PAGE_KERNEL);
+	} else {
+		if (start < kernel_x_start)
+			create_mapping(start, __phys_to_virt(start),
+				kernel_x_start - start,
+				PAGE_KERNEL);
+		create_mapping(kernel_x_start,
+				__phys_to_virt(kernel_x_start),
+				kernel_x_end - kernel_x_start,
+				PAGE_KERNEL_EXEC);
+		if (kernel_x_end < end)
+			create_mapping(kernel_x_end,
+				__phys_to_virt(kernel_x_end),
+				end - kernel_x_end,
+				PAGE_KERNEL);
+	}
+
+}
+#else
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+	create_mapping(start, __phys_to_virt(start), end - start,
+			PAGE_KERNEL_EXEC);
+}
+#endif
+
+static void __init map_mem(void)
+{
+	struct memblock_region *reg;
+	phys_addr_t limit;
+
+	/*
+	 * Temporarily limit the memblock range. We need to do this as
+	 * create_mapping requires puds, pmds and ptes to be allocated from
+	 * memory addressable from the initial direct kernel mapping.
+	 *
+	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
+	 * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
+	 * PHYS_OFFSET (which must be aligned to 2MB as per
+	 * Documentation/arm64/booting.txt).
+	 */
+	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
+		limit = PHYS_OFFSET + PMD_SIZE;
+	else
+		limit = PHYS_OFFSET + PUD_SIZE;
+	memblock_set_current_limit(limit);
+
+	/* map all the memory banks */
+	for_each_memblock(memory, reg) {
+		phys_addr_t start = reg->base;
+		phys_addr_t end = start + reg->size;
+
+		if (start >= end)
+			break;
+
+#ifndef CONFIG_ARM64_64K_PAGES
+		/*
+		 * For the first memory bank align the start address and
+		 * current memblock limit to prevent create_mapping() from
+		 * allocating pte page tables from unmapped memory.
+		 * When 64K pages are enabled, the pte page table for the
+		 * first PGDIR_SIZE is already present in swapper_pg_dir.
+		 */
+		if (start < limit)
+			start = ALIGN(start, PMD_SIZE);
+		if (end < limit) {
+			limit = end & PMD_MASK;
+			memblock_set_current_limit(limit);
+		}
+#endif
+		__map_memblock(start, end);
+	}
+
+	/* Limit no longer required. */
+	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+}
+
+void __init fixup_executable(void)
+{
+#ifdef CONFIG_DEBUG_RODATA
+	/* now that we are actually fully mapped, make the start/end more fine grained */
+	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
+		unsigned long aligned_start = round_down(__pa(_stext),
+							SECTION_SIZE);
+
+		create_mapping(aligned_start, __phys_to_virt(aligned_start),
+				__pa(_stext) - aligned_start,
+				PAGE_KERNEL);
+	}
+
+	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
+		unsigned long aligned_end = round_up(__pa(__init_end),
+							SECTION_SIZE);
+		create_mapping(__pa(__init_end), (unsigned long)__init_end,
+				aligned_end - __pa(__init_end),
+				PAGE_KERNEL);
+	}
+#endif
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void)
+{
+	create_mapping_late(__pa(_stext), (unsigned long)_stext,
+				(unsigned long)_etext - (unsigned long)_stext,
+				PAGE_KERNEL_EXEC | PTE_RDONLY);
+
+}
+#endif
+
+void fixup_init(void)
+{
+	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
+			(unsigned long)__init_end - (unsigned long)__init_begin,
+			PAGE_KERNEL);
+}
+
+/*
+ * paging_init() sets up the page tables, initialises the zone memory
+ * maps and sets up the zero page.
+ */
+void __init paging_init(void)
+{
+	void *zero_page;
+
+	map_mem();
+	fixup_executable();
+
+	/* allocate the zero page. */
+	zero_page = early_alloc(PAGE_SIZE);
+
+	bootmem_init();
+
+	empty_zero_page = virt_to_page(zero_page);
+
+	/*
+	 * TTBR0 is only used for the identity mapping at this stage. Make it
+	 * point to zero page to avoid speculatively fetching new entries.
+	 */
+	cpu_set_reserved_ttbr0();
+	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
+}
+
+/*
+ * Enable the identity mapping to allow the MMU disabling.
+ */
+void setup_mm_for_reboot(void)
+{
+	cpu_set_reserved_ttbr0();
+	flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
+}
+
+/*
+ * Check whether a kernel address is valid (derived from arch/x86/).
+ */
+int kern_addr_valid(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	if ((((long)addr) >> VA_BITS) != -1UL)
+		return 0;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return 0;
+
+	if (pud_sect(*pud))
+		return pfn_valid(pud_pfn(*pud));
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return 0;
+
+	if (pmd_sect(*pmd))
+		return pfn_valid(pmd_pfn(*pmd));
+
+	pte = pte_offset_kernel(pmd, addr);
+	if (pte_none(*pte))
+		return 0;
+
+	return pfn_valid(pte_pfn(*pte));
+}
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#ifdef CONFIG_ARM64_64K_PAGES
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+{
+	return vmemmap_populate_basepages(start, end, node);
+}
+#else	/* !CONFIG_ARM64_64K_PAGES */
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+{
+	unsigned long addr = start;
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	do {
+		next = pmd_addr_end(addr, end);
+
+		pgd = vmemmap_pgd_populate(addr, node);
+		if (!pgd)
+			return -ENOMEM;
+
+		pud = vmemmap_pud_populate(pgd, addr, node);
+		if (!pud)
+			return -ENOMEM;
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd)) {
+			void *p = NULL;
+
+			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+			if (!p)
+				return -ENOMEM;
+
+			set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
+		} else
+			vmemmap_verify((pte_t *)pmd, node, addr, next);
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+#endif	/* CONFIG_ARM64_64K_PAGES */
+void vmemmap_free(unsigned long start, unsigned long end)
+{
+}
+#endif	/* CONFIG_SPARSEMEM_VMEMMAP */
+
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#if CONFIG_PGTABLE_LEVELS > 2
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
+#endif
+#if CONFIG_PGTABLE_LEVELS > 3
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
+#endif
+
+static inline pud_t * fixmap_pud(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+
+	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+
+	return pud_offset(pgd, addr);
+}
+
+static inline pmd_t * fixmap_pmd(unsigned long addr)
+{
+	pud_t *pud = fixmap_pud(addr);
+
+	BUG_ON(pud_none(*pud) || pud_bad(*pud));
+
+	return pmd_offset(pud, addr);
+}
+
+static inline pte_t * fixmap_pte(unsigned long addr)
+{
+	pmd_t *pmd = fixmap_pmd(addr);
+
+	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+	return pte_offset_kernel(pmd, addr);
+}
+
+void __init early_fixmap_init(void)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr = FIXADDR_START;
+
+	pgd = pgd_offset_k(addr);
+	pgd_populate(&init_mm, pgd, bm_pud);
+	pud = pud_offset(pgd, addr);
+	pud_populate(&init_mm, pud, bm_pmd);
+	pmd = pmd_offset(pud, addr);
+	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
+	/*
+	 * The boot-ioremap range spans multiple pmds, for which
+	 * we are not preparted:
+	 */
+	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+		WARN_ON(1);
+		pr_warn("pmd %p != %p, %p\n",
+			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
+		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+			fix_to_virt(FIX_BTMAP_BEGIN));
+		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+			fix_to_virt(FIX_BTMAP_END));
+
+		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
+	}
+}
+
+void __set_fixmap(enum fixed_addresses idx,
+			       phys_addr_t phys, pgprot_t flags)
+{
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
+
+	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
+
+	pte = fixmap_pte(addr);
+
+	if (pgprot_val(flags)) {
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+	} else {
+		pte_clear(&init_mm, addr, pte);
+		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
+	}
+}
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
new file mode 100644
index 000000000..e47ed1c5d
--- /dev/null
+++ b/arch/arm64/mm/pageattr.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+struct page_change_data {
+	pgprot_t set_mask;
+	pgprot_t clear_mask;
+};
+
+static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
+			void *data)
+{
+	struct page_change_data *cdata = data;
+	pte_t pte = *ptep;
+
+	pte = clear_pte_bit(pte, cdata->clear_mask);
+	pte = set_pte_bit(pte, cdata->set_mask);
+
+	set_pte(ptep, pte);
+	return 0;
+}
+
+static int change_memory_common(unsigned long addr, int numpages,
+				pgprot_t set_mask, pgprot_t clear_mask)
+{
+	unsigned long start = addr;
+	unsigned long size = PAGE_SIZE*numpages;
+	unsigned long end = start + size;
+	int ret;
+	struct page_change_data data;
+
+	if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+		start &= PAGE_MASK;
+		end = start + size;
+		WARN_ON_ONCE(1);
+	}
+
+	if (start < MODULES_VADDR || start >= MODULES_END)
+		return -EINVAL;
+
+	if (end < MODULES_VADDR || end >= MODULES_END)
+		return -EINVAL;
+
+	data.set_mask = set_mask;
+	data.clear_mask = clear_mask;
+
+	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+					&data);
+
+	flush_tlb_kernel_range(start, end);
+	return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(PTE_RDONLY),
+					__pgprot(PTE_WRITE));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(PTE_WRITE),
+					__pgprot(PTE_RDONLY));
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(PTE_PXN),
+					__pgprot(0));
+}
+EXPORT_SYMBOL_GPL(set_memory_nx);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(0),
+					__pgprot(PTE_PXN));
+}
+EXPORT_SYMBOL_GPL(set_memory_x);
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
new file mode 100644
index 000000000..71ca104f9
--- /dev/null
+++ b/arch/arm64/mm/pgd.c
@@ -0,0 +1,61 @@
+/*
+ * PGD allocation/freeing
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+
+#include "mm.h"
+
+#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
+
+static struct kmem_cache *pgd_cache;
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	if (PGD_SIZE == PAGE_SIZE)
+		return (pgd_t *)__get_free_page(PGALLOC_GFP);
+	else
+		return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	if (PGD_SIZE == PAGE_SIZE)
+		free_page((unsigned long)pgd);
+	else
+		kmem_cache_free(pgd_cache, pgd);
+}
+
+static int __init pgd_cache_init(void)
+{
+	/*
+	 * Naturally aligned pgds required by the architecture.
+	 */
+	if (PGD_SIZE != PAGE_SIZE)
+		pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE,
+					      SLAB_PANIC, NULL);
+	return 0;
+}
+core_initcall(pgd_cache_init);
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
new file mode 100644
index 000000000..4c4d93c4b
--- /dev/null
+++ b/arch/arm64/mm/proc-macros.S
@@ -0,0 +1,64 @@
+/*
+ * Based on arch/arm/mm/proc-macros.S
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+
+/*
+ * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
+ */
+	.macro	vma_vm_mm, rd, rn
+	ldr	\rd, [\rn, #VMA_VM_MM]
+	.endm
+
+/*
+ * mmid - get context id from mm pointer (mm->context.id)
+ */
+	.macro	mmid, rd, rn
+	ldr	\rd, [\rn, #MM_CONTEXT_ID]
+	.endm
+
+/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ */
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register.
+ */
+	.macro	icache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	and	\tmp, \tmp, #0xf		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	ldr_l	\tmpreg, idmap_t0sz
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
new file mode 100644
index 000000000..cdd754e19
--- /dev/null
+++ b/arch/arm64/mm/proc.S
@@ -0,0 +1,264 @@
+/*
+ * Based on arch/arm/mm/proc.S
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+
+#include "proc-macros.S"
+
+#ifdef CONFIG_ARM64_64K_PAGES
+#define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
+#else
+#define TCR_TG_FLAGS	TCR_TG0_4K | TCR_TG1_4K
+#endif
+
+#ifdef CONFIG_SMP
+#define TCR_SMP_FLAGS	TCR_SHARED
+#else
+#define TCR_SMP_FLAGS	0
+#endif
+
+/* PTWs cacheable, inner/outer WBWA */
+#define TCR_CACHE_FLAGS	TCR_IRGN_WBWA | TCR_ORGN_WBWA
+
+#define MAIR(attr, mt)	((attr) << ((mt) * 8))
+
+/*
+ *	cpu_cache_off()
+ *
+ *	Turn the CPU D-cache off.
+ */
+ENTRY(cpu_cache_off)
+	mrs	x0, sctlr_el1
+	bic	x0, x0, #1 << 2			// clear SCTLR.C
+	msr	sctlr_el1, x0
+	isb
+	ret
+ENDPROC(cpu_cache_off)
+
+/*
+ *	cpu_reset(loc)
+ *
+ *	Perform a soft reset of the system.  Put the CPU into the same state
+ *	as it would be if it had been reset, and branch to what would be the
+ *	reset vector. It must be executed with the flat identity mapping.
+ *
+ *	- loc   - location to jump to for soft reset
+ */
+	.align	5
+ENTRY(cpu_reset)
+	mrs	x1, sctlr_el1
+	bic	x1, x1, #1
+	msr	sctlr_el1, x1			// disable the MMU
+	isb
+	ret	x0
+ENDPROC(cpu_reset)
+
+ENTRY(cpu_soft_restart)
+	/* Save address of cpu_reset() and reset address */
+	mov	x19, x0
+	mov	x20, x1
+
+	/* Turn D-cache off */
+	bl	cpu_cache_off
+
+	/* Push out all dirty data, and ensure cache is empty */
+	bl	flush_cache_all
+
+	mov	x0, x20
+	ret	x19
+ENDPROC(cpu_soft_restart)
+
+/*
+ *	cpu_do_idle()
+ *
+ *	Idle the processor (wait for interrupt).
+ */
+ENTRY(cpu_do_idle)
+	dsb	sy				// WFI may enter a low-power mode
+	wfi
+	ret
+ENDPROC(cpu_do_idle)
+
+#ifdef CONFIG_CPU_PM
+/**
+ * cpu_do_suspend - save CPU registers context
+ *
+ * x0: virtual address of context pointer
+ */
+ENTRY(cpu_do_suspend)
+	mrs	x2, tpidr_el0
+	mrs	x3, tpidrro_el0
+	mrs	x4, contextidr_el1
+	mrs	x5, mair_el1
+	mrs	x6, cpacr_el1
+	mrs	x7, ttbr1_el1
+	mrs	x8, tcr_el1
+	mrs	x9, vbar_el1
+	mrs	x10, mdscr_el1
+	mrs	x11, oslsr_el1
+	mrs	x12, sctlr_el1
+	stp	x2, x3, [x0]
+	stp	x4, x5, [x0, #16]
+	stp	x6, x7, [x0, #32]
+	stp	x8, x9, [x0, #48]
+	stp	x10, x11, [x0, #64]
+	str	x12, [x0, #80]
+	ret
+ENDPROC(cpu_do_suspend)
+
+/**
+ * cpu_do_resume - restore CPU register context
+ *
+ * x0: Physical address of context pointer
+ * x1: ttbr0_el1 to be restored
+ *
+ * Returns:
+ *	sctlr_el1 value in x0
+ */
+ENTRY(cpu_do_resume)
+	/*
+	 * Invalidate local tlb entries before turning on MMU
+	 */
+	tlbi	vmalle1
+	ldp	x2, x3, [x0]
+	ldp	x4, x5, [x0, #16]
+	ldp	x6, x7, [x0, #32]
+	ldp	x8, x9, [x0, #48]
+	ldp	x10, x11, [x0, #64]
+	ldr	x12, [x0, #80]
+	msr	tpidr_el0, x2
+	msr	tpidrro_el0, x3
+	msr	contextidr_el1, x4
+	msr	mair_el1, x5
+	msr	cpacr_el1, x6
+	msr	ttbr0_el1, x1
+	msr	ttbr1_el1, x7
+	tcr_set_idmap_t0sz x8, x7
+	msr	tcr_el1, x8
+	msr	vbar_el1, x9
+	msr	mdscr_el1, x10
+	/*
+	 * Restore oslsr_el1 by writing oslar_el1
+	 */
+	ubfx	x11, x11, #1, #1
+	msr	oslar_el1, x11
+	mov	x0, x12
+	dsb	nsh		// Make sure local tlb invalidation completed
+	isb
+	ret
+ENDPROC(cpu_do_resume)
+#endif
+
+/*
+ *	cpu_do_switch_mm(pgd_phys, tsk)
+ *
+ *	Set the translation table base pointer to be pgd_phys.
+ *
+ *	- pgd_phys - physical address of new TTB
+ */
+ENTRY(cpu_do_switch_mm)
+	mmid	w1, x1				// get mm->context.id
+	bfi	x0, x1, #48, #16		// set the ASID
+	msr	ttbr0_el1, x0			// set TTBR0
+	isb
+	ret
+ENDPROC(cpu_do_switch_mm)
+
+	.section ".text.init", #alloc, #execinstr
+
+/*
+ *	__cpu_setup
+ *
+ *	Initialise the processor for turning the MMU on.  Return in x0 the
+ *	value of the SCTLR_EL1 register.
+ */
+ENTRY(__cpu_setup)
+	ic	iallu				// I+BTB cache invalidate
+	tlbi	vmalle1is			// invalidate I + D TLBs
+	dsb	ish
+
+	mov	x0, #3 << 20
+	msr	cpacr_el1, x0			// Enable FP/ASIMD
+	msr	mdscr_el1, xzr			// Reset mdscr_el1
+	/*
+	 * Memory region attributes for LPAE:
+	 *
+	 *   n = AttrIndx[2:0]
+	 *			n	MAIR
+	 *   DEVICE_nGnRnE	000	00000000
+	 *   DEVICE_nGnRE	001	00000100
+	 *   DEVICE_GRE		010	00001100
+	 *   NORMAL_NC		011	01000100
+	 *   NORMAL		100	11111111
+	 */
+	ldr	x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
+		     MAIR(0x04, MT_DEVICE_nGnRE) | \
+		     MAIR(0x0c, MT_DEVICE_GRE) | \
+		     MAIR(0x44, MT_NORMAL_NC) | \
+		     MAIR(0xff, MT_NORMAL)
+	msr	mair_el1, x5
+	/*
+	 * Prepare SCTLR
+	 */
+	adr	x5, crval
+	ldp	w5, w6, [x5]
+	mrs	x0, sctlr_el1
+	bic	x0, x0, x5			// clear bits
+	orr	x0, x0, x6			// set bits
+	/*
+	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
+	 * both user and kernel.
+	 */
+	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
+			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+	tcr_set_idmap_t0sz	x10, x9
+
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+	 * TCR_EL1.
+	 */
+	mrs	x9, ID_AA64MMFR0_EL1
+	bfi	x10, x9, #32, #3
+	msr	tcr_el1, x10
+	ret					// return to head.S
+ENDPROC(__cpu_setup)
+
+	/*
+	 * We set the desired value explicitly, including those of the
+	 * reserved bits. The values of bits EE & E0E were set early in
+	 * el2_setup, which are left untouched below.
+	 *
+	 *                 n n            T
+	 *       U E      WT T UD     US IHBS
+	 *       CE0      XWHW CZ     ME TEEA S
+	 * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM
+	 * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved
+	 * .... .1.. .... 01.1 11.1 ..01 0.01 1101 < software settings
+	 */
+	.type	crval, #object
+crval:
+	.word	0xfcffffff			// clear
+	.word	0x34d5d91d			// set
diff --git a/arch/arm64/net/Makefile b/arch/arm64/net/Makefile
new file mode 100644
index 000000000..da9763378
--- /dev/null
+++ b/arch/arm64/net/Makefile
@@ -0,0 +1,4 @@
+#
+# ARM64 networking code
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
new file mode 100644
index 000000000..98a26ce82
--- /dev/null
+++ b/arch/arm64/net/bpf_jit.h
@@ -0,0 +1,177 @@
+/*
+ * BPF JIT compiler for ARM64
+ *
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+#include <asm/insn.h>
+
+/* 5-bit Register Operand */
+#define A64_R(x)	AARCH64_INSN_REG_##x
+#define A64_FP		AARCH64_INSN_REG_FP
+#define A64_LR		AARCH64_INSN_REG_LR
+#define A64_ZR		AARCH64_INSN_REG_ZR
+#define A64_SP		AARCH64_INSN_REG_SP
+
+#define A64_VARIANT(sf) \
+	((sf) ? AARCH64_INSN_VARIANT_64BIT : AARCH64_INSN_VARIANT_32BIT)
+
+/* Compare & branch (immediate) */
+#define A64_COMP_BRANCH(sf, Rt, offset, type) \
+	aarch64_insn_gen_comp_branch_imm(0, offset, Rt, A64_VARIANT(sf), \
+		AARCH64_INSN_BRANCH_COMP_##type)
+#define A64_CBZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, ZERO)
+
+/* Conditional branch (immediate) */
+#define A64_COND_BRANCH(cond, offset) \
+	aarch64_insn_gen_cond_branch_imm(0, offset, cond)
+#define A64_COND_EQ	AARCH64_INSN_COND_EQ /* == */
+#define A64_COND_NE	AARCH64_INSN_COND_NE /* != */
+#define A64_COND_CS	AARCH64_INSN_COND_CS /* unsigned >= */
+#define A64_COND_HI	AARCH64_INSN_COND_HI /* unsigned > */
+#define A64_COND_GE	AARCH64_INSN_COND_GE /* signed >= */
+#define A64_COND_GT	AARCH64_INSN_COND_GT /* signed > */
+#define A64_B_(cond, imm19) A64_COND_BRANCH(cond, (imm19) << 2)
+
+/* Unconditional branch (immediate) */
+#define A64_BRANCH(offset, type) aarch64_insn_gen_branch_imm(0, offset, \
+	AARCH64_INSN_BRANCH_##type)
+#define A64_B(imm26)  A64_BRANCH((imm26) << 2, NOLINK)
+#define A64_BL(imm26) A64_BRANCH((imm26) << 2, LINK)
+
+/* Unconditional branch (register) */
+#define A64_BLR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_LINK)
+#define A64_RET(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_RETURN)
+
+/* Load/store register (register offset) */
+#define A64_LS_REG(Rt, Rn, Rm, size, type) \
+	aarch64_insn_gen_load_store_reg(Rt, Rn, Rm, \
+		AARCH64_INSN_SIZE_##size, \
+		AARCH64_INSN_LDST_##type##_REG_OFFSET)
+#define A64_STRB(Wt, Xn, Xm)  A64_LS_REG(Wt, Xn, Xm, 8, STORE)
+#define A64_LDRB(Wt, Xn, Xm)  A64_LS_REG(Wt, Xn, Xm, 8, LOAD)
+#define A64_STRH(Wt, Xn, Xm)  A64_LS_REG(Wt, Xn, Xm, 16, STORE)
+#define A64_LDRH(Wt, Xn, Xm)  A64_LS_REG(Wt, Xn, Xm, 16, LOAD)
+#define A64_STR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, STORE)
+#define A64_LDR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, LOAD)
+#define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE)
+#define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD)
+
+/* Load/store register pair */
+#define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \
+	aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \
+		AARCH64_INSN_VARIANT_64BIT, \
+		AARCH64_INSN_LDST_##ls##_PAIR_##type)
+/* Rn -= 16; Rn[0] = Rt; Rn[8] = Rt2; */
+#define A64_PUSH(Rt, Rt2, Rn) A64_LS_PAIR(Rt, Rt2, Rn, -16, STORE, PRE_INDEX)
+/* Rt = Rn[0]; Rt2 = Rn[8]; Rn += 16; */
+#define A64_POP(Rt, Rt2, Rn)  A64_LS_PAIR(Rt, Rt2, Rn, 16, LOAD, POST_INDEX)
+
+/* Add/subtract (immediate) */
+#define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
+	aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \
+		A64_VARIANT(sf), AARCH64_INSN_ADSB_##type)
+/* Rd = Rn OP imm12 */
+#define A64_ADD_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, ADD)
+#define A64_SUB_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, SUB)
+/* Rd = Rn */
+#define A64_MOV(sf, Rd, Rn) A64_ADD_I(sf, Rd, Rn, 0)
+
+/* Bitfield move */
+#define A64_BITFIELD(sf, Rd, Rn, immr, imms, type) \
+	aarch64_insn_gen_bitfield(Rd, Rn, immr, imms, \
+		A64_VARIANT(sf), AARCH64_INSN_BITFIELD_MOVE_##type)
+/* Signed, with sign replication to left and zeros to right */
+#define A64_SBFM(sf, Rd, Rn, ir, is) A64_BITFIELD(sf, Rd, Rn, ir, is, SIGNED)
+/* Unsigned, with zeros to left and right */
+#define A64_UBFM(sf, Rd, Rn, ir, is) A64_BITFIELD(sf, Rd, Rn, ir, is, UNSIGNED)
+
+/* Rd = Rn << shift */
+#define A64_LSL(sf, Rd, Rn, shift) ({	\
+	int sz = (sf) ? 64 : 32;	\
+	A64_UBFM(sf, Rd, Rn, (unsigned)-(shift) % sz, sz - 1 - (shift)); \
+})
+/* Rd = Rn >> shift */
+#define A64_LSR(sf, Rd, Rn, shift) A64_UBFM(sf, Rd, Rn, shift, (sf) ? 63 : 31)
+/* Rd = Rn >> shift; signed */
+#define A64_ASR(sf, Rd, Rn, shift) A64_SBFM(sf, Rd, Rn, shift, (sf) ? 63 : 31)
+
+/* Zero extend */
+#define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15)
+#define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31)
+
+/* Move wide (immediate) */
+#define A64_MOVEW(sf, Rd, imm16, shift, type) \
+	aarch64_insn_gen_movewide(Rd, imm16, shift, \
+		A64_VARIANT(sf), AARCH64_INSN_MOVEWIDE_##type)
+/* Rd = Zeros (for MOVZ);
+ * Rd |= imm16 << shift (where shift is {0, 16, 32, 48});
+ * Rd = ~Rd; (for MOVN); */
+#define A64_MOVN(sf, Rd, imm16, shift) A64_MOVEW(sf, Rd, imm16, shift, INVERSE)
+#define A64_MOVZ(sf, Rd, imm16, shift) A64_MOVEW(sf, Rd, imm16, shift, ZERO)
+#define A64_MOVK(sf, Rd, imm16, shift) A64_MOVEW(sf, Rd, imm16, shift, KEEP)
+
+/* Add/subtract (shifted register) */
+#define A64_ADDSUB_SREG(sf, Rd, Rn, Rm, type) \
+	aarch64_insn_gen_add_sub_shifted_reg(Rd, Rn, Rm, 0, \
+		A64_VARIANT(sf), AARCH64_INSN_ADSB_##type)
+/* Rd = Rn OP Rm */
+#define A64_ADD(sf, Rd, Rn, Rm)  A64_ADDSUB_SREG(sf, Rd, Rn, Rm, ADD)
+#define A64_SUB(sf, Rd, Rn, Rm)  A64_ADDSUB_SREG(sf, Rd, Rn, Rm, SUB)
+#define A64_SUBS(sf, Rd, Rn, Rm) A64_ADDSUB_SREG(sf, Rd, Rn, Rm, SUB_SETFLAGS)
+/* Rd = -Rm */
+#define A64_NEG(sf, Rd, Rm) A64_SUB(sf, Rd, A64_ZR, Rm)
+/* Rn - Rm; set condition flags */
+#define A64_CMP(sf, Rn, Rm) A64_SUBS(sf, A64_ZR, Rn, Rm)
+
+/* Data-processing (1 source) */
+#define A64_DATA1(sf, Rd, Rn, type) aarch64_insn_gen_data1(Rd, Rn, \
+	A64_VARIANT(sf), AARCH64_INSN_DATA1_##type)
+/* Rd = BSWAPx(Rn) */
+#define A64_REV16(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, REVERSE_16)
+#define A64_REV32(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, REVERSE_32)
+#define A64_REV64(Rd, Rn)     A64_DATA1(1, Rd, Rn, REVERSE_64)
+
+/* Data-processing (2 source) */
+/* Rd = Rn OP Rm */
+#define A64_DATA2(sf, Rd, Rn, Rm, type) aarch64_insn_gen_data2(Rd, Rn, Rm, \
+	A64_VARIANT(sf), AARCH64_INSN_DATA2_##type)
+#define A64_UDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, UDIV)
+#define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV)
+#define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV)
+#define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
+
+/* Data-processing (3 source) */
+/* Rd = Ra + Rn * Rm */
+#define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
+	A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD)
+/* Rd = Rn * Rm */
+#define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm)
+
+/* Logical (shifted register) */
+#define A64_LOGIC_SREG(sf, Rd, Rn, Rm, type) \
+	aarch64_insn_gen_logical_shifted_reg(Rd, Rn, Rm, 0, \
+		A64_VARIANT(sf), AARCH64_INSN_LOGIC_##type)
+/* Rd = Rn OP Rm */
+#define A64_AND(sf, Rd, Rn, Rm)  A64_LOGIC_SREG(sf, Rd, Rn, Rm, AND)
+#define A64_ORR(sf, Rd, Rn, Rm)  A64_LOGIC_SREG(sf, Rd, Rn, Rm, ORR)
+#define A64_EOR(sf, Rd, Rn, Rm)  A64_LOGIC_SREG(sf, Rd, Rn, Rm, EOR)
+#define A64_ANDS(sf, Rd, Rn, Rm) A64_LOGIC_SREG(sf, Rd, Rn, Rm, AND_SETFLAGS)
+/* Rn & Rm; set condition flags */
+#define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm)
+
+#endif /* _BPF_JIT_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
new file mode 100644
index 000000000..c047598b0
--- /dev/null
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -0,0 +1,765 @@
+/*
+ * BPF JIT compiler for ARM64
+ *
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "bpf_jit: " fmt
+
+#include <linux/filter.h>
+#include <linux/printk.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+
+#include <asm/byteorder.h>
+#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
+
+#include "bpf_jit.h"
+
+int bpf_jit_enable __read_mostly;
+
+#define TMP_REG_1 (MAX_BPF_REG + 0)
+#define TMP_REG_2 (MAX_BPF_REG + 1)
+
+/* Map BPF registers to A64 registers */
+static const int bpf2a64[] = {
+	/* return value from in-kernel function, and exit value from eBPF */
+	[BPF_REG_0] = A64_R(7),
+	/* arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = A64_R(0),
+	[BPF_REG_2] = A64_R(1),
+	[BPF_REG_3] = A64_R(2),
+	[BPF_REG_4] = A64_R(3),
+	[BPF_REG_5] = A64_R(4),
+	/* callee saved registers that in-kernel function will preserve */
+	[BPF_REG_6] = A64_R(19),
+	[BPF_REG_7] = A64_R(20),
+	[BPF_REG_8] = A64_R(21),
+	[BPF_REG_9] = A64_R(22),
+	/* read-only frame pointer to access stack */
+	[BPF_REG_FP] = A64_FP,
+	/* temporary register for internal BPF JIT */
+	[TMP_REG_1] = A64_R(23),
+	[TMP_REG_2] = A64_R(24),
+};
+
+struct jit_ctx {
+	const struct bpf_prog *prog;
+	int idx;
+	int tmp_used;
+	int epilogue_offset;
+	int *offset;
+	u32 *image;
+};
+
+static inline void emit(const u32 insn, struct jit_ctx *ctx)
+{
+	if (ctx->image != NULL)
+		ctx->image[ctx->idx] = cpu_to_le32(insn);
+
+	ctx->idx++;
+}
+
+static inline void emit_a64_mov_i64(const int reg, const u64 val,
+				    struct jit_ctx *ctx)
+{
+	u64 tmp = val;
+	int shift = 0;
+
+	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
+	tmp >>= 16;
+	shift += 16;
+	while (tmp) {
+		if (tmp & 0xffff)
+			emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
+		tmp >>= 16;
+		shift += 16;
+	}
+}
+
+static inline void emit_a64_mov_i(const int is64, const int reg,
+				  const s32 val, struct jit_ctx *ctx)
+{
+	u16 hi = val >> 16;
+	u16 lo = val & 0xffff;
+
+	if (hi & 0x8000) {
+		if (hi == 0xffff) {
+			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
+		} else {
+			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
+			emit(A64_MOVK(is64, reg, lo, 0), ctx);
+		}
+	} else {
+		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
+		if (hi)
+			emit(A64_MOVK(is64, reg, hi, 16), ctx);
+	}
+}
+
+static inline int bpf2a64_offset(int bpf_to, int bpf_from,
+				 const struct jit_ctx *ctx)
+{
+	int to = ctx->offset[bpf_to];
+	/* -1 to account for the Branch instruction */
+	int from = ctx->offset[bpf_from] - 1;
+
+	return to - from;
+}
+
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	u32 *ptr;
+	/* We are guaranteed to have aligned memory. */
+	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
+		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
+}
+
+static inline int epilogue_offset(const struct jit_ctx *ctx)
+{
+	int to = ctx->epilogue_offset;
+	int from = ctx->idx;
+
+	return to - from;
+}
+
+/* Stack must be multiples of 16B */
+#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
+
+static void build_prologue(struct jit_ctx *ctx)
+{
+	const u8 r6 = bpf2a64[BPF_REG_6];
+	const u8 r7 = bpf2a64[BPF_REG_7];
+	const u8 r8 = bpf2a64[BPF_REG_8];
+	const u8 r9 = bpf2a64[BPF_REG_9];
+	const u8 fp = bpf2a64[BPF_REG_FP];
+	const u8 ra = bpf2a64[BPF_REG_A];
+	const u8 rx = bpf2a64[BPF_REG_X];
+	const u8 tmp1 = bpf2a64[TMP_REG_1];
+	const u8 tmp2 = bpf2a64[TMP_REG_2];
+	int stack_size = MAX_BPF_STACK;
+
+	stack_size += 4; /* extra for skb_copy_bits buffer */
+	stack_size = STACK_ALIGN(stack_size);
+
+	/* Save callee-saved register */
+	emit(A64_PUSH(r6, r7, A64_SP), ctx);
+	emit(A64_PUSH(r8, r9, A64_SP), ctx);
+	if (ctx->tmp_used)
+		emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
+
+	/* Set up BPF stack */
+	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
+
+	/* Set up frame pointer */
+	emit(A64_MOV(1, fp, A64_SP), ctx);
+
+	/* Clear registers A and X */
+	emit_a64_mov_i64(ra, 0, ctx);
+	emit_a64_mov_i64(rx, 0, ctx);
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+	const u8 r0 = bpf2a64[BPF_REG_0];
+	const u8 r6 = bpf2a64[BPF_REG_6];
+	const u8 r7 = bpf2a64[BPF_REG_7];
+	const u8 r8 = bpf2a64[BPF_REG_8];
+	const u8 r9 = bpf2a64[BPF_REG_9];
+	const u8 fp = bpf2a64[BPF_REG_FP];
+	const u8 tmp1 = bpf2a64[TMP_REG_1];
+	const u8 tmp2 = bpf2a64[TMP_REG_2];
+	int stack_size = MAX_BPF_STACK;
+
+	stack_size += 4; /* extra for skb_copy_bits buffer */
+	stack_size = STACK_ALIGN(stack_size);
+
+	/* We're done with BPF stack */
+	emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx);
+
+	/* Restore callee-saved register */
+	if (ctx->tmp_used)
+		emit(A64_POP(tmp1, tmp2, A64_SP), ctx);
+	emit(A64_POP(r8, r9, A64_SP), ctx);
+	emit(A64_POP(r6, r7, A64_SP), ctx);
+
+	/* Restore frame pointer */
+	emit(A64_MOV(1, fp, A64_SP), ctx);
+
+	/* Set return value */
+	emit(A64_MOV(1, A64_R(0), r0), ctx);
+
+	emit(A64_RET(A64_LR), ctx);
+}
+
+/* JITs an eBPF instruction.
+ * Returns:
+ * 0  - successfully JITed an 8-byte eBPF instruction.
+ * >0 - successfully JITed a 16-byte eBPF instruction.
+ * <0 - failed to JIT.
+ */
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const u8 code = insn->code;
+	const u8 dst = bpf2a64[insn->dst_reg];
+	const u8 src = bpf2a64[insn->src_reg];
+	const u8 tmp = bpf2a64[TMP_REG_1];
+	const u8 tmp2 = bpf2a64[TMP_REG_2];
+	const s16 off = insn->off;
+	const s32 imm = insn->imm;
+	const int i = insn - ctx->prog->insnsi;
+	const bool is64 = BPF_CLASS(code) == BPF_ALU64;
+	u8 jmp_cond;
+	s32 jmp_offset;
+
+	switch (code) {
+	/* dst = src */
+	case BPF_ALU | BPF_MOV | BPF_X:
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		emit(A64_MOV(is64, dst, src), ctx);
+		break;
+	/* dst = dst OP src */
+	case BPF_ALU | BPF_ADD | BPF_X:
+	case BPF_ALU64 | BPF_ADD | BPF_X:
+		emit(A64_ADD(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_SUB | BPF_X:
+	case BPF_ALU64 | BPF_SUB | BPF_X:
+		emit(A64_SUB(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_AND | BPF_X:
+	case BPF_ALU64 | BPF_AND | BPF_X:
+		emit(A64_AND(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_OR | BPF_X:
+	case BPF_ALU64 | BPF_OR | BPF_X:
+		emit(A64_ORR(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_XOR | BPF_X:
+	case BPF_ALU64 | BPF_XOR | BPF_X:
+		emit(A64_EOR(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_MUL | BPF_X:
+	case BPF_ALU64 | BPF_MUL | BPF_X:
+		emit(A64_MUL(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_DIV | BPF_X:
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+		emit(A64_UDIV(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_MOD | BPF_X:
+	case BPF_ALU64 | BPF_MOD | BPF_X:
+		ctx->tmp_used = 1;
+		emit(A64_UDIV(is64, tmp, dst, src), ctx);
+		emit(A64_MUL(is64, tmp, tmp, src), ctx);
+		emit(A64_SUB(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_LSH | BPF_X:
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+		emit(A64_LSLV(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_X:
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+		emit(A64_LSRV(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_ARSH | BPF_X:
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+		emit(A64_ASRV(is64, dst, dst, src), ctx);
+		break;
+	/* dst = -dst */
+	case BPF_ALU | BPF_NEG:
+	case BPF_ALU64 | BPF_NEG:
+		emit(A64_NEG(is64, dst, dst), ctx);
+		break;
+	/* dst = BSWAP##imm(dst) */
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		if (BPF_SRC(code) == BPF_FROM_BE)
+			goto emit_bswap_uxt;
+#else /* !CONFIG_CPU_BIG_ENDIAN */
+		if (BPF_SRC(code) == BPF_FROM_LE)
+			goto emit_bswap_uxt;
+#endif
+		switch (imm) {
+		case 16:
+			emit(A64_REV16(is64, dst, dst), ctx);
+			/* zero-extend 16 bits into 64 bits */
+			emit(A64_UXTH(is64, dst, dst), ctx);
+			break;
+		case 32:
+			emit(A64_REV32(is64, dst, dst), ctx);
+			/* upper 32 bits already cleared */
+			break;
+		case 64:
+			emit(A64_REV64(dst, dst), ctx);
+			break;
+		}
+		break;
+emit_bswap_uxt:
+		switch (imm) {
+		case 16:
+			/* zero-extend 16 bits into 64 bits */
+			emit(A64_UXTH(is64, dst, dst), ctx);
+			break;
+		case 32:
+			/* zero-extend 32 bits into 64 bits */
+			emit(A64_UXTW(is64, dst, dst), ctx);
+			break;
+		case 64:
+			/* nop */
+			break;
+		}
+		break;
+	/* dst = imm */
+	case BPF_ALU | BPF_MOV | BPF_K:
+	case BPF_ALU64 | BPF_MOV | BPF_K:
+		emit_a64_mov_i(is64, dst, imm, ctx);
+		break;
+	/* dst = dst OP imm */
+	case BPF_ALU | BPF_ADD | BPF_K:
+	case BPF_ALU64 | BPF_ADD | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_ADD(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_SUB | BPF_K:
+	case BPF_ALU64 | BPF_SUB | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_SUB(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_AND | BPF_K:
+	case BPF_ALU64 | BPF_AND | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_AND(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_OR | BPF_K:
+	case BPF_ALU64 | BPF_OR | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_ORR(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_XOR | BPF_K:
+	case BPF_ALU64 | BPF_XOR | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_EOR(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_MUL | BPF_K:
+	case BPF_ALU64 | BPF_MUL | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_MUL(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_DIV | BPF_K:
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_UDIV(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_MOD | BPF_K:
+	case BPF_ALU64 | BPF_MOD | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(is64, tmp2, imm, ctx);
+		emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
+		emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
+		emit(A64_SUB(is64, dst, dst, tmp), ctx);
+		break;
+	case BPF_ALU | BPF_LSH | BPF_K:
+	case BPF_ALU64 | BPF_LSH | BPF_K:
+		emit(A64_LSL(is64, dst, dst, imm), ctx);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_K:
+	case BPF_ALU64 | BPF_RSH | BPF_K:
+		emit(A64_LSR(is64, dst, dst, imm), ctx);
+		break;
+	case BPF_ALU | BPF_ARSH | BPF_K:
+	case BPF_ALU64 | BPF_ARSH | BPF_K:
+		emit(A64_ASR(is64, dst, dst, imm), ctx);
+		break;
+
+#define check_imm(bits, imm) do {				\
+	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
+	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
+		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
+			i, imm, imm);				\
+		return -EINVAL;					\
+	}							\
+} while (0)
+#define check_imm19(imm) check_imm(19, imm)
+#define check_imm26(imm) check_imm(26, imm)
+
+	/* JUMP off */
+	case BPF_JMP | BPF_JA:
+		jmp_offset = bpf2a64_offset(i + off, i, ctx);
+		check_imm26(jmp_offset);
+		emit(A64_B(jmp_offset), ctx);
+		break;
+	/* IF (dst COND src) JUMP off */
+	case BPF_JMP | BPF_JEQ | BPF_X:
+	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSGE | BPF_X:
+		emit(A64_CMP(1, dst, src), ctx);
+emit_cond_jmp:
+		jmp_offset = bpf2a64_offset(i + off, i, ctx);
+		check_imm19(jmp_offset);
+		switch (BPF_OP(code)) {
+		case BPF_JEQ:
+			jmp_cond = A64_COND_EQ;
+			break;
+		case BPF_JGT:
+			jmp_cond = A64_COND_HI;
+			break;
+		case BPF_JGE:
+			jmp_cond = A64_COND_CS;
+			break;
+		case BPF_JNE:
+			jmp_cond = A64_COND_NE;
+			break;
+		case BPF_JSGT:
+			jmp_cond = A64_COND_GT;
+			break;
+		case BPF_JSGE:
+			jmp_cond = A64_COND_GE;
+			break;
+		default:
+			return -EFAULT;
+		}
+		emit(A64_B_(jmp_cond, jmp_offset), ctx);
+		break;
+	case BPF_JMP | BPF_JSET | BPF_X:
+		emit(A64_TST(1, dst, src), ctx);
+		goto emit_cond_jmp;
+	/* IF (dst COND imm) JUMP off */
+	case BPF_JMP | BPF_JEQ | BPF_K:
+	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JNE | BPF_K:
+	case BPF_JMP | BPF_JSGT | BPF_K:
+	case BPF_JMP | BPF_JSGE | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(1, tmp, imm, ctx);
+		emit(A64_CMP(1, dst, tmp), ctx);
+		goto emit_cond_jmp;
+	case BPF_JMP | BPF_JSET | BPF_K:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(1, tmp, imm, ctx);
+		emit(A64_TST(1, dst, tmp), ctx);
+		goto emit_cond_jmp;
+	/* function call */
+	case BPF_JMP | BPF_CALL:
+	{
+		const u8 r0 = bpf2a64[BPF_REG_0];
+		const u64 func = (u64)__bpf_call_base + imm;
+
+		ctx->tmp_used = 1;
+		emit_a64_mov_i64(tmp, func, ctx);
+		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+		emit(A64_BLR(tmp), ctx);
+		emit(A64_MOV(1, r0, A64_R(0)), ctx);
+		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
+		break;
+	}
+	/* function return */
+	case BPF_JMP | BPF_EXIT:
+		/* Optimization: when last instruction is EXIT,
+		   simply fallthrough to epilogue. */
+		if (i == ctx->prog->len - 1)
+			break;
+		jmp_offset = epilogue_offset(ctx);
+		check_imm26(jmp_offset);
+		emit(A64_B(jmp_offset), ctx);
+		break;
+
+	/* dst = imm64 */
+	case BPF_LD | BPF_IMM | BPF_DW:
+	{
+		const struct bpf_insn insn1 = insn[1];
+		u64 imm64;
+
+		if (insn1.code != 0 || insn1.src_reg != 0 ||
+		    insn1.dst_reg != 0 || insn1.off != 0) {
+			/* Note: verifier in BPF core must catch invalid
+			 * instructions.
+			 */
+			pr_err_once("Invalid BPF_LD_IMM64 instruction\n");
+			return -EINVAL;
+		}
+
+		imm64 = (u64)insn1.imm << 32 | (u32)imm;
+		emit_a64_mov_i64(dst, imm64, ctx);
+
+		return 1;
+	}
+
+	/* LDX: dst = *(size *)(src + off) */
+	case BPF_LDX | BPF_MEM | BPF_W:
+	case BPF_LDX | BPF_MEM | BPF_H:
+	case BPF_LDX | BPF_MEM | BPF_B:
+	case BPF_LDX | BPF_MEM | BPF_DW:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(1, tmp, off, ctx);
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			emit(A64_LDR32(dst, src, tmp), ctx);
+			break;
+		case BPF_H:
+			emit(A64_LDRH(dst, src, tmp), ctx);
+			break;
+		case BPF_B:
+			emit(A64_LDRB(dst, src, tmp), ctx);
+			break;
+		case BPF_DW:
+			emit(A64_LDR64(dst, src, tmp), ctx);
+			break;
+		}
+		break;
+
+	/* ST: *(size *)(dst + off) = imm */
+	case BPF_ST | BPF_MEM | BPF_W:
+	case BPF_ST | BPF_MEM | BPF_H:
+	case BPF_ST | BPF_MEM | BPF_B:
+	case BPF_ST | BPF_MEM | BPF_DW:
+		goto notyet;
+
+	/* STX: *(size *)(dst + off) = src */
+	case BPF_STX | BPF_MEM | BPF_W:
+	case BPF_STX | BPF_MEM | BPF_H:
+	case BPF_STX | BPF_MEM | BPF_B:
+	case BPF_STX | BPF_MEM | BPF_DW:
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(1, tmp, off, ctx);
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			emit(A64_STR32(src, dst, tmp), ctx);
+			break;
+		case BPF_H:
+			emit(A64_STRH(src, dst, tmp), ctx);
+			break;
+		case BPF_B:
+			emit(A64_STRB(src, dst, tmp), ctx);
+			break;
+		case BPF_DW:
+			emit(A64_STR64(src, dst, tmp), ctx);
+			break;
+		}
+		break;
+	/* STX XADD: lock *(u32 *)(dst + off) += src */
+	case BPF_STX | BPF_XADD | BPF_W:
+	/* STX XADD: lock *(u64 *)(dst + off) += src */
+	case BPF_STX | BPF_XADD | BPF_DW:
+		goto notyet;
+
+	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
+	case BPF_LD | BPF_ABS | BPF_W:
+	case BPF_LD | BPF_ABS | BPF_H:
+	case BPF_LD | BPF_ABS | BPF_B:
+	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
+	case BPF_LD | BPF_IND | BPF_W:
+	case BPF_LD | BPF_IND | BPF_H:
+	case BPF_LD | BPF_IND | BPF_B:
+	{
+		const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
+		const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
+		const u8 fp = bpf2a64[BPF_REG_FP];
+		const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
+		const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
+		const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
+		const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
+		const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
+		int size;
+
+		emit(A64_MOV(1, r1, r6), ctx);
+		emit_a64_mov_i(0, r2, imm, ctx);
+		if (BPF_MODE(code) == BPF_IND)
+			emit(A64_ADD(0, r2, r2, src), ctx);
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			size = 4;
+			break;
+		case BPF_H:
+			size = 2;
+			break;
+		case BPF_B:
+			size = 1;
+			break;
+		default:
+			return -EINVAL;
+		}
+		emit_a64_mov_i64(r3, size, ctx);
+		emit(A64_ADD_I(1, r4, fp, MAX_BPF_STACK), ctx);
+		emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
+		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+		emit(A64_BLR(r5), ctx);
+		emit(A64_MOV(1, r0, A64_R(0)), ctx);
+		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
+
+		jmp_offset = epilogue_offset(ctx);
+		check_imm19(jmp_offset);
+		emit(A64_CBZ(1, r0, jmp_offset), ctx);
+		emit(A64_MOV(1, r5, r0), ctx);
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			emit(A64_LDR32(r0, r5, A64_ZR), ctx);
+#ifndef CONFIG_CPU_BIG_ENDIAN
+			emit(A64_REV32(0, r0, r0), ctx);
+#endif
+			break;
+		case BPF_H:
+			emit(A64_LDRH(r0, r5, A64_ZR), ctx);
+#ifndef CONFIG_CPU_BIG_ENDIAN
+			emit(A64_REV16(0, r0, r0), ctx);
+#endif
+			break;
+		case BPF_B:
+			emit(A64_LDRB(r0, r5, A64_ZR), ctx);
+			break;
+		}
+		break;
+	}
+notyet:
+		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+		return -EFAULT;
+
+	default:
+		pr_err_once("unknown opcode %02x\n", code);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int build_body(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->prog;
+	int i;
+
+	for (i = 0; i < prog->len; i++) {
+		const struct bpf_insn *insn = &prog->insnsi[i];
+		int ret;
+
+		ret = build_insn(insn, ctx);
+
+		if (ctx->image == NULL)
+			ctx->offset[i] = ctx->idx;
+
+		if (ret > 0) {
+			i++;
+			continue;
+		}
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+void bpf_jit_compile(struct bpf_prog *prog)
+{
+	/* Nothing to do here. We support Internal BPF. */
+}
+
+void bpf_int_jit_compile(struct bpf_prog *prog)
+{
+	struct bpf_binary_header *header;
+	struct jit_ctx ctx;
+	int image_size;
+	u8 *image_ptr;
+
+	if (!bpf_jit_enable)
+		return;
+
+	if (!prog || !prog->len)
+		return;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.prog = prog;
+
+	ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
+	if (ctx.offset == NULL)
+		return;
+
+	/* 1. Initial fake pass to compute ctx->idx. */
+
+	/* Fake pass to fill in ctx->offset and ctx->tmp_used. */
+	if (build_body(&ctx))
+		goto out;
+
+	build_prologue(&ctx);
+
+	ctx.epilogue_offset = ctx.idx;
+	build_epilogue(&ctx);
+
+	/* Now we know the actual image size. */
+	image_size = sizeof(u32) * ctx.idx;
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL)
+		goto out;
+
+	/* 2. Now, the actual pass. */
+
+	ctx.image = (u32 *)image_ptr;
+	ctx.idx = 0;
+
+	build_prologue(&ctx);
+
+	if (build_body(&ctx)) {
+		bpf_jit_binary_free(header);
+		goto out;
+	}
+
+	build_epilogue(&ctx);
+
+	/* And we're done. */
+	if (bpf_jit_enable > 1)
+		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
+
+	bpf_flush_icache(ctx.image, ctx.image + ctx.idx);
+
+	set_memory_ro((unsigned long)header, header->pages);
+	prog->bpf_func = (void *)ctx.image;
+	prog->jited = true;
+out:
+	kfree(ctx.offset);
+}
+
+void bpf_jit_free(struct bpf_prog *prog)
+{
+	unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK;
+	struct bpf_binary_header *header = (void *)addr;
+
+	if (!prog->jited)
+		goto free_filter;
+
+	set_memory_rw(addr, header->pages);
+	bpf_jit_binary_free(header);
+
+free_filter:
+	bpf_prog_unlock_free(prog);
+}
diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile
new file mode 100644
index 000000000..74a8d87e5
--- /dev/null
+++ b/arch/arm64/xen/Makefile
@@ -0,0 +1,2 @@
+xen-arm-y	+= $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o)
+obj-y		:= xen-arm.o hypercall.o
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
new file mode 100644
index 000000000..8bbe9401f
--- /dev/null
+++ b/arch/arm64/xen/hypercall.S
@@ -0,0 +1,94 @@
+/******************************************************************************
+ * hypercall.S
+ *
+ * Xen hypercall wrappers
+ *
+ * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * The Xen hypercall calling convention is very similar to the procedure
+ * call standard for the ARM 64-bit architecture: the first parameter is
+ * passed in x0, the second in x1, the third in x2, the fourth in x3 and
+ * the fifth in x4.
+ *
+ * The hypercall number is passed in x16.
+ *
+ * The return value is in x0.
+ *
+ * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM
+ * hypercall tag.
+ *
+ * Parameter structs passed to hypercalls are laid out according to
+ * the ARM 64-bit EABI standard.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <xen/interface/xen.h>
+
+
+#define XEN_IMM 0xEA1
+
+#define HYPERCALL_SIMPLE(hypercall)		\
+ENTRY(HYPERVISOR_##hypercall)			\
+	mov x16, #__HYPERVISOR_##hypercall;	\
+	hvc XEN_IMM;				\
+	ret;					\
+ENDPROC(HYPERVISOR_##hypercall)
+
+#define HYPERCALL0 HYPERCALL_SIMPLE
+#define HYPERCALL1 HYPERCALL_SIMPLE
+#define HYPERCALL2 HYPERCALL_SIMPLE
+#define HYPERCALL3 HYPERCALL_SIMPLE
+#define HYPERCALL4 HYPERCALL_SIMPLE
+#define HYPERCALL5 HYPERCALL_SIMPLE
+
+                .text
+
+HYPERCALL2(xen_version);
+HYPERCALL3(console_io);
+HYPERCALL3(grant_table_op);
+HYPERCALL2(sched_op);
+HYPERCALL2(event_channel_op);
+HYPERCALL2(hvm_op);
+HYPERCALL2(memory_op);
+HYPERCALL2(physdev_op);
+HYPERCALL3(vcpu_op);
+HYPERCALL1(tmem_op);
+HYPERCALL2(multicall);
+
+ENTRY(privcmd_call)
+	mov x16, x0
+	mov x0, x1
+	mov x1, x2
+	mov x2, x3
+	mov x3, x4
+	mov x4, x5
+	hvc XEN_IMM
+	ret
+ENDPROC(privcmd_call);